Author: np
Date: Thu Oct 17 18:37:25 2013
New Revision: 256694
URL: http://svnweb.freebsd.org/changeset/base/256694

Log:
  iw_cxgbe: iWARP driver for Chelsio T4/T5 chips.  This is a straight port
  of the iw_cxgb4 found in OFED distributions.
  
  Obtained from:        Chelsio

Added:
  head/sys/dev/cxgbe/iw_cxgbe/
  head/sys/dev/cxgbe/iw_cxgbe/cm.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/cq.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/device.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/ev.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/id_table.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/mem.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/provider.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/qp.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/resource.c   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/t4.h   (contents, props changed)
  head/sys/dev/cxgbe/iw_cxgbe/user.h   (contents, props changed)
  head/sys/modules/cxgbe/iw_cxgbe/
  head/sys/modules/cxgbe/iw_cxgbe/Makefile   (contents, props changed)
Modified:
  head/sys/modules/cxgbe/Makefile

Added: head/sys/dev/cxgbe/iw_cxgbe/cm.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/dev/cxgbe/iw_cxgbe/cm.c    Thu Oct 17 18:37:25 2013        
(r256694)
@@ -0,0 +1,2458 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *       copyright notice, this list of conditions and the following
+ *       disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/taskqueue.h>
+#include <netinet/in.h>
+#include <net/neighbour.h>
+#include <net/route.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcpip.h>
+
+#include <netinet/toecore.h>
+
+struct sge_iq;
+struct rss_header;
+#include <linux/types.h>
+#include "offload.h"
+#include "tom/t4_tom.h"
+
+#define TOEPCB(so)  ((struct toepcb *)(so_sototcpcb((so))->t_toe))
+
+#include "iw_cxgbe.h"
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/notifier.h>
+#include <linux/inetdevice.h>
+#include <linux/if_vlan.h>
+#include <net/netevent.h>
+
+/*
+ * Deferred-work machinery: socket upcalls (which run in restricted context)
+ * enqueue endpoints on req_list under req_lock; c4iw_task drains the list in
+ * process_req() on the c4iw_taskq workqueue.
+ */
+static spinlock_t req_lock;
+static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
+static struct work_struct c4iw_task;
+static struct workqueue_struct *c4iw_taskq;
+/* Timeout bookkeeping; timeout_list/timeout_lock usage is not visible in this chunk. */
+static LIST_HEAD(timeout_list);
+static spinlock_t timeout_lock;
+
+/* Forward declarations for the connection-manager state machine below. */
+static void process_req(struct work_struct *ctx);
+static void start_ep_timer(struct c4iw_ep *ep);
+static void stop_ep_timer(struct c4iw_ep *ep);
+static int set_tcpinfo(struct c4iw_ep *ep);
+static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
+static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state 
tostate);
+static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
+static void *alloc_ep(int size, gfp_t flags);
+void __free_ep(struct c4iw_ep_common *epc);
+static struct rtentry * find_route(__be32 local_ip, __be32 peer_ip, __be16 
local_port,
+               __be16 peer_port, u8 tos);
+static int close_socket(struct c4iw_ep_common *epc, int close);
+static int shutdown_socket(struct c4iw_ep_common *epc);
+static void abort_socket(struct c4iw_ep *ep);
+static void send_mpa_req(struct c4iw_ep *ep);
+static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
+static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
+static void close_complete_upcall(struct c4iw_ep *ep);
+static int abort_connection(struct c4iw_ep *ep);
+static void peer_close_upcall(struct c4iw_ep *ep);
+static void peer_abort_upcall(struct c4iw_ep *ep);
+static void connect_reply_upcall(struct c4iw_ep *ep, int status);
+static void connect_request_upcall(struct c4iw_ep *ep);
+static void established_upcall(struct c4iw_ep *ep);
+static void process_mpa_reply(struct c4iw_ep *ep);
+static void process_mpa_request(struct c4iw_ep *ep);
+static void process_peer_close(struct c4iw_ep *ep);
+static void process_conn_error(struct c4iw_ep *ep);
+static void process_close_complete(struct c4iw_ep *ep);
+static void ep_timeout(unsigned long arg);
+static void init_sock(struct c4iw_ep_common *epc);
+static void process_data(struct c4iw_ep *ep);
+static void process_connected(struct c4iw_ep *ep);
+static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in 
**remote, struct c4iw_ep *child_ep);
+static void process_newconn(struct c4iw_ep *parent_ep);
+static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
+static void process_socket_event(struct c4iw_ep *ep);
+static void release_ep_resources(struct c4iw_ep *ep);
+
+/*
+ * Tracing wrappers around start/stop_ep_timer() that record the call site
+ * (__func__/__LINE__) in the KTR trace buffer before delegating.
+ */
+#define START_EP_TIMER(ep) \
+    do { \
+           CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
+               __func__, __LINE__, (ep)); \
+           start_ep_timer(ep); \
+    } while (0)
+
+#define STOP_EP_TIMER(ep) \
+    do { \
+           CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \
+               __func__, __LINE__, (ep)); \
+           stop_ep_timer(ep); \
+    } while (0)
+
+#ifdef KTR
+/*
+ * Human-readable names for enum c4iw_ep_state, indexed by state value and
+ * used only in KTR trace strings.  NOTE(review): order must match the enum
+ * in iw_cxgbe.h — verify if the enum ever changes.
+ */
+static char *states[] = {
+       "idle",
+       "listen",
+       "connecting",
+       "mpa_wait_req",
+       "mpa_req_sent",
+       "mpa_req_rcvd",
+       "mpa_rep_sent",
+       "fpdu_mode",
+       "aborting",
+       "closing",
+       "moribund",
+       "dead",
+       NULL,
+};
+#endif
+
+/*
+ * Workqueue handler: drain req_list, dispatching each queued endpoint to
+ * process_socket_event().  req_lock is dropped around the handler call (the
+ * handler may sleep/take other locks) and reacquired to fetch the next entry.
+ * The reference taken in c4iw_so_upcall() is released here via c4iw_put_ep().
+ */
+static void
+process_req(struct work_struct *ctx)
+{
+       struct c4iw_ep_common *epc;
+
+       spin_lock(&req_lock);
+       while (!TAILQ_EMPTY(&req_list)) {
+               epc = TAILQ_FIRST(&req_list);
+               TAILQ_REMOVE(&req_list, epc, entry);
+               /* tqe_prev == NULL marks "not queued"; tested in c4iw_so_upcall(). */
+               epc->entry.tqe_prev = NULL;
+               spin_unlock(&req_lock);
+               if (epc->so)
+                       process_socket_event((struct c4iw_ep *)epc);
+               c4iw_put_ep(epc);
+               spin_lock(&req_lock);
+       }
+       spin_unlock(&req_lock);
+}
+
+/*
+ * XXX: doesn't belong here in the iWARP driver.
+ * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
+ *      set.  Is this a valid assumption for active open?
+ */
+/*
+ * Snapshot TCP/TOE state (hw tid, send/receive sequence numbers, emss) from
+ * the offloaded connection into the endpoint.  Fails if the connection was
+ * not offloaded (TF_TOE clear).
+ *
+ * NOTE(review): returns positive EINVAL while the upcall paths in this file
+ * use negative errnos (e.g. -ECONNRESET) — confirm callers expect positive.
+ */
+static int
+set_tcpinfo(struct c4iw_ep *ep)
+{
+       struct socket *so = ep->com.so;
+       struct inpcb *inp = sotoinpcb(so);
+       struct tcpcb *tp;
+       struct toepcb *toep;
+       int rc = 0;
+
+       INP_WLOCK(inp);
+       tp = intotcpcb(inp);
+       if ((tp->t_flags & TF_TOE) == 0) {
+               rc = EINVAL;
+               log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
+                   __func__, so, ep);
+               goto done;
+       }
+       toep = TOEPCB(so);
+
+       ep->hwtid = toep->tid;
+       ep->snd_seq = tp->snd_nxt;
+       ep->rcv_seq = tp->rcv_nxt;
+       /* Floor of 128 guards against a degenerate/unset t_maxseg. */
+       ep->emss = max(tp->t_maxseg, 128);
+done:
+       INP_WUNLOCK(inp);
+       return (rc);
+
+}
+
+/*
+ * Look up the routing entry for peer_ip.  Only the destination address is
+ * used; local_ip/ports/tos are logged but do not affect the lookup.
+ *
+ * NOTE(review): returns iproute.ro_rt to the caller — the caller owns the
+ * route reference taken by rtalloc() and must RTFREE it; verify call sites.
+ * NOTE(review): CTR2 casts a pointer to uint64_t for a %p conversion — the
+ * argument type does not match the format specifier.
+ */
+static struct rtentry *
+find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
+               __be16 peer_port, u8 tos)
+{
+       struct route iproute;
+       struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;
+
+       CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
+           peer_ip, ntohs(local_port), ntohs(peer_port));
+       bzero(&iproute, sizeof iproute);
+       dst->sin_family = AF_INET;
+       dst->sin_len = sizeof *dst;
+       dst->sin_addr.s_addr = peer_ip;
+
+       rtalloc(&iproute);
+       CTR2(KTR_IW_CXGBE, "%s:frtE %p", __func__, (uint64_t)iproute.ro_rt);
+       return iproute.ro_rt;
+}
+
+/*
+ * Detach the receive upcall and either fully close (close != 0) or shut down
+ * both directions of the endpoint's socket, then clear epc->so.
+ *
+ * NOTE(review): the CTR4 format string says "so %p, ep %p" but the arguments
+ * are passed as (epc, so) — the two pointers are logged swapped.
+ */
+static int
+close_socket(struct c4iw_ep_common *epc, int close)
+{
+       struct socket *so = epc->so;
+       int rc;
+
+       CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc, so,
+           states[epc->state]);
+
+       SOCK_LOCK(so);
+       soupcall_clear(so, SO_RCV);
+       SOCK_UNLOCK(so);
+
+       if (close)
+                rc = soclose(so);
+        else
+                rc = soshutdown(so, SHUT_WR | SHUT_RD);
+       epc->so = NULL;
+
+       return (rc);
+}
+
+/*
+ * Half-close: shut down only the send side of the endpoint's socket (sends
+ * FIN), leaving the receive side open for the peer's close sequence.
+ */
+static int
+shutdown_socket(struct c4iw_ep_common *epc)
+{
+
+       CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc->so, epc,
+           states[epc->state]);
+
+       return (soshutdown(epc->so, SHUT_WR));
+}
+
+/*
+ * Arrange for an abortive close: set SO_LINGER with linger time 0 so that a
+ * subsequent close sends RST instead of the normal FIN teardown.  Failure is
+ * logged but not propagated — the close then falls back to a graceful FIN.
+ */
+static void
+abort_socket(struct c4iw_ep *ep)
+{
+       struct sockopt sopt;
+       int rc;
+       struct linger l;
+
+       CTR4(KTR_IW_CXGBE, "%s ep %p so %p state %s", __func__, ep, ep->com.so,
+           states[ep->com.state]);
+
+       l.l_onoff = 1;
+       l.l_linger = 0;
+
+       /* linger_time of 0 forces RST to be sent */
+       sopt.sopt_dir = SOPT_SET;
+       sopt.sopt_level = SOL_SOCKET;
+       sopt.sopt_name = SO_LINGER;
+       sopt.sopt_val = (caddr_t)&l;
+       sopt.sopt_valsize = sizeof l;
+       sopt.sopt_td = NULL;
+       rc = sosetopt(ep->com.so, &sopt);
+       if (rc) {
+               log(LOG_ERR, "%s: can't set linger to 0, no RST! err %d\n",
+                   __func__, rc);
+       }
+}
+
+/*
+ * Handle the peer's half-close (FIN observed via SBS_CANTRCVMORE), advancing
+ * the CM state machine under ep->com.mutex.  Depending on the current state
+ * this may notify the ULP, move the QP toward CLOSING/IDLE, and schedule a
+ * local disconnect and/or drop the final endpoint reference — both deferred
+ * until after the mutex is released.
+ */
+static void
+process_peer_close(struct c4iw_ep *ep)
+{
+       struct c4iw_qp_attributes attrs;
+       int disconnect = 1;
+       int release = 0;
+
+       CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
+           ep->com.so, states[ep->com.state]);
+
+       mutex_lock(&ep->com.mutex);
+       switch (ep->com.state) {
+
+               case MPA_REQ_WAIT:
+                       CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
+                           __func__, ep);
+                       __state_set(&ep->com, CLOSING);
+                       break;
+
+               case MPA_REQ_SENT:
+                       CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
+                           __func__, ep);
+                       /* Peer closed before replying to our MPA request: abort. */
+                       __state_set(&ep->com, DEAD);
+                       connect_reply_upcall(ep, -ECONNABORTED);
+
+                       disconnect = 0;
+                       STOP_EP_TIMER(ep);
+                       close_socket(&ep->com, 0);
+                       ep->com.cm_id->rem_ref(ep->com.cm_id);
+                       ep->com.cm_id = NULL;
+                       ep->com.qp = NULL;
+                       release = 1;
+                       break;
+
+               case MPA_REQ_RCVD:
+
+                       /*
+                        * We're gonna mark this puppy DEAD, but keep
+                        * the reference on it until the ULP accepts or
+                        * rejects the CR.
+                        */
+                       CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
+                           __func__, ep);
+                       __state_set(&ep->com, CLOSING);
+                       c4iw_get_ep(&ep->com);
+                       break;
+
+               case MPA_REP_SENT:
+                       CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
+                           __func__, ep);
+                       __state_set(&ep->com, CLOSING);
+                       break;
+
+               case FPDU_MODE:
+                       CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
+                           __func__, ep);
+                       /* Orderly close initiated by the peer on a live connection. */
+                       START_EP_TIMER(ep);
+                       __state_set(&ep->com, CLOSING);
+                       attrs.next_state = C4IW_QP_STATE_CLOSING;
+                       c4iw_modify_qp(ep->com.dev, ep->com.qp,
+                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+                       peer_close_upcall(ep);
+                       break;
+
+               case ABORTING:
+                       CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)",
+                           __func__, ep);
+                       disconnect = 0;
+                       break;
+
+               case CLOSING:
+                       CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
+                           __func__, ep);
+                       __state_set(&ep->com, MORIBUND);
+                       disconnect = 0;
+                       break;
+
+               case MORIBUND:
+                       CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__,
+                           ep);
+                       STOP_EP_TIMER(ep);
+                       if (ep->com.cm_id && ep->com.qp) {
+                               attrs.next_state = C4IW_QP_STATE_IDLE;
+                               c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+                                               C4IW_QP_ATTR_NEXT_STATE, 
&attrs, 1);
+                       }
+                       close_socket(&ep->com, 0);
+                       close_complete_upcall(ep);
+                       __state_set(&ep->com, DEAD);
+                       release = 1;
+                       disconnect = 0;
+                       break;
+
+               case DEAD:
+                       CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)",
+                           __func__, ep);
+                       disconnect = 0;
+                       break;
+
+               default:
+                       panic("%s: ep %p state %d", __func__, ep,
+                           ep->com.state);
+                       break;
+       }
+
+       mutex_unlock(&ep->com.mutex);
+
+       /* Deferred actions: must run without ep->com.mutex held. */
+       if (disconnect) {
+
+               CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep);
+               c4iw_ep_disconnect(ep, 0, M_NOWAIT);
+       }
+       if (release) {
+
+               CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep);
+               c4iw_put_ep(&ep->com);
+       }
+       CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep);
+       return;
+}
+
+/*
+ * Handle a fatal socket error (so->so_error set): deliver the appropriate
+ * upcall for the current state, push the QP to ERROR where one is attached,
+ * and — unless already ABORTING/DEAD — close the socket, mark the endpoint
+ * DEAD and drop a reference.
+ *
+ * NOTE(review): unlike process_peer_close(), this reads a state snapshot via
+ * state_read() and does not hold ep->com.mutex across the switch — confirm
+ * no concurrent state transition is possible on this path.
+ */
+static void
+process_conn_error(struct c4iw_ep *ep)
+{
+       struct c4iw_qp_attributes attrs;
+       int ret;
+       int state;
+
+       state = state_read(&ep->com);
+       CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
+           __func__, ep, ep->com.so, ep->com.so->so_error,
+           states[ep->com.state]);
+
+       switch (state) {
+
+               case MPA_REQ_WAIT:
+                       STOP_EP_TIMER(ep);
+                       break;
+
+               case MPA_REQ_SENT:
+                       STOP_EP_TIMER(ep);
+                       connect_reply_upcall(ep, -ECONNRESET);
+                       break;
+
+               case MPA_REP_SENT:
+                       ep->com.rpl_err = ECONNRESET;
+                       CTR1(KTR_IW_CXGBE, "waking up ep %p", ep);
+                       break;
+
+               case MPA_REQ_RCVD:
+
+                       /*
+                        * We're gonna mark this puppy DEAD, but keep
+                        * the reference on it until the ULP accepts or
+                        * rejects the CR.
+                        */
+                       c4iw_get_ep(&ep->com);
+                       break;
+
+               case MORIBUND:
+               case CLOSING:
+                       STOP_EP_TIMER(ep);
+                       /*FALLTHROUGH*/
+               case FPDU_MODE:
+
+                       if (ep->com.cm_id && ep->com.qp) {
+
+                               attrs.next_state = C4IW_QP_STATE_ERROR;
+                               ret = c4iw_modify_qp(ep->com.qp->rhp,
+                                       ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
+                                       &attrs, 1);
+                               if (ret)
+                                       log(LOG_ERR,
+                                                       "%s - qp <- error 
failed!\n",
+                                                       __func__);
+                       }
+                       peer_abort_upcall(ep);
+                       break;
+
+               case ABORTING:
+                       break;
+
+               case DEAD:
+                       CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
+                           __func__, ep->com.so->so_error);
+                       /* Already dead: nothing left to tear down. */
+                       return;
+
+               default:
+                       panic("%s: ep %p state %d", __func__, ep, state);
+                       break;
+       }
+
+       if (state != ABORTING) {
+
+               CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep);
+               close_socket(&ep->com, 0);
+               state_set(&ep->com, DEAD);
+               c4iw_put_ep(&ep->com);
+       }
+       CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
+       return;
+}
+
+/*
+ * Handle full disconnect (SS_ISDISCONNECTED): CLOSING advances to MORIBUND;
+ * MORIBUND completes the teardown — idle the QP, close the socket (fully for
+ * accepted/child endpoints, shutdown-only otherwise), notify the ULP and mark
+ * the endpoint DEAD.  Runs under ep->com.mutex; the final reference drop is
+ * deferred until the mutex is released.
+ */
+static void
+process_close_complete(struct c4iw_ep *ep)
+{
+       struct c4iw_qp_attributes attrs;
+       int release = 0;
+
+       CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
+           ep->com.so, states[ep->com.state]);
+
+       /* The cm_id may be null if we failed to connect */
+       mutex_lock(&ep->com.mutex);
+
+       switch (ep->com.state) {
+
+               case CLOSING:
+                       CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
+                           __func__, ep);
+                       __state_set(&ep->com, MORIBUND);
+                       break;
+
+               case MORIBUND:
+                       CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__,
+                           ep);
+                       STOP_EP_TIMER(ep);
+
+                       if ((ep->com.cm_id) && (ep->com.qp)) {
+
+                               CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE",
+                                   __func__, ep);
+                               attrs.next_state = C4IW_QP_STATE_IDLE;
+                               c4iw_modify_qp(ep->com.dev,
+                                               ep->com.qp,
+                                               C4IW_QP_ATTR_NEXT_STATE,
+                                               &attrs, 1);
+                       }
+
+                       /* Child (accepted) endpoints get a full soclose(). */
+                       if (ep->parent_ep) {
+
+                               CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep);
+                               close_socket(&ep->com, 1);
+                       }
+                       else {
+
+                               CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep);
+                               close_socket(&ep->com, 0);
+                       }
+                       close_complete_upcall(ep);
+                       __state_set(&ep->com, DEAD);
+                       release = 1;
+                       break;
+
+               case ABORTING:
+                       CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep);
+                       break;
+
+               case DEAD:
+               default:
+                       CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep);
+                       panic("%s:pcc6 %p DEAD", __func__, ep);
+                       break;
+       }
+       mutex_unlock(&ep->com.mutex);
+
+       if (release) {
+
+               CTR2(KTR_IW_CXGBE, "%s:pcc7 %p", __func__, ep);
+               c4iw_put_ep(&ep->com);
+       }
+       CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
+       return;
+}
+
+/*
+ * Prepare an endpoint's socket for CM use: install c4iw_so_upcall as the
+ * receive upcall (with epc as its arg), make the socket non-blocking, and
+ * set TCP_NODELAY.  A sosetopt() failure is logged but not fatal.
+ */
+static void
+init_sock(struct c4iw_ep_common *epc)
+{
+       int rc;
+       struct sockopt sopt;
+       struct socket *so = epc->so;
+       int on = 1;
+
+       SOCK_LOCK(so);
+       soupcall_set(so, SO_RCV, c4iw_so_upcall, epc);
+       so->so_state |= SS_NBIO;
+       SOCK_UNLOCK(so);
+       sopt.sopt_dir = SOPT_SET;
+       sopt.sopt_level = IPPROTO_TCP;
+       sopt.sopt_name = TCP_NODELAY;
+       sopt.sopt_val = (caddr_t)&on;
+       sopt.sopt_valsize = sizeof on;
+       sopt.sopt_td = NULL;
+       rc = sosetopt(so, &sopt);
+       if (rc) {
+               log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n",
+                   __func__, so, rc);
+       }
+}
+
+/*
+ * Dispatch inbound streaming data based on CM state: an MPA reply if we sent
+ * the request, an MPA request (after recording local/remote addresses) if we
+ * are waiting for one.  Data in any other state is unexpected and logged.
+ */
+static void
+process_data(struct c4iw_ep *ep)
+{
+       struct sockaddr_in *local, *remote;
+
+       CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sb_cc %d", __func__,
+           ep->com.so, ep, states[ep->com.state], ep->com.so->so_rcv.sb_cc);
+
+       switch (state_read(&ep->com)) {
+       case MPA_REQ_SENT:
+               process_mpa_reply(ep);
+               break;
+       case MPA_REQ_WAIT:
+               /* in_getsockaddr/in_getpeeraddr allocate; copy then free. */
+               in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
+               in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
+               ep->com.local_addr = *local;
+               ep->com.remote_addr = *remote;
+               free(local, M_SONAME);
+               free(remote, M_SONAME);
+               process_mpa_request(ep);
+               break;
+       default:
+               if (ep->com.so->so_rcv.sb_cc)
+                       log(LOG_ERR, "%s: Unexpected streaming data.  "
+                           "ep %p, state %d, so %p, so_state 0x%x, sb_cc %u\n",
+                           __func__, ep, state_read(&ep->com), ep->com.so,
+                           ep->com.so->so_state, ep->com.so->so_rcv.sb_cc);
+               break;
+       }
+}
+
+/*
+ * Active-open completion: if the socket connected cleanly, start the MPA
+ * exchange; otherwise report the (negated) socket error to the ULP and tear
+ * the endpoint down.
+ */
+static void
+process_connected(struct c4iw_ep *ep)
+{
+
+       if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error)
+               send_mpa_req(ep);
+       else {
+               connect_reply_upcall(ep, -ep->com.so->so_error);
+               close_socket(&ep->com, 0);
+               state_set(&ep->com, DEAD);
+               c4iw_put_ep(&ep->com);
+       }
+}
+
+/*
+ * Pull one completed connection off the listen socket's accept queue,
+ * detach it from the head, take a reference, install the CM upcall for
+ * child_ep, mark it non-blocking and soaccept() it.  Returns NULL if the
+ * queue is empty.  *remote is allocated by soaccept(); caller frees it.
+ *
+ * NOTE(review): soaccept()'s return value is ignored — confirm failure here
+ * cannot leave *remote uninitialized for the caller.
+ */
+static struct socket *
+dequeue_socket(struct socket *head, struct sockaddr_in **remote,
+    struct c4iw_ep *child_ep)
+{
+       struct socket *so;
+
+       ACCEPT_LOCK();
+       so = TAILQ_FIRST(&head->so_comp);
+       if (!so) {
+               ACCEPT_UNLOCK();
+               return (NULL);
+       }
+       TAILQ_REMOVE(&head->so_comp, so, so_list);
+       head->so_qlen--;
+       SOCK_LOCK(so);
+       so->so_qstate &= ~SQ_COMP;
+       so->so_head = NULL;
+       soref(so);
+       soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep);
+       so->so_state |= SS_NBIO;
+       SOCK_UNLOCK(so);
+       ACCEPT_UNLOCK();
+       soaccept(so, (struct sockaddr **)remote);
+
+       return (so);
+}
+
+/*
+ * Accept path for a listening endpoint: allocate a child endpoint, dequeue
+ * the accepted socket, initialize the child (addresses, device, thread, a
+ * reference on the parent), start its MPA-wait timer and immediately poll
+ * for an MPA request that may already be queued on the socket.
+ *
+ * NOTE(review): child_ep->parent_ep is assigned twice (before and after
+ * free(remote)); the second assignment is redundant.
+ */
+static void
+process_newconn(struct c4iw_ep *parent_ep)
+{
+       struct socket *child_so;
+       struct c4iw_ep *child_ep;
+       struct sockaddr_in *remote;
+
+       child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
+       if (!child_ep) {
+               CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM",
+                   __func__, parent_ep->com.so, parent_ep);
+               log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__);
+               return;
+       }
+
+       child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
+       if (!child_so) {
+               CTR4(KTR_IW_CXGBE,
+                   "%s: parent so %p, parent ep %p, child ep %p, dequeue err",
+                   __func__, parent_ep->com.so, parent_ep, child_ep);
+               log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__);
+               __free_ep(&child_ep->com);
+               return;
+
+       }
+
+       CTR5(KTR_IW_CXGBE,
+           "%s: parent so %p, parent ep %p, child so %p, child ep %p",
+            __func__, parent_ep->com.so, parent_ep, child_so, child_ep);
+
+       child_ep->com.local_addr = parent_ep->com.local_addr;
+       child_ep->com.remote_addr = *remote;
+       child_ep->com.dev = parent_ep->com.dev;
+       child_ep->com.so = child_so;
+       child_ep->com.cm_id = NULL;
+       child_ep->com.thread = parent_ep->com.thread;
+       child_ep->parent_ep = parent_ep;
+
+       free(remote, M_SONAME);
+       c4iw_get_ep(&parent_ep->com);
+       child_ep->parent_ep = parent_ep;
+       init_timer(&child_ep->timer);
+       state_set(&child_ep->com, MPA_REQ_WAIT);
+       START_EP_TIMER(child_ep);
+
+       /* maybe the request has already been queued up on the socket... */
+       process_mpa_request(child_ep);
+}
+
+/*
+ * Socket receive upcall (runs with the socket locked, restricted context):
+ * queue the endpoint on req_list for the taskqueue to process, taking a
+ * reference that process_req() releases.  tqe_prev != NULL means the ep is
+ * already queued, so events are coalesced.
+ *
+ * NOTE(review): ep is dereferenced in the CTR6 trace (and states[]) before
+ * the "if (ep && ..." null check — the check is dead or the trace can fault.
+ */
+static int
+c4iw_so_upcall(struct socket *so, void *arg, int waitflag)
+{
+       struct c4iw_ep *ep = arg;
+
+       spin_lock(&req_lock);
+
+       CTR6(KTR_IW_CXGBE,
+           "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p",
+           __func__, so, so->so_state, ep, states[ep->com.state],
+           ep->com.entry.tqe_prev);
+
+       if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
+               KASSERT(ep->com.so == so, ("%s: XXX review.", __func__));
+               c4iw_get_ep(&ep->com);
+               TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
+               queue_work(c4iw_taskq, &c4iw_task);
+       }
+
+       spin_unlock(&req_lock);
+       return (SU_OK);
+}
+
+/*
+ * Demultiplex a socket event for an endpoint, checked in priority order:
+ * active-connect completion, new inbound connection on a listener, socket
+ * error, peer half-close (FIN), full disconnect, and finally inbound data.
+ * Called from process_req() in taskqueue context.
+ */
+static void
+process_socket_event(struct c4iw_ep *ep)
+{
+       int state = state_read(&ep->com);
+       struct socket *so = ep->com.so;
+
+       CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
+           "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
+           so->so_error, so->so_rcv.sb_state, ep, states[state]);
+
+       if (state == CONNECTING) {
+               process_connected(ep);
+               return;
+       }
+
+       if (state == LISTEN) {
+               process_newconn(ep);
+               return;
+       }
+
+       /* connection error */
+       if (so->so_error) {
+               process_conn_error(ep);
+               return;
+       }
+
+       /* peer close */
+       if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
+               process_peer_close(ep);
+               return;
+       }
+
+       /* close complete */
+       if (so->so_state & SS_ISDISCONNECTED) {
+               process_close_complete(ep);
+               return;
+       }
+
+       /* rx data */
+       process_data(ep);
+}
+
+/*
+ * Tunables/sysctls under hw.iw_cxgbe.  Each TUNABLE_INT reads the loader
+ * environment at module load; SYSCTL_INT exposes it read-write at runtime.
+ */
+SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver 
parameters");
+
+int db_delay_usecs = 1;
+TUNABLE_INT("hw.iw_cxgbe.db_delay_usecs", &db_delay_usecs);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_delay_usecs, CTLFLAG_RW, 
&db_delay_usecs, 0,
+               "Usecs to delay awaiting db fifo to drain");
+
+static int dack_mode = 1;
+TUNABLE_INT("hw.iw_cxgbe.dack_mode", &dack_mode);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RW, &dack_mode, 0,
+               "Delayed ack mode (default = 1)");
+
+int c4iw_max_read_depth = 8;
+TUNABLE_INT("hw.iw_cxgbe.c4iw_max_read_depth", &c4iw_max_read_depth);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RW, 
&c4iw_max_read_depth, 0,
+               "Per-connection max ORD/IRD (default = 8)");
+
+static int enable_tcp_timestamps;
+TUNABLE_INT("hw.iw_cxgbe.enable_tcp_timestamps", &enable_tcp_timestamps);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RW, 
&enable_tcp_timestamps, 0,
+               "Enable tcp timestamps (default = 0)");
+
+static int enable_tcp_sack;
+TUNABLE_INT("hw.iw_cxgbe.enable_tcp_sack", &enable_tcp_sack);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RW, 
&enable_tcp_sack, 0,
+               "Enable tcp SACK (default = 0)");
+
+static int enable_tcp_window_scaling = 1;
+TUNABLE_INT("hw.iw_cxgbe.enable_tcp_window_scaling", 
&enable_tcp_window_scaling);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RW, 
&enable_tcp_window_scaling, 0,
+               "Enable tcp window scaling (default = 1)");
+
+/* NOTE(review): initialized to 1 but described as "default = 0" — mismatch. */
+int c4iw_debug = 1;
+TUNABLE_INT("hw.iw_cxgbe.c4iw_debug", &c4iw_debug);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RW, &c4iw_debug, 0,
+               "Enable debug logging (default = 0)");
+
+static int peer2peer;
+TUNABLE_INT("hw.iw_cxgbe.peer2peer", &peer2peer);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RW, &peer2peer, 0,
+               "Support peer2peer ULPs (default = 0)");
+
+static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
+TUNABLE_INT("hw.iw_cxgbe.p2p_type", &p2p_type);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RW, &p2p_type, 0,
+               "RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = 
RDMA_WRITE (default 1)");
+
+static int ep_timeout_secs = 60;
+TUNABLE_INT("hw.iw_cxgbe.ep_timeout_secs", &ep_timeout_secs);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, 
&ep_timeout_secs, 0,
+               "CM Endpoint operation timeout in seconds (default = 60)");
+
+static int mpa_rev = 1;
+TUNABLE_INT("hw.iw_cxgbe.mpa_rev", &mpa_rev);
+#ifdef IW_CM_MPAV2
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
+               "MPA Revision, 0 supports amso1100, 1 is RFC0544 spec 
compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)");
+#else
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
+               "MPA Revision, 0 supports amso1100, 1 is RFC0544 spec compliant 
(default = 1)");
+#endif
+
+static int markers_enabled;
+TUNABLE_INT("hw.iw_cxgbe.markers_enabled", &markers_enabled);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RW, 
&markers_enabled, 0,
+               "Enable MPA MARKERS (default(0) = disabled)");
+
+static int crc_enabled = 1;
+TUNABLE_INT("hw.iw_cxgbe.crc_enabled", &crc_enabled);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0,
+               "Enable MPA CRC (default(1) = enabled)");
+
+static int rcv_win = 256 * 1024;
+TUNABLE_INT("hw.iw_cxgbe.rcv_win", &rcv_win);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0,
+               "TCP receive window in bytes (default = 256KB)");
+
+static int snd_win = 128 * 1024;
+TUNABLE_INT("hw.iw_cxgbe.snd_win", &snd_win);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0,
+               "TCP send window in bytes (default = 128KB)");
+
+/* NOTE(review): description string appears truncated ("triggers automatic"). */
+int db_fc_threshold = 2000;
+TUNABLE_INT("hw.iw_cxgbe.db_fc_threshold", &db_fc_threshold);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_fc_threshold, CTLFLAG_RW, 
&db_fc_threshold, 0,
+               "QP count/threshold that triggers automatic");
+
+/*
+ * Arm the endpoint timeout (ep_timeout_secs from now) and take a reference
+ * held until the timer fires or is stopped.  Re-arming while pending is an
+ * error: it is logged and ignored rather than restarted.
+ */
+static void
+start_ep_timer(struct c4iw_ep *ep)
+{
+
+       if (timer_pending(&ep->timer)) {
+               CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep);
+               printk(KERN_ERR "%s timer already started! ep %p\n", __func__,
+                   ep);
+               return;
+       }
+       /* TIMEOUT flag cleared so stop_ep_timer() knows the ref is live. */
+       clear_bit(TIMEOUT, &ep->com.flags);
+       c4iw_get_ep(&ep->com);
+       ep->timer.expires = jiffies + ep_timeout_secs * HZ;
+       ep->timer.data = (unsigned long)ep;
+       ep->timer.function = ep_timeout;
+       add_timer(&ep->timer);
+}
+
+/*
+ * Cancel the endpoint timer (waiting out a concurrent handler) and drop the
+ * timer's reference exactly once: test_and_set_bit(TIMEOUT) ensures the put
+ * happens only if the timeout handler has not already claimed it.
+ */
+static void
+stop_ep_timer(struct c4iw_ep *ep)
+{
+
+       del_timer_sync(&ep->timer);
+       if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
+               c4iw_put_ep(&ep->com);
+       }
+}
+
+/*
+ * Return a snapshot of the endpoint state under the state mutex.  The value
+ * may be stale by the time the caller uses it.
+ */
+static enum
+c4iw_ep_state state_read(struct c4iw_ep_common *epc)
+{
+       enum c4iw_ep_state state;
+
+       mutex_lock(&epc->mutex);
+       state = epc->state;
+       mutex_unlock(&epc->mutex);
+
+       return (state);
+}
+
+/* Set endpoint state; caller must already hold epc->mutex. */
+static void
+__state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
+{
+
+       epc->state = new;
+}
+
+/* Locked variant of __state_set(): acquires epc->mutex around the store. */
+static void
+state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
+{
+
+       mutex_lock(&epc->mutex);
+       __state_set(epc, new);
+       mutex_unlock(&epc->mutex);
+}
+
+/*
+ * Allocate and zero an endpoint of the given size (c4iw_ep or c4iw_ep_common
+ * with embedded common header), initializing its refcount, state mutex and
+ * wr_wait.  Returns NULL on allocation failure.  Caller owns the reference.
+ */
+static void *
+alloc_ep(int size, gfp_t gfp)
+{
+       struct c4iw_ep_common *epc;
+
+       epc = kzalloc(size, gfp);
+       if (epc == NULL)
+               return (NULL);
+
+       kref_init(&epc->kref);
+       mutex_init(&epc->mutex);
+       c4iw_init_wr_wait(&epc->wr_wait);
+
+       return (epc);
+}
+
+/*
+ * Free an endpoint directly, bypassing the kref (used on early-failure paths
+ * such as process_newconn()).  Asserts the socket is detached and the ep is
+ * not still linked on req_list.
+ */
+void
+__free_ep(struct c4iw_ep_common *epc)
+{
+       CTR2(KTR_IW_CXGBE, "%s:feB %p", __func__, epc);
+       KASSERT(!epc->so, ("%s warning ep->so %p \n", __func__, epc->so));
+       KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", 
__func__, epc));
+       free(epc, M_DEVBUF);
+       CTR2(KTR_IW_CXGBE, "%s:feE %p", __func__, epc);
+}
+
+/*
+ * kref release callback (invoked by c4iw_put_ep when the last reference is
+ * dropped): sanity-check that the socket is gone and the ep is off req_list,
+ * then free it.
+ */
+void _c4iw_free_ep(struct kref *kref)
+{
+       struct c4iw_ep *ep;
+       struct c4iw_ep_common *epc;
+
+       ep = container_of(kref, struct c4iw_ep, com.kref);
+       epc = &ep->com;
+       KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so));
+       KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
+           __func__, epc));
+       kfree(ep);
+}
+
+/*
+ * Flag the endpoint for resource release and drop a reference; the actual
+ * teardown happens when the last reference goes away (_c4iw_free_ep).
+ */
+static void release_ep_resources(struct c4iw_ep *ep)
+{
+       CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep);
+       set_bit(RELEASE_RESOURCES, &ep->com.flags);
+       c4iw_put_ep(&ep->com);
+       CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep);
+}
+
+static void
+send_mpa_req(struct c4iw_ep *ep)
+{
+       int mpalen;
+       struct mpa_message *mpa;
+       struct mpa_v2_conn_params mpa_v2_params;
+       struct mbuf *m;
+       char mpa_rev_to_use = mpa_rev;
+       int err;
+
+       if (ep->retry_with_mpa_v1)
+               mpa_rev_to_use = 1;
+       mpalen = sizeof(*mpa) + ep->plen;
+       if (mpa_rev_to_use == 2)
+               mpalen += sizeof(struct mpa_v2_conn_params);
+
+       if (mpalen > MHLEN)
+               CXGBE_UNIMPLEMENTED(__func__);
+
+       m = m_gethdr(M_NOWAIT, MT_DATA);
+       if (m == NULL) {
+               connect_reply_upcall(ep, -ENOMEM);
+               return;
+       }
+
+       mpa = mtod(m, struct mpa_message *);
+       m->m_len = mpalen;
+       m->m_pkthdr.len = mpalen;
+       memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
+       mpa->flags = (crc_enabled ? MPA_CRC : 0) |
+               (markers_enabled ? MPA_MARKERS : 0) |
+               (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
+       mpa->private_data_size = htons(ep->plen);
+       mpa->revision = mpa_rev_to_use;
+
+       if (mpa_rev_to_use == 1) {
+               ep->tried_with_mpa_v1 = 1;
+               ep->retry_with_mpa_v1 = 0;
+       }

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to