Hi Nithin,
Sorry, I come late on this one, change looks good.

We need to check if the IRP output buffer is big enough to fit the transaction 
message before copying it. Otherwise, we need to return error and break the 
NetLink connection.
Thanks,
Eitan


-----Original Message-----
From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Nithin Raju
Sent: Tuesday, April 28, 2015 2:36 PM
To: dev@openvswitch.org
Subject: [ovs-dev] [PATCH v3] ovs-hyperv: make kernel return values netlink 
socket like

In this patch, we make changes to usersapce as well as kernel datapath on 
hyperv to make it more netlink socket like. Previously, the kernel datapath did 
not distinguish between "transport errors" and other errors. Netlink semantics 
dictate that netlink functions should only return an error only in the case of 
a "transport error"
which is generally something fatal. Eg. failure to communicate with the OVS 
module, or an invalid command altogether. Other errors such as an unsupported 
action, or an invalid flow key is not considered a "transport error", and in 
such cases, netlink functions are to return success with a 'struct nlmsgerr' 
populated in the output buffer.

This patch implements these semantics.

Signed-off-by: Nithin Raju <nit...@vmware.com>
Acked-by: Sorin Vinturis <svintu...@cloudbasesolutions.com>
Reported-at: 
https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_openvswitch_ovs-2Dissues_issues_72&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=CWsgHUxi6ExLXY798tmo3LJ4e3geGYp56lkcH-5cLCY&m=gnFVocxivsB-5iJX4WfGUf-eI54hnHjhLPFrIhaeSqU&s=4gTRd-4F3b_yYXbvXPMWy1sqnpt68ZC-3iT_GkKoVqc&e=
---
v2: addressed Sorin's comments
v3: rebase to HEAD

 datapath-windows/ovsext/Datapath.c |  61 +++++++++++++++++++---
 datapath-windows/ovsext/Datapath.h |   2 -
 lib/netlink-socket.c               | 102 ++++++++++++++++++++++---------------
 3 files changed, 114 insertions(+), 51 deletions(-)

diff --git a/datapath-windows/ovsext/Datapath.c 
b/datapath-windows/ovsext/Datapath.c
index 1dead33..7646f0a 100644
--- a/datapath-windows/ovsext/Datapath.c
+++ b/datapath-windows/ovsext/Datapath.c
@@ -78,10 +78,11 @@ typedef struct _NETLINK_CMD {
 /* A netlink family is a group of commands. */  typedef struct _NETLINK_FAMILY 
{
     CHAR *name;
-    UINT32 id;
+    UINT16 id;
     UINT8 version;
-    UINT8 pad;
+    UINT8 pad1;
     UINT16 maxAttr;
+    UINT16 pad2;
     NETLINK_CMD *cmds;          /* Array of netlink commands and handlers. */
     UINT16 opsCount;
 } NETLINK_FAMILY, *PNETLINK_FAMILY;
@@ -143,12 +144,12 @@ NETLINK_CMD nlControlFamilyCmdOps[] = {
     },
     { .cmd = OVS_CTRL_CMD_EVENT_NOTIFY,
       .handler = OvsReadEventCmdHandler,
-      .supportedDevOp = OVS_READ_EVENT_DEV_OP,
+      .supportedDevOp = OVS_READ_DEV_OP,
       .validateDpIndex = FALSE,
     },
     { .cmd = OVS_CTRL_CMD_READ_NOTIFY,
       .handler = OvsReadPacketCmdHandler,
-      .supportedDevOp = OVS_READ_PACKET_DEV_OP,
+      .supportedDevOp = OVS_READ_DEV_OP,
       .validateDpIndex = FALSE,
     }
 };
@@ -799,12 +800,17 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
         inputBufferLen = 0;
 
         ovsMsg = &ovsMsgReadOp;
-        ovsMsg->nlMsg.nlmsgType = OVS_WIN_NL_CTRL_FAMILY_ID;
+        RtlZeroMemory(ovsMsg, sizeof *ovsMsg);
+        ovsMsg->nlMsg.nlmsgLen = sizeof *ovsMsg;
+        ovsMsg->nlMsg.nlmsgType = nlControlFamilyOps.id;
         ovsMsg->nlMsg.nlmsgPid = instance->pid;
+
         /* An "artificial" command so we can use NL family function table*/
         ovsMsg->genlMsg.cmd = (code == OVS_IOCTL_READ_EVENT) ?
                               OVS_CTRL_CMD_EVENT_NOTIFY :
                               OVS_CTRL_CMD_READ_NOTIFY;
+        ovsMsg->genlMsg.version = nlControlFamilyOps.version;
+
         devOp = OVS_READ_DEV_OP;
         break;
 
@@ -895,8 +901,8 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
     }
 
     /*
-     * For read operation, the netlink command has already been validated
-     * previously.
+     * For read operation, avoid duplicate validation since 'ovsMsg' is either
+     * "artificial" or was copied from a previously validated 'ovsMsg'.
      */
     if (devOp != OVS_READ_DEV_OP) {
         status = ValidateNetlinkCmd(devOp, instance, ovsMsg, nlFamilyOps); @@ 
-982,7 +988,9 @@ done:
 
 /*
  * --------------------------------------------------------------------------
- * Function to invoke the netlink command handler.
+ * Function to invoke the netlink command handler. The function also 
+ stores
+ * the return value of the handler function to construct a 'NL_ERROR' 
+ message,
+ * and in turn returns success to the caller.
  * --------------------------------------------------------------------------
  */
 static NTSTATUS
@@ -1004,6 +1012,43 @@ InvokeNetlinkCmdHandler(POVS_USER_PARAMS_CONTEXT 
usrParamsCtx,
         }
     }
 
+    /*
+     * Netlink socket semantics dictate that the return value of the netlink
+     * function should be an error ONLY under fatal conditions. If the message
+     * made it all the way to the handler function, it is not a fatal 
condition.
+     * Absorb the error returned by the handler function into a 'struct
+     * NL_ERROR' and populate the 'output buffer' to return to userspace.
+     *
+     * This behavior is obviously applicable only to netlink commands that
+     * specify an 'output buffer'. For other commands, we return the error as
+     * is.
+     *
+     * 'STATUS_PENDING' is a special return value and userspace is equipped to
+     * handle it.
+     */
+    if (status != STATUS_SUCCESS && status != STATUS_PENDING) {
+        if (usrParamsCtx->devOp != OVS_WRITE_DEV_OP && *replyLen == 0) {
+            NL_ERROR nlError = NlMapStatusToNlErr(status);
+            POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
+            POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
+                usrParamsCtx->outputBuffer;

+
+            ASSERT(msgError);
+            NlBuildErrorMsg(msgIn, msgError, nlError);
+            *replyLen = msgError->nlMsg.nlmsgLen;
+        }
+
+        if (*replyLen != 0) {
+            status = STATUS_SUCCESS;
+        }
+    }
+
+#ifdef DBG
+    if (usrParamsCtx->devOp != OVS_WRITE_DEV_OP) {
+        ASSERT(status == STATUS_PENDING || *replyLen != 0 || status == 
STATUS_SUCCESS);
+    }
+#endif
+
     return status;
 }
 
diff --git a/datapath-windows/ovsext/Datapath.h 
b/datapath-windows/ovsext/Datapath.h
index 863afc4..dbc9dea 100644
--- a/datapath-windows/ovsext/Datapath.h
+++ b/datapath-windows/ovsext/Datapath.h
@@ -32,8 +32,6 @@
 #define OVS_READ_DEV_OP          (1 << 0)
 #define OVS_WRITE_DEV_OP         (1 << 1)
 #define OVS_TRANSACTION_DEV_OP   (1 << 2)
-#define OVS_READ_EVENT_DEV_OP    (1 << 3)
-#define OVS_READ_PACKET_DEV_OP   (1 << 4)
 
 typedef struct _OVS_DEVICE_EXTENSION {
     INT numberOpenInstance;
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index a809b1e..42eb232 
100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -475,6 +475,8 @@ nl_sock_send__(struct nl_sock *sock, const struct ofpbuf 
*msg,
             retval = -1;
             /* XXX: Map to a more appropriate error based on GetLastError(). */
             errno = EINVAL;
+            VLOG_DBG_RL(&rl, "fatal driver failure in write: %s",
+                ovs_lasterror_to_string());
         } else {
             retval = msg->size;
         }
@@ -564,7 +566,10 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, 
bool wait)
         DWORD bytes;
         if (!DeviceIoControl(sock->handle, sock->read_ioctl,
                              NULL, 0, tail, sizeof tail, &bytes, NULL)) {
+            VLOG_DBG_RL(&rl, "fatal driver failure in transact: %s",
+                ovs_lasterror_to_string());
             retval = -1;
+            /* XXX: Map to a more appropriate error. */
             errno = EINVAL;
         } else {
             retval = bytes;
@@ -789,14 +794,27 @@ nl_sock_transact_multiple__(struct nl_sock *sock,
     uint8_t reply_buf[65536];
     for (i = 0; i < n; i++) {
         DWORD reply_len;
+        bool ret;
         struct nl_transaction *txn = transactions[i];
         struct nlmsghdr *request_nlmsg, *reply_nlmsg;
 
-        if (!DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
-                             txn->request->data,
-                             txn->request->size,
-                             reply_buf, sizeof reply_buf,
-                             &reply_len, NULL)) {
+        ret = DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
+                              txn->request->data,
+                              txn->request->size,
+                              reply_buf, sizeof reply_buf,
+                              &reply_len, NULL);
+
+        if (ret && reply_len == 0) {
+            /*
+             * The current transaction did not produce any data to read and 
that
+             * is not an error as such. Continue with the remainder of the
+             * transactions.
+             */
+            txn->error = 0;
+            if (txn->reply) {
+                ofpbuf_clear(txn->reply);
+            }
+        } else if (!ret) {
             /* XXX: Map to a more appropriate error. */
             error = EINVAL;
             VLOG_DBG_RL(&rl, "fatal driver failure: %s", @@ -804,48 +822,50 @@ 
nl_sock_transact_multiple__(struct nl_sock *sock,
             break;
         }
 
-        if (reply_len < sizeof *reply_nlmsg) {
-            nl_sock_record_errors__(transactions, n, 0);
-            VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32
-                " for seq: %#"PRIx32, reply_len, request_nlmsg->nlmsg_seq);
-            break;
-        }
-
-        /* Validate the sequence number in the reply. */
-        request_nlmsg = nl_msg_nlmsghdr(txn->request);
-        reply_nlmsg = (struct nlmsghdr *)reply_buf;
+        if (reply_len != 0) {
+            if (reply_len < sizeof *reply_nlmsg) {
+                nl_sock_record_errors__(transactions, n, 0);
+                VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32
+                    " for seq: %#"PRIx32, reply_len, request_nlmsg->nlmsg_seq);
+                break;
+            }
 
-        if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
-            ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq);
-            VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32
-                ", reply %#"PRIx32, request_nlmsg->nlmsg_seq,
-                reply_nlmsg->nlmsg_seq);
-            break;
-        }
+            /* Validate the sequence number in the reply. */
+            request_nlmsg = nl_msg_nlmsghdr(txn->request);
+            reply_nlmsg = (struct nlmsghdr *)reply_buf;
 
-        /* Handle errors embedded within the netlink message. */
-        ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf);
-        tmp_reply.size = sizeof reply_buf;
-        if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) {
-            if (txn->reply) {
-                ofpbuf_clear(txn->reply);
-            }
-            if (txn->error) {
-                VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
-                            error, ovs_strerror(txn->error));
+            if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
+                ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq);
+                VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32
+                    ", reply %#"PRIx32, request_nlmsg->nlmsg_seq,
+                    reply_nlmsg->nlmsg_seq);
+                break;
             }
-        } else {
-            txn->error = 0;
-            if (txn->reply) {
-                /* Copy the reply to the buffer specified by the caller. */
-                if (reply_len > txn->reply->allocated) {
-                    ofpbuf_reinit(txn->reply, reply_len);
+
+            /* Handle errors embedded within the netlink message. */
+            ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf);
+            tmp_reply.size = sizeof reply_buf;
+            if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) {
+                if (txn->reply) {
+                    ofpbuf_clear(txn->reply);
+                }
+                if (txn->error) {
+                    VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
+                                error, ovs_strerror(txn->error));
+                }
+            } else {
+                txn->error = 0;
+                if (txn->reply) {
+                    /* Copy the reply to the buffer specified by the caller. */
+                    if (reply_len > txn->reply->allocated) {
+                        ofpbuf_reinit(txn->reply, reply_len);
+                    }
+                    memcpy(txn->reply->data, reply_buf, reply_len);
+                    txn->reply->size = reply_len;
                 }
-                memcpy(txn->reply->data, reply_buf, reply_len);
-                txn->reply->size = reply_len;
             }
+            ofpbuf_uninit(&tmp_reply);
         }
-        ofpbuf_uninit(&tmp_reply);
 
         /* Count the number of successful transactions. */
         (*done)++;
--
1.9.4.msysgit.2

_______________________________________________
dev mailing list
dev@openvswitch.org
https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailman_listinfo_dev&d=AwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=CWsgHUxi6ExLXY798tmo3LJ4e3geGYp56lkcH-5cLCY&m=gnFVocxivsB-5iJX4WfGUf-eI54hnHjhLPFrIhaeSqU&s=zUpIggx2DJO4JcDcU7VNs0TnztMPDhIx12yPLPe3we0&e=
 
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to