]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: establish connection for legitimate remote RDMA message
author: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Thu, 29 Oct 2015 16:24:46 +0000 (09:24 -0700)
committer: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Fri, 13 Nov 2015 17:12:00 +0000 (09:12 -0800)
The first message to a remote node should prompt a new
connection even if it is an RDMA operation via CMSG. That
means the connection needs to be established before CMSG
parsing. Commit 3d6e0fed8edc ("rds_rdma: rds_sendmsg
should return EAGAIN if connection not setup") tried to
address that issue as part of bug 20232581.

But it inadvertently broke the QoS policy evaluation. QoS has
the opposite requirement: it needs information from the CMSG
to evaluate whether the message is legitimate to be sent over
the wire. Specifically, it needs to know the total payload,
which should include the actual payload and the additional RDMA
bytes. It then evaluates the total payload against the system's
QoS thresholds to determine if the message is legitimate to be
sent.

This patch addresses these two opposing requirements by fetching
only the RDMA bytes information for QoS evaluation and letting
the full CMSG parsing happen after the connection is
initiated.  Since connection establishment is asynchronous,
we make sure that a map failure caused by an unavailable
connection reaches the user with an appropriate error code.

Orabug: 22139696

Reviewed-by: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
net/rds/send.c

index 163bee4d64c92ab5938805393b90d03d9f39352e..622749b026d940dfec9ee718b1b542602f05881d 100644 (file)
@@ -1133,6 +1133,11 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
                        ret = rds_cmsg_rdma_map(rs, rm, cmsg);
                        if (!ret)
                                *allocated_mr = 1;
+                       else if (ret == -ENODEV)
+                               /* Accommodate the get_mr() case which can fail
+                                * if connection isn't established yet.
+                                */
+                               ret = -EAGAIN;
                        break;
                case RDS_CMSG_ATOMIC_CSWP:
                case RDS_CMSG_ATOMIC_FADD:
@@ -1154,6 +1159,22 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
        return ret;
 }
 
+/*
+ * rds_rdma_bytes - total RDMA byte count requested via control messages.
+ * @msg: user message header whose control messages are scanned.
+ *
+ * Walks the control messages of @msg and adds up remote_vec.bytes of every
+ * RDS_CMSG_RDMA_ARGS entry.  rds_sendmsg() uses the sum for its QoS
+ * threshold check before the full CMSG parsing is done, so nothing has
+ * validated the control buffer yet when this runs.  Each header is
+ * therefore checked here before its payload is interpreted as a
+ * struct rds_rdma_args.
+ *
+ * Malformed or undersized entries are simply not counted; rejecting them
+ * with an error is left to the later full parse (presumably
+ * rds_cmsg_send() - confirm against its validation).
+ *
+ * Returns the accumulated byte count, 0 if there are no RDMA args.
+ * NOTE(review): the sum is kept in an unsigned int as before; if
+ * remote_vec.bytes is wider than that, the total is truncated - confirm.
+ */
+static inline unsigned int rds_rdma_bytes(struct msghdr *msg)
+{
+       struct rds_rdma_args *args;
+       struct cmsghdr *cmsg;
+       unsigned int rdma_bytes = 0;
+
+       for_each_cmsghdr(cmsg, msg) {
+               /* A broken header makes the rest of the walk unreliable. */
+               if (!CMSG_OK(msg, cmsg))
+                       break;
+
+               /* Type values are only meaningful at the RDS level. */
+               if (cmsg->cmsg_level != SOL_RDS)
+                       continue;
+
+               if (cmsg->cmsg_type != RDS_CMSG_RDMA_ARGS)
+                       continue;
+
+               /* Never read args past the end of a short control message. */
+               if (cmsg->cmsg_len < CMSG_LEN(sizeof(*args)))
+                       continue;
+
+               args = CMSG_DATA(cmsg);
+               rdma_bytes += args->remote_vec.bytes;
+       }
+
+       return rdma_bytes;
+}
+
 int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 {
        struct sock *sk = sock->sk;
@@ -1228,13 +1249,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 
        rm->m_daddr = daddr;
 
-       /* Parse any control messages the user may have included. */
-       ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
-       if (ret)
-               goto out;
-
+       /* For RDMA operation(s), add up RDMA bytes to payload to make
+        * sure it's within system QoS threshold limits.
+        */
        if (rm->rdma.op_active)
-               total_payload_len += rm->rdma.op_bytes;
+               total_payload_len += rds_rdma_bytes(msg);
 
        if (rds_check_qos_threshold(rs->rs_tos, total_payload_len)) {
                ret = -EINVAL;
@@ -1289,6 +1308,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
                }
        }
 
+       /* Parse any control messages the user may have included. */
+       ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
+       if (ret) {
+               /* Trigger connection so that it's ready for the next retry */
+               if ( ret ==  -EAGAIN)
+                       rds_conn_connect_if_down(conn);
+               goto out;
+       }
+
        /* Not accepting new sends until all the failed ops have been reaped */
        if (rds_async_send_enabled && conn->c_pending_flush) {
                ret = -EAGAIN;