#define RDS_GET_MR_FOR_DEST 7
#define RDS_CONN_RESET 8
#define SO_RDS_TRANSPORT 9
+/* Socket option to tap receive path latency
+ * SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
+ * Format used struct rds_rx_trace_so
+ */
+#define SO_RDS_MSG_RXPATH_LATENCY 10
+#define RDS6_CONN_RESET 11
/* supported values for SO_RDS_TRANSPORT */
#define RDS_TRANS_IB 0
#define RDS_TRANS_COUNT 3
#define RDS_TRANS_NONE (~0)
-/* Socket option to tap receive path latency
- * SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
- * Format used struct rds_rx_trace_so
- */
-#define SO_RDS_MSG_RXPATH_LATENCY 10
-
/*
* ioctl commands for SOL_RDS
*/
#define SIOCRDSSETTOS (SIOCPROTOPRIVATE)
-#define SIOCRDSGETTOS (SIOCPROTOPRIVATE + 1)
+#define SIOCRDSGETTOS (SIOCPROTOPRIVATE + 1)
#define SIOCRDSENABLENETFILTER (SIOCPROTOPRIVATE + 2)
#define IPPROTO_OKA (142)
#define RDS_CMSG_CONG_UPDATE 5
#define RDS_CMSG_ATOMIC_FADD 6
#define RDS_CMSG_ATOMIC_CSWP 7
-#define RDS_CMSG_MASKED_ATOMIC_FADD 8
-#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
-#define RDS_CMSG_ASYNC_SEND 10
+#define RDS_CMSG_MASKED_ATOMIC_FADD 8
+#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
+#define RDS_CMSG_ASYNC_SEND 10
#define RDS_CMSG_RXPATH_LATENCY 11
#define RDS_INFO_FIRST 10000
#define RDS_INFO_IB_CONNECTIONS 10008
#define RDS_INFO_CONNECTION_STATS 10009
#define RDS_INFO_IWARP_CONNECTIONS 10010
-#define RDS_INFO_LAST 10010
+
+/* PF_RDS6 options */
+#define RDS6_INFO_CONNECTIONS 10011
+#define RDS6_INFO_SEND_MESSAGES 10012
+#define RDS6_INFO_RETRANS_MESSAGES 10013
+#define RDS6_INFO_RECV_MESSAGES 10014
+#define RDS6_INFO_SOCKETS 10015
+#define RDS6_INFO_TCP_SOCKETS 10016
+#define RDS6_INFO_IB_CONNECTIONS 10017
+
+#define RDS_INFO_LAST 10017
struct rds_info_counter {
u_int8_t name[32];
#define RDS_INFO_CONNECTION_FLAG_SENDING 0x01
#define RDS_INFO_CONNECTION_FLAG_CONNECTING 0x02
#define RDS_INFO_CONNECTION_FLAG_CONNECTED 0x04
-#define RDS_INFO_CONNECTION_FLAG_ERROR 0x08
+#define RDS_INFO_CONNECTION_FLAG_ERROR 0x08
#define TRANSNAMSIZ 16
u_int8_t tos;
} __attribute__((packed));
-struct rds_info_flow {
- __be32 laddr;
- __be32 faddr;
- u_int32_t bytes;
- __be16 lport;
- __be16 fport;
+struct rds6_info_connection {
+ uint64_t next_tx_seq;
+ uint64_t next_rx_seq;
+ struct in6_addr laddr;
+ struct in6_addr faddr;
+ uint8_t transport[TRANSNAMSIZ]; /* null term ascii */
+ uint8_t flags;
+ uint8_t tos;
} __attribute__((packed));
#define RDS_INFO_MESSAGE_FLAG_ACK 0x01
u_int8_t tos;
} __attribute__((packed));
+struct rds6_info_message { /* one message record, as filled in by rds6_inc_info_copy() */
+ uint64_t seq; /* header h_sequence, converted to CPU byte order */
+ uint32_t len; /* header h_len, converted to CPU byte order */
+ struct in6_addr laddr; /* one endpoint; source or destination depending on the copy's flip argument */
+ struct in6_addr faddr; /* the other endpoint, swapped together with laddr */
+ __be16 lport; /* header port matching laddr, copied without byte-order conversion */
+ __be16 fport; /* header port matching faddr, copied without byte-order conversion */
+ uint8_t flags; /* NOTE(review): presumably RDS_INFO_MESSAGE_FLAG_* bits - confirm */
+ uint8_t tos; /* NOTE(review): presumably the connection's type of service - confirm */
+} __attribute__((packed));
+
struct rds_info_socket {
u_int32_t sndbuf;
__be32 bound_addr;
u_int64_t inum;
} __attribute__((packed));
+struct rds6_info_socket { /* one record per RDS socket, as filled in by rds6_sock_info() */
+ uint32_t sndbuf; /* value of rds_sk_sndbuf() for the socket */
+ struct in6_addr bound_addr; /* copy of rs_bound_addr */
+ struct in6_addr connected_addr; /* copy of rs_conn_addr */
+ __be16 bound_port; /* copy of rs_bound_port */
+ __be16 connected_port; /* copy of rs_conn_port */
+ uint32_t rcvbuf; /* value of rds_sk_rcvbuf() for the socket */
+ uint64_t inum; /* value of sock_i_ino() for the underlying sock */
+} __attribute__((packed));
+
struct rds_info_tcp_socket {
__be32 local_addr;
__be16 local_port;
u_int32_t last_seen_una;
} __attribute__((packed));
+struct rds6_info_tcp_socket { /* one record per RDS/TCP connection, as filled in by rds6_tcp_tc_info() */
+ struct in6_addr local_addr; /* the TCP sock's sk_v6_rcv_saddr */
+ __be16 local_port; /* the TCP sock's inet_sport */
+ struct in6_addr peer_addr; /* the TCP sock's sk_v6_daddr */
+ __be16 peer_port; /* the TCP sock's inet_dport */
+ uint64_t hdr_rem; /* copy of t_tinc_hdr_rem */
+ uint64_t data_rem; /* copy of t_tinc_data_rem */
+ uint32_t last_sent_nxt; /* copy of t_last_sent_nxt */
+ uint32_t last_expected_una; /* copy of t_last_expected_una */
+ uint32_t last_seen_una; /* copy of t_last_seen_una */
+} __attribute__((packed));
+
#define RDS_IB_GID_LEN 16
struct rds_info_rdma_connection {
__be32 src_addr;
};
+struct rds6_info_rdma_connection { /* one record per IB connection, as filled in by rds6_ib_conn_info_visitor(); not packed */
+ struct in6_addr src_addr; /* connection's c_laddr */
+ struct in6_addr dst_addr; /* connection's c_faddr */
+ uint8_t src_gid[RDS_IB_GID_LEN]; /* source GID of the cm_id route; zeroed unless the connection is up */
+ uint8_t dst_gid[RDS_IB_GID_LEN]; /* destination GID of the cm_id route; zeroed unless the connection is up */
+
+ uint32_t max_send_wr; /* send ring w_nr; only written when the connection is up */
+ uint32_t max_recv_wr; /* receive ring w_nr; only written when the connection is up */
+ uint32_t max_send_sge; /* device max_sge; only written when the connection is up */
+ uint32_t rdma_mr_max; /* 1M MR pool max_items, set by rds6_ib_get_mr_info() */
+ uint32_t rdma_mr_size; /* 1M MR pool fmr_attr.max_pages, set by rds6_ib_get_mr_info() */
+ uint8_t tos; /* connection's c_tos */
+ uint8_t sl; /* IB connection's i_sl */
+ uint32_t cache_allocs; /* value of i_cache_allocs; only written when the connection is up */
+ uint32_t frag; /* IB connection's i_frag_sz */
+ uint16_t flow_ctl_post_credit; /* IB_GET_POST_CREDITS() of i_credits */
+ uint16_t flow_ctl_send_credit; /* IB_GET_SEND_CREDITS() of i_credits */
+ uint32_t qp_num; /* qp_num of the cm_id's queue pair */
+ uint32_t w_alloc_ctr; /* receive ring w_alloc_ctr */
+ uint32_t w_free_ctr; /* receive ring w_free_ctr, truncated to 32 bits */
+};
+
/*
* Congestion monitoring.
* Congestion control in RDS happens at the host connection
struct in_addr dst;
};
+struct rds6_reset { /* argument of the RDS6_CONN_RESET socket option, consumed by rds6_user_reset() */
+ uint8_t tos; /* passed to the connection lookup when dst is specified */
+ struct in6_addr src; /* local address of the connection(s) to reset */
+ struct in6_addr dst; /* peer address; the unspecified address resets every connection using src */
+};
+
struct rds_asend_args {
u_int64_t user_token;
u_int64_t flags;
int32_t status;
};
-#define RDS_RDMA_SEND_SUCCESS 0
-#define RDS_RDMA_REMOTE_ERROR 1
-#define RDS_RDMA_SEND_CANCELED 2
-#define RDS_RDMA_SEND_DROPPED 3
+#define RDS_RDMA_SEND_SUCCESS 0
+#define RDS_RDMA_REMOTE_ERROR 1
+#define RDS_RDMA_SEND_CANCELED 2
+#define RDS_RDMA_SEND_DROPPED 3
#define RDS_RDMA_SEND_OTHER_ERROR 4
/*
#include <net/sock.h>
#include "rds.h"
-#include "tcp.h"
+
/* UNUSED for backwards compat only */
static unsigned int rds_ib_retry_count = 0xdead;
module_param(rds_ib_retry_count, int, 0444);
return 0;
}
+/*
+ * Handle the RDS6_CONN_RESET socket option.
+ *
+ * @rs:     socket the option was issued on
+ * @optval: user pointer to a struct rds6_reset
+ * @optlen: must equal sizeof(struct rds6_reset)
+ *
+ * If reset.dst is the unspecified address, every connection collected by
+ * rds_conn_laddr_list() for reset.src is dropped.  Otherwise the single
+ * connection matching <src, dst, tos> on this socket's transport is looked
+ * up and dropped if it exists.
+ *
+ * Returns 0 on success (including when no connection matched), -EINVAL on
+ * a bad option length and -EFAULT if the argument cannot be copied in.
+ */
+static int rds6_user_reset(struct rds_sock *rs, char __user *optval, int optlen)
+{
+	struct rds6_reset reset;
+	struct rds_connection *conn;
+	LIST_HEAD(s_addr_conns);
+
+	if (optlen != sizeof(struct rds6_reset))
+		return -EINVAL;
+
+	if (copy_from_user(&reset, (struct rds6_reset __user *)optval,
+			   sizeof(struct rds6_reset)))
+		return -EFAULT;
+
+	/* Reset all conns associated with source addr */
+	if (ipv6_addr_any(&reset.dst)) {
+		pr_info("RDS: Reset ALL conns for Source %pI6c\n",
+			&reset.src);
+
+		rds_conn_laddr_list(sock_net(rds_rs_to_sk(rs)),
+				    &reset.src, &s_addr_conns);
+
+		/* Walking an empty list is a no-op and the cursor is never
+		 * NULL inside the loop, so no extra checks are needed.  Use
+		 * the same named drop reason as the single-connection path
+		 * below instead of a bare constant.
+		 */
+		list_for_each_entry(conn, &s_addr_conns, c_laddr_node)
+			rds_user_conn_paths_drop(conn, DR_USER_RESET);
+		return 0;
+	}
+
+	conn = rds_conn_find(sock_net(rds_rs_to_sk(rs)),
+			     &reset.src, &reset.dst, rs->rs_transport,
+			     reset.tos, rs->rs_bound_scope_id);
+
+	if (conn) {
+		bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
+
+		printk(KERN_NOTICE "Resetting RDS/%s connection <%pI6c,%pI6c,%d>\n",
+		       is_tcp ? "tcp" : "IB",
+		       &reset.src, &reset.dst, conn->c_tos);
+		rds_user_conn_paths_drop(conn, DR_USER_RESET);
+	}
+
+	return 0;
+}
+
static int rds_set_transport(struct rds_sock *rs, char __user *optval,
int optlen)
{
}
ret = rds_user_reset(rs, optval, optlen);
break;
+ case RDS6_CONN_RESET:
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ ret = -EACCES;
+ break;
+ }
+ ret = rds6_user_reset(rs, optval, optlen);
+ break;
case SO_RDS_TRANSPORT:
lock_sock(sock->sk);
ret = rds_set_transport(rs, optval, optlen);
{
struct sock *sk = sock->sk;
struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
struct rds_sock *rs = rds_sk_to_rs(sk);
+ int addr_type;
int ret = 0;
lock_sock(sk);
break;
case sizeof(struct sockaddr_in6):
- ret = -EPROTONOSUPPORT;
+ sin6 = (struct sockaddr_in6 *)uaddr;
+ if (sin6->sin6_family != AF_INET6) {
+ ret = -EAFNOSUPPORT;
+ break;
+ }
+ addr_type = ipv6_addr_type(&sin6->sin6_addr);
+ if (!(addr_type & IPV6_ADDR_UNICAST)) {
+ ret = -EPROTOTYPE;
+ break;
+ }
+ if (addr_type & IPV6_ADDR_LINKLOCAL &&
+ sin6->sin6_scope_id == 0) {
+ ret = -EINVAL;
+ break;
+ }
+ rs->rs_conn_addr = sin6->sin6_addr;
+ rs->rs_conn_port = sin6->sin6_port;
break;
default:
lens->each = sizeof(struct rds_info_message);
}
+/*
+ * RDS6_INFO_RECV_MESSAGES handler: walk the receive queue of every RDS
+ * socket and emit one struct rds6_info_message per queued message, for as
+ * many records as fit in the caller's @len bytes.  The total number of
+ * queued messages is always reported through @lens, even when it exceeds
+ * what was copied.
+ */
+static void rds6_sock_inc_info(struct socket *sock, unsigned int len,
+			       struct rds_info_iterator *iter,
+			       struct rds_info_lengths *lens)
+{
+	unsigned int capacity = len / sizeof(struct rds6_info_message);
+	unsigned int seen = 0;
+	struct rds_incoming *msg;
+	struct rds_sock *rs;
+
+	spin_lock_bh(&rds_sock_lock);
+
+	list_for_each_entry(rs, &rds_sock_list, rs_item) {
+		read_lock(&rs->rs_recv_lock);
+
+		/* Counts are not maintained, so tally while walking. */
+		list_for_each_entry(msg, &rs->rs_recv_queue, i_item) {
+			seen++;
+			if (seen > capacity)
+				continue;
+			rds6_inc_info_copy(msg, iter, &msg->i_saddr,
+					   &rs->rs_bound_addr, 1);
+		}
+
+		read_unlock(&rs->rs_recv_lock);
+	}
+
+	spin_unlock_bh(&rds_sock_lock);
+
+	lens->each = sizeof(struct rds6_info_message);
+	lens->nr = seen;
+}
+
static void rds_sock_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
spin_unlock_bh(&rds_sock_lock);
}
+/*
+ * RDS6_INFO_SOCKETS handler: emit one struct rds6_info_socket per RDS
+ * socket.  Nothing is copied unless the caller's buffer can hold a record
+ * for every socket; the required count and record size are reported
+ * through @lens either way.
+ */
+static void rds6_sock_info(struct socket *sock, unsigned int len,
+			   struct rds_info_iterator *iter,
+			   struct rds_info_lengths *lens)
+{
+	struct rds_sock *rs;
+
+	len /= sizeof(struct rds6_info_socket);
+
+	spin_lock_bh(&rds_sock_lock);
+
+	if (!(len < rds_sock_count)) {
+		list_for_each_entry(rs, &rds_sock_list, rs_item) {
+			struct rds6_info_socket entry;
+
+			entry.sndbuf = rds_sk_sndbuf(rs);
+			entry.rcvbuf = rds_sk_rcvbuf(rs);
+			entry.bound_addr = rs->rs_bound_addr;
+			entry.connected_addr = rs->rs_conn_addr;
+			entry.bound_port = rs->rs_bound_port;
+			entry.connected_port = rs->rs_conn_port;
+			entry.inum = sock_i_ino(rds_rs_to_sk(rs));
+
+			rds_info_copy(iter, &entry, sizeof(entry));
+		}
+	}
+
+	lens->nr = rds_sock_count;
+	lens->each = sizeof(struct rds6_info_socket);
+
+	spin_unlock_bh(&rds_sock_lock);
+}
+
static unsigned long parse_ul(char *ptr, unsigned long max)
{
unsigned long val;
rds_page_exit();
rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
+ rds_info_deregister_func(RDS6_INFO_SOCKETS, rds6_sock_info);
+ rds_info_deregister_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info);
}
module_exit(rds_exit);
rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info);
rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
+ rds_info_register_func(RDS6_INFO_SOCKETS, rds6_sock_info);
+ rds_info_register_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info);
rds_qos_threshold_init();
struct in6_addr v6addr, *binding_addr;
struct rds_transport *trans;
__u32 scope_id = 0;
+ int addr_type;
int ret = 0;
__be16 port;
+ /* We allow an RDS socket to be bound to either IPv4 or IPv6
+ * address.
+ */
if (addr_len == sizeof(struct sockaddr_in)) {
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
binding_addr = &v6addr;
port = sin->sin_port;
} else if (addr_len == sizeof(struct sockaddr_in6)) {
- return -EPROTONOSUPPORT;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr;
+
+ addr_type = ipv6_addr_type(&sin6->sin6_addr);
+ if (sin6->sin6_family != AF_INET6 ||
+ !(addr_type & IPV6_ADDR_UNICAST)) {
+ return -EINVAL;
+ }
+ /* The scope ID must be specified for link local address. */
+ if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ if (sin6->sin6_scope_id == 0)
+ return -EINVAL;
+ scope_id = sin6->sin6_scope_id;
+ }
+ binding_addr = &sin6->sin6_addr;
+ port = sin6->sin6_port;
} else {
return -EINVAL;
}
#include "rds.h"
#include "loop.h"
-#include "tcp.h"
#define RDS_CONNECTION_HASH_BITS 12
#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS)
}
EXPORT_SYMBOL_GPL(rds_conn_destroy);
-static void rds_conn_message_info(struct socket *sock, unsigned int len,
- struct rds_info_iterator *iter,
- struct rds_info_lengths *lens,
- int want_send)
+/*
+ * Copy one message record in either the IPv6 or the legacy IPv4 format.
+ *
+ * @saddr and @daddr point to struct in6_addr values (the caller passes the
+ * connection's c_laddr / c_faddr).  For the IPv4 format the 32-bit address
+ * is the last word of the IPv4-mapped address, s6_addr32[3]; reading the
+ * pointer as a plain __be32 would pick up s6_addr32[0] instead, which is
+ * zero for a mapped address.
+ */
+static void __rds_inc_msg_cp(struct rds_incoming *inc,
+			     struct rds_info_iterator *iter,
+			     void *saddr, void *daddr, int flip, bool isv6)
+{
+	struct in6_addr *src = saddr;
+	struct in6_addr *dst = daddr;
+
+	if (isv6)
+		rds6_inc_info_copy(inc, iter, src, dst, flip);
+	else
+		rds_inc_info_copy(inc, iter, src->s6_addr32[3],
+				  dst->s6_addr32[3], flip);
+}
+
+static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
+ struct rds_info_iterator *iter,
+ struct rds_info_lengths *lens,
+ int want_send, bool isv6)
{
struct hlist_head *head;
struct list_head *list;
size_t i;
int j;
- len /= sizeof(struct rds_info_message);
+ if (isv6)
+ len /= sizeof(struct rds6_info_message);
+ else
+ len /= sizeof(struct rds_info_message);
rcu_read_lock();
/* XXX too lazy to maintain counts.. */
list_for_each_entry(rm, list, m_conn_item) {
- __be32 laddr;
- __be32 faddr;
-
total++;
- laddr = conn->c_laddr.s6_addr32[3];
- faddr = conn->c_faddr.s6_addr32[3];
if (total <= len)
- rds_inc_info_copy(&rm->m_inc,
- iter,
- laddr,
- faddr,
- 0);
+ __rds_inc_msg_cp(&rm->m_inc,
+ iter,
+ &conn->c_laddr,
+ &conn->c_faddr,
+ 0, isv6);
}
cp->cp_rdsinfo_pending = 0;
rcu_read_unlock();
lens->nr = total;
- lens->each = sizeof(struct rds_info_message);
+ if (isv6)
+ lens->each = sizeof(struct rds6_info_message);
+ else
+ lens->each = sizeof(struct rds_info_message);
+}
+
+/* IPv4-format front end of rds_conn_message_info_cmn(). */
+static void rds_conn_message_info(struct socket *sock, unsigned int len,
+				  struct rds_info_iterator *iter,
+				  struct rds_info_lengths *lens,
+				  int want_send)
+{
+	const bool isv6 = false;
+
+	rds_conn_message_info_cmn(sock, len, iter, lens, want_send, isv6);
+}
+
+/* IPv6-format front end of rds_conn_message_info_cmn(). */
+static void rds6_conn_message_info(struct socket *sock, unsigned int len,
+				   struct rds_info_iterator *iter,
+				   struct rds_info_lengths *lens,
+				   int want_send)
+{
+	const bool isv6 = true;
+
+	rds_conn_message_info_cmn(sock, len, iter, lens, want_send, isv6);
}
static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
rds_conn_message_info(sock, len, iter, lens, 1);
}
+/* RDS6_INFO_SEND_MESSAGES handler: IPv6-format listing with want_send set. */
+static void rds6_conn_message_info_send(struct socket *sock, unsigned int len,
+					struct rds_info_iterator *iter,
+					struct rds_info_lengths *lens)
+{
+	const int want_send = 1;
+
+	rds6_conn_message_info(sock, len, iter, lens, want_send);
+}
+
static void rds_conn_message_info_retrans(struct socket *sock,
unsigned int len,
struct rds_info_iterator *iter,
rds_conn_message_info(sock, len, iter, lens, 0);
}
+/* RDS6_INFO_RETRANS_MESSAGES handler: IPv6-format listing with want_send
+ * cleared.
+ */
+static void rds6_conn_message_info_retrans(struct socket *sock,
+					   unsigned int len,
+					   struct rds_info_iterator *iter,
+					   struct rds_info_lengths *lens)
+{
+	const int want_send = 0;
+
+	rds6_conn_message_info(sock, len, iter, lens, want_send);
+}
+
void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
struct rds_info_connection *cinfo = buffer;
struct rds_connection *conn = cp->cp_conn;
+ if (conn->c_isv6)
+ return 0;
+
cinfo->next_tx_seq = cp->cp_next_tx_seq;
cinfo->next_rx_seq = cp->cp_next_rx_seq;
cinfo->laddr = conn->c_laddr.s6_addr32[3];
return 1;
}
+/*
+ * Fill one struct rds6_info_connection record (@buffer) from connection
+ * path @cp: sequence numbers, both addresses, type of service, transport
+ * name and the RDS_INFO_CONNECTION_FLAG_* state bits.
+ *
+ * Always returns 1; the return value only exists because the walker that
+ * calls this helper expects one.
+ */
+static int rds6_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
+{
+	struct rds6_info_connection *cinfo6 = buffer;
+	struct rds_connection *conn = cp->cp_conn;
+	int state;
+
+	cinfo6->next_tx_seq = cp->cp_next_tx_seq;
+	cinfo6->next_rx_seq = cp->cp_next_rx_seq;
+	cinfo6->laddr = conn->c_laddr;
+	cinfo6->faddr = conn->c_faddr;
+	cinfo6->tos = conn->c_tos;
+
+	/* The field is documented as NUL-terminated ASCII.  strncpy() does
+	 * not terminate when the source fills the whole buffer, so copy one
+	 * byte less and terminate explicitly (shorter names are still
+	 * zero-padded by strncpy()).
+	 */
+	strncpy(cinfo6->transport, conn->c_trans->t_name,
+		sizeof(cinfo6->transport) - 1);
+	cinfo6->transport[sizeof(cinfo6->transport) - 1] = '\0';
+	cinfo6->flags = 0;
+
+	rds_conn_info_set(cinfo6->flags, test_bit(RDS_IN_XMIT, &cp->cp_flags),
+			  SENDING);
+
+	/* Sample the state once so CONNECTING and CONNECTED are derived from
+	 * the same snapshot.
+	 */
+	state = atomic_read(&cp->cp_state);
+	/* XXX Future: return the state rather than these funky bits */
+	rds_conn_info_set(cinfo6->flags, state == RDS_CONN_CONNECTING,
+			  CONNECTING);
+	rds_conn_info_set(cinfo6->flags, state == RDS_CONN_UP,
+			  CONNECTED);
+	rds_conn_info_set(cinfo6->flags, cp->cp_pending_flush,
+			  ERROR);
+	return 1;
+}
+
static void rds_conn_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
sizeof(struct rds_info_connection));
}
+/* RDS6_INFO_CONNECTIONS handler: walk the connection paths and emit one
+ * struct rds6_info_connection per path via rds6_conn_info_visitor().
+ */
+static void rds6_conn_info(struct socket *sock, unsigned int len,
+			   struct rds_info_iterator *iter,
+			   struct rds_info_lengths *lens)
+{
+	const size_t item_len = sizeof(struct rds6_info_connection);
+
+	rds_walk_conn_path_info(sock, len, iter, lens,
+				rds6_conn_info_visitor, item_len);
+}
+
int rds_conn_init(void)
{
rds_conn_slab = kmem_cache_create("rds_connection",
rds_conn_message_info_send);
rds_info_register_func(RDS_INFO_RETRANS_MESSAGES,
rds_conn_message_info_retrans);
+ rds_info_register_func(RDS6_INFO_CONNECTIONS, rds6_conn_info);
+ rds_info_register_func(RDS6_INFO_SEND_MESSAGES,
+ rds6_conn_message_info_send);
+ rds_info_register_func(RDS6_INFO_RETRANS_MESSAGES,
+ rds6_conn_message_info_retrans);
return 0;
}
rds_conn_message_info_send);
rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES,
rds_conn_message_info_retrans);
-
+ rds_info_deregister_func(RDS6_INFO_CONNECTIONS, rds6_conn_info);
+ rds_info_deregister_func(RDS6_INFO_SEND_MESSAGES,
+ rds6_conn_message_info_send);
+ rds_info_deregister_func(RDS6_INFO_RETRANS_MESSAGES,
+ rds6_conn_message_info_retrans);
}
static char *conn_drop_reasons[] = {
#include <net/inet_common.h>
#include <net/ipoib/if_ipoib.h>
#include <linux/rtnetlink.h>
+#include <net/addrconf.h>
#include "rds.h"
#include "ib.h"
-#include "tcp.h"
#include "rds_single_path.h"
#include <linux/time.h>
struct workqueue_struct *rds_aux_wq;
struct socket *rds_ib_inet_socket;
+struct socket *rds_ib_inet6_socket;
static struct rds_ib_port *ip_config;
static u8 ip_port_cnt = 0;
.remove = rds_ib_remove_one
};
+/* Report IB connection information. This function only reports IPv4
+ * connections for backward compatibility.
+ */
static int rds_ib_conn_info_visitor(struct rds_connection *conn,
void *buffer)
{
iinfo->max_send_sge = rds_ibdev->max_sge;
iinfo->qp_num = ic->i_cm_id->qp->qp_num;
iinfo->w_alloc_ctr = ic->i_recv_ring.w_alloc_ctr;
- iinfo->w_free_ctr = (u32) atomic_read(&ic->i_recv_ring.w_free_ctr);
+ iinfo->w_free_ctr =
+ (u32)atomic_read(&ic->i_recv_ring.w_free_ctr);
iinfo->flow_ctl_post_credit =
IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
iinfo->flow_ctl_send_credit =
return 1;
}
+/* IPv6 version of rds_ib_conn_info_visitor(). */
+/*
+ * IPv6 version of rds_ib_conn_info_visitor().
+ *
+ * Fill one struct rds6_info_rdma_connection record (@buffer) for @conn.
+ * Returns 0 (record skipped) when @conn does not use the IB transport and
+ * 1 otherwise.  Addresses are always reported; tos/sl/frag need the IB
+ * connection data; GIDs, ring sizes, credits and MR pool limits are only
+ * filled in while the connection is up.
+ */
+static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
+				     void *buffer)
+{
+	struct rds6_info_rdma_connection *iinfo6 = buffer;
+	struct rds_ib_connection *ic = conn->c_transport_data;
+
+	/* We will only ever look at IB transports */
+	if (conn->c_trans != &rds_ib_transport)
+		return 0;
+
+	/* The record is not packed and most fields are only written on some
+	 * paths below.  Nothing visible here guarantees the caller's buffer
+	 * is pre-zeroed, so clear the whole record (GIDs and padding
+	 * included) before it is handed back.
+	 */
+	memset(iinfo6, 0, sizeof(*iinfo6));
+
+	iinfo6->src_addr = conn->c_laddr;
+	iinfo6->dst_addr = conn->c_faddr;
+
+	if (ic) {
+		iinfo6->tos = conn->c_tos;
+		iinfo6->sl = ic->i_sl;
+		iinfo6->frag = ic->i_frag_sz;
+	}
+
+	/* Everything below dereferences ic, so require it here as well. */
+	if (ic && rds_conn_state(conn) == RDS_CONN_UP) {
+		struct rds_ib_device *rds_ibdev;
+		struct rdma_dev_addr *dev_addr;
+
+		dev_addr = &ic->i_cm_id->route.addr.dev_addr;
+		rdma_addr_get_sgid(dev_addr,
+				   (union ib_gid *)&iinfo6->src_gid);
+		rdma_addr_get_dgid(dev_addr,
+				   (union ib_gid *)&iinfo6->dst_gid);
+
+		rds_ibdev = ic->rds_ibdev;
+		iinfo6->max_send_wr = ic->i_send_ring.w_nr;
+		iinfo6->max_recv_wr = ic->i_recv_ring.w_nr;
+		iinfo6->max_send_sge = rds_ibdev->max_sge;
+		iinfo6->qp_num = ic->i_cm_id->qp->qp_num;
+		iinfo6->w_alloc_ctr = ic->i_recv_ring.w_alloc_ctr;
+		iinfo6->w_free_ctr =
+			(u32)atomic_read(&ic->i_recv_ring.w_free_ctr);
+		iinfo6->flow_ctl_post_credit =
+			IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
+		iinfo6->flow_ctl_send_credit =
+			IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits));
+		rds6_ib_get_mr_info(rds_ibdev, iinfo6);
+		iinfo6->cache_allocs = atomic_read(&ic->i_cache_allocs);
+	}
+	return 1;
+}
+
static void rds_ib_ic_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
sizeof(struct rds_info_rdma_connection));
}
+/* IPv6 version of rds_ib_ic_info(). */
+static void rds6_ib_ic_info(struct socket *sock, unsigned int len,
+ struct rds_info_iterator *iter,
+ struct rds_info_lengths *lens)
+{
+ rds_for_each_conn_info(sock, len, iter, lens,
+ rds6_ib_conn_info_visitor,
+ sizeof(struct rds6_info_rdma_connection));
+}
/*
* Early RDS/IB was built to only bind to an address if there is an IPoIB
{
int ret;
struct rdma_cm_id *cm_id;
+ struct sockaddr_in6 sin6;
struct sockaddr_in sin;
+ struct sockaddr *sa;
+ bool isv4;
+ isv4 = ipv6_addr_v4mapped(addr);
/* Link-local addresses don't play well with IB */
- if (ipv4_is_linklocal_169(addr->s6_addr32[3])) {
+ if (isv4 && ipv4_is_linklocal_169(addr->s6_addr32[3])) {
pr_info_once("\n");
pr_info_once("****************************************************\n");
pr_info_once("** WARNING WARNING WARNING WARNING WARNING **\n");
if (IS_ERR(cm_id))
return -EADDRNOTAVAIL;
- memset(&sin, 0, sizeof(sin));
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = addr->s6_addr32[3];
+ if (isv4) {
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = addr->s6_addr32[3];
+ sa = (struct sockaddr *)&sin;
+ } else {
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = *addr;
+ sin6.sin6_scope_id = scope_id;
+ sa = (struct sockaddr *)&sin6;
+
+ /* XXX Do a special IPv6 link local address check here. The
+ * reason is that rdma_bind_addr() always succeeds with IPv6
+ * link local address regardless if it is configured or not in
+ * a system.
+ */
+ if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) {
+ struct net_device *dev;
+
+ if (scope_id == 0)
+ return -EADDRNOTAVAIL;
+
+ /* Use init_net for now as RDS is not network
+ * name space aware.
+ */
+ dev = dev_get_by_index(&init_net, scope_id);
+ if (!dev)
+ return -EADDRNOTAVAIL;
+ if (!ipv6_chk_addr(&init_net, addr, dev, 1)) {
+ dev_put(dev);
+ return -EADDRNOTAVAIL;
+ }
+ dev_put(dev);
+ }
+ }
/* rdma_bind_addr will only succeed for IB & iWARP devices */
- ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
+ ret = rdma_bind_addr(cm_id, sa);
/* due to this, we will claim to support iWARP devices unless we
check node_type. */
if (ret || !cm_id->device || cm_id->device->node_type != RDMA_NODE_IB_CA)
ret = -EADDRNOTAVAIL;
- rdsdebug("addr %pI6c ret %d node type %d\n",
- addr, ret,
+ rdsdebug("addr %pI6c%%%u ret %d node type %d\n",
+ addr, scope_id, ret,
cm_id->device ? cm_id->device->node_type : -1);
rdma_destroy_id(cm_id);
printk(KERN_ERR "RDS/IB: can't create TCP transport socket (%d).\n", -ret);
goto out;
}
+ ret = sock_create_kern(&init_net, PF_INET6, SOCK_DGRAM, 0,
+ &rds_ib_inet6_socket);
+ if (ret < 0) {
+ printk(KERN_ERR "RDS/IB: can't create IPv6 configuration socket (%d).\n",
+ -ret);
+ goto out;
+ }
sock_net_set(rds_ib_inet_socket->sk, &init_net);
+ sock_net_set(rds_ib_inet6_socket->sk, &init_net);
/* Initialise the RDS IB fragment size */
rds_ib_init_frag(RDS_PROTOCOL_VERSION);
goto out_aux_wq;
rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
+ rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info);
ret = rds_ip_threads_init();
if (ret) {
{
unregister_netdevice_notifier(&rds_ib_nb);
rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
+ rds_info_deregister_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info);
rds_ib_unregister_client();
rds_ib_destroy_nodev_conns();
rds_ib_sysctl_exit();
struct rds_ib_alias {
char if_name[IFNAMSIZ];
- __be32 ip_addr;
+ __be32 ip_addr;
__be32 ip_bcast;
__be32 ip_mask;
};
union ib_gid gid;
char port_label[4];
char if_name[IFNAMSIZ];
- __be32 ip_addr;
+ __be32 ip_addr;
__be32 ip_bcast;
__be32 ip_mask;
unsigned int ip_active_port;
void rds_ib_destroy_nodev_conns(void);
struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev, int npages);
void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
+void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
+ struct rds6_info_rdma_connection *iinfo6);
void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
struct rds_sock *rs, u32 *key_ret,
#include "rds.h"
#include "ib.h"
-#include "tcp.h"
#include "rds_single_path.h"
static unsigned int rds_ib_max_frag = RDS_MAX_FRAG_SIZE;
(unsigned long long)be64_to_cpu(fguid),
dp_cmn->ricpc_tos);
- /* XXX IPoIB ACL Only support IPv4 */
- acl_ret = rds_ib_match_acl(cm_id, saddr6->s6_addr32[3]);
- if (acl_ret < 0) {
- err = RDS_ACL_FAILURE;
- rdsdebug("RDS: IB: passive: rds_ib_match_acl failed\n");
- goto out;
+ /* IPoIB ACL only supports IPv4. Let all IPv6 traffic pass. */
+ if (ipv6_addr_v4mapped(saddr6)) {
+ acl_ret = rds_ib_match_acl(cm_id, saddr6->s6_addr32[3]);
+ if (acl_ret < 0) {
+ err = RDS_ACL_FAILURE;
+ rdsdebug("RDS: IB: passive: rds_ib_match_acl failed\n");
+ goto out;
+ }
+ } else {
+ acl_ret = 0;
}
/* RDS/IB is not currently netns aware, thus init_net */
u16 frag;
int ret;
- ret = rds_ib_match_acl(ic->i_cm_id, conn->c_faddr.s6_addr32[3]);
+ /* IPoIB ACL only supports IPv4. Let all IPv6 traffic pass. */
+ if (ipv6_addr_v4mapped(&conn->c_faddr))
+ ret = rds_ib_match_acl(ic->i_cm_id, conn->c_faddr.s6_addr32[3]);
+ else
+ ret = 0;
if (ret < 0) {
pr_err("RDS: IB: active conn=%p, <%pI6c,%pI6c,%d> destroyed due ACL violation\n",
conn, &conn->c_laddr, &conn->c_faddr,
/* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */
- handler = rds_rdma_cm_event_handler;
+ if (conn->c_isv6)
+ handler = rds6_rdma_cm_event_handler;
+ else
+ handler = rds_rdma_cm_event_handler;
ic->i_cm_id = rdma_create_id(handler, conn, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ic->i_cm_id)) {
sin6 = (struct sockaddr_in6 *)&dest;
sin6->sin6_family = AF_INET6;
sin6->sin6_addr = conn->c_faddr;
- sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
+ sin6->sin6_port = (__force u16)htons(RDS_CM_PORT);
sin6->sin6_scope_id = conn->c_dev_if;
}
iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages;
}
+/* Report the limits of the device's 1M MR pool (item count and pages per
+ * MR) in an IPv6-format connection record.
+ */
+void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
+			 struct rds6_info_rdma_connection *iinfo6)
+{
+	struct rds_ib_mr_pool *pool = rds_ibdev->mr_1m_pool;
+
+	iinfo6->rdma_mr_size = pool->fmr_attr.max_pages;
+	iinfo6->rdma_mr_max = pool->max_items;
+}
+
void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
{
struct rds_ib_mr *ibmr;
#include "rds.h"
#include "ib.h"
-#include "tcp.h"
#include "rds_single_path.h"
static char *rds_ib_wc_status_strings[] = {
#include "rdma_transport.h"
#include "ib.h"
#include "net/arp.h"
-#include "tcp.h"
#include "rds_single_path.h"
#include <net/sock.h>
#define RDS_REJ_CONSUMER_DEFINED 28
+/* Global IPv4 and IPv6 RDS RDMA listener cm_id */
static struct rdma_cm_id *rds_rdma_listen_id;
+static struct rdma_cm_id *rds6_rdma_listen_id;
int unload_allowed __initdata;
return rds_rdma_cm_event_handler_cmn(cm_id, event, false);
}
+/* RDMA CM event callback for IPv6: forwards to the common handler with
+ * the isv6 argument set.
+ */
+int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
+			       struct rdma_cm_event *event)
+{
+	const bool isv6 = true;
+
+	return rds_rdma_cm_event_handler_cmn(cm_id, event, isv6);
+}
+
static int rds_rdma_listen_init_common(rdma_cm_event_handler handler,
struct sockaddr *sa,
struct rdma_cm_id **ret_cm_id)
}
rdsdebug("cm %p listening on port %u\n", cm_id,
- ntohs(((struct sockaddr_in *)sa)->sin_port));
+ sa->sa_family == PF_INET ?
+ ntohs(((struct sockaddr_in *)sa)->sin_port) :
+ ntohs(((struct sockaddr_in6 *)sa)->sin6_port));
*ret_cm_id = cm_id;
cm_id = NULL;
/* Initialize the RDS RDMA listeners. We create two listeners for
* compatibility reason. The one on RDS_PORT is used for IPv4
- * requests only. The one on RDS_TCP_PORT is used for IPv6 requests
+ * requests only. The one on RDS_CM_PORT is used for IPv6 requests
* only. So only IPv6 enabled RDS module will communicate using this
* port.
*/
static int rds_rdma_listen_init(void)
{
int ret;
+ struct sockaddr_in6 sin6;
struct sockaddr_in sin;
sin.sin_family = PF_INET;
ret = rds_rdma_listen_init_common(rds_rdma_cm_event_handler,
(struct sockaddr *)&sin,
&rds_rdma_listen_id);
- return ret;
+ if (ret)
+ return ret;
+
+ sin6.sin6_family = PF_INET6;
+ sin6.sin6_addr = in6addr_any;
+ sin6.sin6_port = htons(RDS_CM_PORT);
+ sin6.sin6_scope_id = 0;
+ sin6.sin6_flowinfo = 0;
+ ret = rds_rdma_listen_init_common(rds6_rdma_cm_event_handler,
+ (struct sockaddr *)&sin6,
+ &rds6_rdma_listen_id);
+ /* Keep going even when IPv6 is not enabled in the system. */
+ if (ret)
+ rdsdebug("Cannot set up IPv6 RDMA listener\n");
+ return 0;
}
static void rds_rdma_listen_stop(void)
rdma_destroy_id(rds_rdma_listen_id);
rds_rdma_listen_id = NULL;
}
+ if (rds6_rdma_listen_id) {
+ rdsdebug("cm %p\n", rds6_rdma_listen_id);
+ rdma_destroy_id(rds6_rdma_listen_id);
+ rds6_rdma_listen_id = NULL;
+ }
}
#define MODULE_NAME "rds_rdma"
int rds_rdma_conn_connect(struct rds_connection *conn);
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
+int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event);
/* from rdma_transport.c */
int rds_rdma_init(void);
*/
#define RDS_ACL_FAILURE 0x04010020
-/*
- * XXX randomly chosen, but at least seems to be unused:
- * # 18464-18768 Unassigned
- * We should do better. We want a reserved port to discourage unpriv'ed
- * userspace from listening.
+/* The following ports, 16385, 18634, 18635, are registered with IANA as
+ * the ports to be used for RDS over TCP and UDP. 18634 is the historical
+ * value used for the RDMA_CM listener port. RDS/TCP uses port 16385. After
+ * IPv6 work, RDMA_CM also uses 16385 as the listener port. 18634 is kept
+ * to ensure compatibility with older RDS modules.
*/
#define RDS_PORT 18634
+#define RDS_CM_PORT 16385
+#define RDS_TCP_PORT RDS_CM_PORT
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
void rds_inc_info_copy(struct rds_incoming *inc,
struct rds_info_iterator *iter,
__be32 saddr, __be32 daddr, int flip);
+void rds6_inc_info_copy(struct rds_incoming *inc,
+ struct rds_info_iterator *iter,
+ struct in6_addr *saddr, struct in6_addr *daddr,
+ int flip);
int rds_skb_local(struct sk_buff *skb);
/* send.c */
#include <linux/rds.h>
#include "rds.h"
-#include "tcp.h"
/* forward prototypes */
static void
rds_info_copy(iter, &minfo, sizeof(minfo));
}
+/*
+ * Emit one struct rds6_info_message record for @inc through @iter.
+ *
+ * @saddr/@daddr: addresses to report for the message
+ * @flip:         when non-zero the destination side is reported as the
+ *                local endpoint (laddr/lport) and the source side as the
+ *                foreign one; otherwise the other way round
+ *
+ * Every field of the on-stack record is written before it is copied out,
+ * so no uninitialised stack bytes reach the iterator.
+ */
+void rds6_inc_info_copy(struct rds_incoming *inc,
+			struct rds_info_iterator *iter,
+			struct in6_addr *saddr, struct in6_addr *daddr,
+			int flip)
+{
+	struct rds6_info_message minfo6;
+
+	minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence);
+	minfo6.len = be32_to_cpu(inc->i_hdr.h_len);
+	/* These two were previously left unset and leaked stack contents. */
+	minfo6.tos = inc->i_conn->c_tos;
+	minfo6.flags = 0;
+
+	if (flip) {
+		minfo6.laddr = *daddr;
+		minfo6.faddr = *saddr;
+		minfo6.lport = inc->i_hdr.h_dport;
+		minfo6.fport = inc->i_hdr.h_sport;
+	} else {
+		minfo6.laddr = *saddr;
+		minfo6.faddr = *daddr;
+		minfo6.lport = inc->i_hdr.h_sport;
+		minfo6.fport = inc->i_hdr.h_dport;
+	}
+
+	rds_info_copy(iter, &minfo6, sizeof(minfo6));
+}
+
int rds_skb_local(struct sk_buff *skb)
{
struct rds_nf_hdr *dst, *org;
#include <linux/list.h>
#include "rds.h"
-#include "tcp.h"
/* When transmitting messages in rds_send_xmit, we need to emerge from
* time to time and briefly release the CPU. Otherwise the softlock watchdog
break;
case sizeof(*sin6): {
- ret = -EPROTONOSUPPORT;
- goto out;
+ int addr_type;
+
+ if (sin6->sin6_family != AF_INET6) {
+ ret = -EINVAL;
+ goto out;
+ }
+ addr_type = ipv6_addr_type(&sin6->sin6_addr);
+ if (!(addr_type & IPV6_ADDR_UNICAST)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (addr_type & IPV6_ADDR_LINKLOCAL &&
+ sin6->sin6_scope_id == 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ daddr = sin6->sin6_addr;
+ dport = sin6->sin6_port;
+ scope_id = sin6->sin6_scope_id;
+ break;
}
default:
/* only for info exporting */
static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
static LIST_HEAD(rds_tcp_tc_list);
+
+/* rds_tcp_tc_count counts only IPv4 connections.
+ * rds6_tcp_tc_count counts both IPv4 and IPv6 connections.
+ */
static unsigned int rds_tcp_tc_count;
+static unsigned int rds6_tcp_tc_count;
/* Track rds_tcp_connection structs so they can be cleaned up */
static DEFINE_SPINLOCK(rds_tcp_conn_lock);
/* done under the callback_lock to serialize with write_space */
spin_lock(&rds_tcp_tc_list_lock);
list_del_init(&tc->t_list_item);
- rds_tcp_tc_count--;
+ rds6_tcp_tc_count--;
+ if (!tc->t_cpath->cp_conn->c_isv6)
+ rds_tcp_tc_count--;
spin_unlock(&rds_tcp_tc_list_lock);
tc->t_sock = NULL;
/* done under the callback_lock to serialize with write_space */
spin_lock(&rds_tcp_tc_list_lock);
list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
- rds_tcp_tc_count++;
+ if (!tc->t_cpath->cp_conn->c_isv6)
+ rds_tcp_tc_count++;
+ rds6_tcp_tc_count++;
spin_unlock(&rds_tcp_tc_list_lock);
/* accepted sockets need our listen data ready undone */
write_unlock_bh(&sock->sk->sk_callback_lock);
}
-static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
+/* Handle RDS_INFO_TCP_SOCKETS socket option. It only returns IPv4
+ * connections for backward compatibility.
+ */
+static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens)
{
struct rds_info_tcp_socket tsinfo;
struct rds_tcp_connection *tc;
unsigned long flags;
- struct sockaddr_in sin;
- int sinlen;
- struct socket *sock;
spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
goto out;
list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
+ struct inet_sock *inet = inet_sk(tc->t_sock->sk);
- sock = tc->t_sock;
- if (sock) {
- sock->ops->getname(sock, (struct sockaddr *)&sin,
- &sinlen, 0);
- tsinfo.local_addr = sin.sin_addr.s_addr;
- tsinfo.local_port = sin.sin_port;
- sock->ops->getname(sock, (struct sockaddr *)&sin,
- &sinlen, 1);
- tsinfo.peer_addr = sin.sin_addr.s_addr;
- tsinfo.peer_port = sin.sin_port;
- }
+ if (tc->t_cpath->cp_conn->c_isv6)
+ continue;
+
+ tsinfo.local_addr = inet->inet_saddr;
+ tsinfo.local_port = inet->inet_sport;
+ tsinfo.peer_addr = inet->inet_daddr;
+ tsinfo.peer_port = inet->inet_dport;
tsinfo.hdr_rem = tc->t_tinc_hdr_rem;
tsinfo.data_rem = tc->t_tinc_data_rem;
spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
}
+/* Handle RDS6_INFO_TCP_SOCKETS socket option. It returns both IPv4 and
+ * IPv6 connections. IPv4 connection address is returned in an IPv4 mapped
+ * address.
+ *
+ * @sock: not referenced in this function.
+ * @len:  size in bytes of the caller's buffer. If it cannot hold
+ *        rds6_tcp_tc_count entries of sizeof(struct rds6_info_tcp_socket),
+ *        no entry is copied.
+ * @iter: destination handed to rds_info_copy() once per list entry.
+ * @lens: always filled in on return: nr is rds6_tcp_tc_count and each is
+ *        the size of one struct rds6_info_tcp_socket, whether or not any
+ *        entries were copied.
+ *
+ * The size check, the list walk and the update of @lens all run with
+ * rds_tcp_tc_list_lock held via spin_lock_irqsave().
+ *
+ * NOTE(review): tc->t_sock is dereferenced without a NULL check. This
+ * assumes every entry on rds_tcp_tc_list has a socket attached - confirm
+ * against the code that adds and removes list entries.
+ *
+ * NOTE(review): addresses are copied straight from sk_v6_rcv_saddr and
+ * sk_v6_daddr. No explicit IPv4-to-mapped conversion happens here, so the
+ * mapped form depends on how those fields are populated for IPv4
+ * sockets - confirm.
+ */
+static void rds6_tcp_tc_info(struct socket *sock, unsigned int len,
+ struct rds_info_iterator *iter,
+ struct rds_info_lengths *lens)
+{
+ struct rds6_info_tcp_socket tsinfo6;
+ struct rds_tcp_connection *tc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
+ /* Buffer too small for every tracked connection: copy nothing. */
+ if (len / sizeof(tsinfo6) < rds6_tcp_tc_count)
+ goto out;
+
+ list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
+ struct sock *sk = tc->t_sock->sk;
+ struct inet_sock *inet = inet_sk(sk);
+ /* Addresses come from the sock, ports from the inet_sock. */
+ tsinfo6.local_addr = sk->sk_v6_rcv_saddr;
+ tsinfo6.local_port = inet->inet_sport;
+ tsinfo6.peer_addr = sk->sk_v6_daddr;
+ tsinfo6.peer_port = inet->inet_dport;
+ /* Remaining fields are copied from the rds_tcp_connection. */
+ tsinfo6.hdr_rem = tc->t_tinc_hdr_rem;
+ tsinfo6.data_rem = tc->t_tinc_data_rem;
+ tsinfo6.last_sent_nxt = tc->t_last_sent_nxt;
+ tsinfo6.last_expected_una = tc->t_last_expected_una;
+ tsinfo6.last_seen_una = tc->t_last_seen_una;
+
+ rds_info_copy(iter, &tsinfo6, sizeof(tsinfo6));
+ }
+ /* Reached on both paths: report the count and size of one entry. */
+out:
+ lens->nr = rds6_tcp_tc_count;
+ lens->each = sizeof(tsinfo6);
+
+ spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
+}
+
+
static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
__u32 scope_id)
{
err = -ENOMEM;
goto fail;
}
- rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
+ rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, true);
if (!rtn->rds_tcp_listen_sock) {
- pr_warn("could not set up listen sock\n");
- unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
- rtn->rds_tcp_sysctl = NULL;
- err = -EAFNOSUPPORT;
- goto fail;
+ pr_warn("could not set up IPv6 listen sock\n");
+
+ /* Try IPv4 as some systems disable IPv6 */
+ rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false);
+ if (!rtn->rds_tcp_listen_sock) {
+ unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
+ rtn->rds_tcp_sysctl = NULL;
+ err = -EAFNOSUPPORT;
+ goto fail;
+ }
}
INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
return 0;
static void __exit rds_tcp_exit(void)
{
rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
+ rds_info_deregister_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info);
unregister_pernet_subsys(&rds_tcp_net_ops);
if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
pr_warn("could not unregister rds_tcp_dev_notifier\n");
ret = rds_trans_register(&rds_tcp_transport);
rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
+ rds_info_register_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info);
goto out;
#ifndef _RDS_TCP_H
#define _RDS_TCP_H
-#define RDS_TCP_PORT 16385
-
struct rds_tcp_incoming {
struct rds_incoming ti_inc;
struct sk_buff_head ti_skb_list;
void rds_tcp_state_change(struct sock *sk);
/* tcp_listen.c */
-struct socket *rds_tcp_listen_init(struct net *);
+struct socket *rds_tcp_listen_init(struct net *net, bool isv6);
void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock);
int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
{
struct socket *sock = NULL;
+ struct sockaddr_in6 sin6;
struct sockaddr_in sin;
struct sockaddr *addr;
int addrlen;
+ bool isv6;
int ret;
struct rds_connection *conn = cp->cp_conn;
struct rds_tcp_connection *tc = cp->cp_transport_data;
return 0;
}
- ret = sock_create_kern(rds_conn_net(conn), PF_INET, SOCK_STREAM,
- IPPROTO_TCP, &sock);
+ if (ipv6_addr_v4mapped(&conn->c_laddr)) {
+ ret = sock_create_kern(rds_conn_net(conn), PF_INET,
+ SOCK_STREAM, IPPROTO_TCP, &sock);
+ isv6 = false;
+ } else {
+ ret = sock_create_kern(rds_conn_net(conn), PF_INET6,
+ SOCK_STREAM, IPPROTO_TCP, &sock);
+ isv6 = true;
+ }
if (ret < 0)
goto out;
rds_tcp_tune(sock);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = (__force u32)conn->c_laddr.s6_addr32[3];
- sin.sin_port = (__force u16)htons(0);
- addr = (struct sockaddr *)&sin;
- addrlen = sizeof(sin);
+ if (isv6) {
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = conn->c_laddr;
+ sin6.sin6_port = 0;
+ sin6.sin6_flowinfo = 0;
+ sin6.sin6_scope_id = conn->c_dev_if;
+ addr = (struct sockaddr *)&sin6;
+ addrlen = sizeof(sin6);
+ } else {
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = (__force u32)conn->c_laddr.s6_addr32[3];
+ sin.sin_port = (__force u16)htons(0);
+ addr = (struct sockaddr *)&sin;
+ addrlen = sizeof(sin);
+ }
ret = sock->ops->bind(sock, addr, addrlen);
if (ret) {
goto out;
}
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = (__force u32)conn->c_faddr.s6_addr32[3];
- sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
- addr = (struct sockaddr *)&sin;
- addrlen = sizeof(sin);
+ if (isv6) {
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = conn->c_faddr;
+ sin6.sin6_port = htons(RDS_TCP_PORT);
+ sin6.sin6_flowinfo = 0;
+ sin6.sin6_scope_id = conn->c_dev_if;
+ addr = (struct sockaddr *)&sin6;
+ addrlen = sizeof(sin6);
+ } else {
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = (__force u32)conn->c_faddr.s6_addr32[3];
+ sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
+ addr = (struct sockaddr *)&sin;
+ addrlen = sizeof(sin);
+ }
/*
* once we call connect() we can start getting callbacks and they
inet = inet_sk(new_sock->sk);
- rdsdebug("accepted tcp %pI6c:%u -> %pI6c:%u\n",
+ rdsdebug("accepted family %d tcp %pI6c:%u -> %pI6c:%u\n",
+ sock->sk->sk_family,
&new_sock->sk->sk_v6_rcv_saddr, ntohs(inet->inet_sport),
&new_sock->sk->sk_v6_daddr, ntohs(inet->inet_dport));
ready(sk);
}
-struct socket *rds_tcp_listen_init(struct net *net)
+struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
{
- struct sockaddr_in sin;
struct socket *sock = NULL;
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 *sin6;
+ struct sockaddr_in *sin;
+ int addr_len;
int ret;
- ret = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
- if (ret < 0)
+ ret = sock_create_kern(net, isv6 ? PF_INET6 : PF_INET, SOCK_STREAM,
+ IPPROTO_TCP, &sock);
+ if (ret < 0) {
+ rdsdebug("could not create listener socket: %d\n", ret);
goto out;
+ }
sock->sk->sk_reuse = 1;
rds_tcp_nonagle(sock);
sock->sk->sk_data_ready = rds_tcp_listen_data_ready;
write_unlock_bh(&sock->sk->sk_callback_lock);
- sin.sin_family = PF_INET;
- sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
- sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
+ if (isv6) {
+ sin6 = (struct sockaddr_in6 *)&ss;
+ sin6->sin6_family = PF_INET6;
+ sin6->sin6_addr = in6addr_any;
+ sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
+ sin6->sin6_scope_id = 0;
+ sin6->sin6_flowinfo = 0;
+ addr_len = sizeof(*sin6);
+ } else {
+ sin = (struct sockaddr_in *)&ss;
+ sin->sin_family = PF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ sin->sin_port = (__force u16)htons(RDS_TCP_PORT);
+ addr_len = sizeof(*sin);
+ }
- ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
- if (ret < 0)
+ ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len);
+ if (ret < 0) {
+ rdsdebug("could not bind %s listener socket: %d\n",
+ isv6 ? "IPv6" : "IPv4", ret);
goto out;
+ }
ret = sock->ops->listen(sock, 64);
if (ret < 0)
#include <linux/random.h>
#include "rds.h"
-#include "tcp.h"
+
static unsigned int rds_conn_hb_timeout = 0;
module_param(rds_conn_hb_timeout, int, 0444);
MODULE_PARM_DESC(rds_conn_hb_timeout, " Connection heartbeat timeout");