/*
- * Copyright (c) 2008 Oracle. All rights reserved.
+ * Copyright (c) 2008, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
/* netfilter related components */
struct rds_nf_hdr {
- __be32 saddr; /* source address of request */
- __be32 daddr; /* destination address */
- __be16 sport; /* source port number */
- __be16 dport; /* destination port number */
- __be16 protocol; /* rds socket protocol family to use */
+ struct in6_addr saddr; /* source address of request */
+ struct in6_addr daddr; /* destination address */
+ __be16 sport; /* source port number */
+ __be16 dport; /* destination port number */
+ __be16 protocol; /* rds socket protocol family to use */
#define RDS_NF_HDR_FLAG_BOTH (0x1) /* request needs to go locally and remote */
#define RDS_NF_HDR_FLAG_DONE (0x2) /* the request is consumed and done */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/in.h>
+#include <linux/ipv6.h>
#include <linux/poll.h>
#include <linux/version.h>
#include <linux/random.h>
static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
int *uaddr_len, int peer)
{
- struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
-
- memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+ struct sockaddr_in6 *sin6;
+ struct sockaddr_in *sin;
/* racey, don't care */
if (peer) {
- if (!rs->rs_conn_addr)
+ if (ipv6_addr_any(&rs->rs_conn_addr))
return -ENOTCONN;
- sin->sin_port = rs->rs_conn_port;
- sin->sin_addr.s_addr = rs->rs_conn_addr;
+ /* An IPv4-mapped peer address is reported back as AF_INET
+  * so existing IPv4-only callers keep working; otherwise the
+  * full sockaddr_in6 (including scope_id) is returned.
+  */
+ if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) {
+ sin = (struct sockaddr_in *)uaddr;
+ memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+ sin->sin_family = AF_INET;
+ sin->sin_port = rs->rs_conn_port;
+ sin->sin_addr.s_addr = rs->rs_conn_addr_v4;
+ *uaddr_len = sizeof(*sin);
+ } else {
+ sin6 = (struct sockaddr_in6 *)uaddr;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = rs->rs_conn_port;
+ sin6->sin6_addr = rs->rs_conn_addr;
+ sin6->sin6_flowinfo = 0;
+ /* scope_id is the same as in the bound address. */
+ sin6->sin6_scope_id = rs->rs_bound_scope_id;
+ *uaddr_len = sizeof(*sin6);
+ }
} else {
+ /* Not asking for the peer: report the locally bound address,
+  * again choosing AF_INET vs AF_INET6 by the mapped test.
+  */
- sin->sin_port = rs->rs_bound_port;
- sin->sin_addr.s_addr = rs->rs_bound_addr;
+ if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
+ sin = (struct sockaddr_in *)uaddr;
+ memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+ sin->sin_family = AF_INET;
+ sin->sin_port = rs->rs_bound_port;
+ sin->sin_addr.s_addr = rs->rs_bound_addr_v4;
+ *uaddr_len = sizeof(*sin);
+ } else {
+ sin6 = (struct sockaddr_in6 *)uaddr;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = rs->rs_bound_port;
+ sin6->sin6_addr = rs->rs_bound_addr;
+ sin6->sin6_flowinfo = 0;
+ sin6->sin6_scope_id = rs->rs_bound_scope_id;
+ *uaddr_len = sizeof(*sin6);
+ }
}
- sin->sin_family = AF_INET;
-
- *uaddr_len = sizeof(*sin);
return 0;
}
static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
int len)
{
+ struct sockaddr_in6 sin6;
struct sockaddr_in sin;
int ret = 0;
/* racing with another thread binding seems ok here */
- if (rs->rs_bound_addr == 0) {
+ if (ipv6_addr_any(&rs->rs_bound_addr)) {
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
+ /* The option value may be either a sockaddr_in (legacy IPv4
+  * callers) or a sockaddr_in6; disambiguate by length and
+  * normalize an IPv4 destination to its v4-mapped IPv6 form.
+  */
if (len < sizeof(struct sockaddr_in)) {
ret = -EINVAL;
goto out;
+ } else if (len < sizeof(struct sockaddr_in6)) {
+ /* Assume IPv4 */
+ if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) {
+ ret = -EFAULT;
+ goto out;
+ }
+ ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr);
+ sin6.sin6_port = sin.sin_port;
+ } else {
+ if (copy_from_user(&sin6, optval,
+ sizeof(struct sockaddr_in6))) {
+ ret = -EFAULT;
+ goto out;
+ }
}
- if (copy_from_user(&sin, optval, sizeof(sin))) {
- ret = -EFAULT;
- goto out;
- }
-
- rds_send_drop_to(rs, &sin);
+ rds_send_drop_to(rs, &sin6);
out:
return ret;
}
{
struct rds_reset reset;
struct rds_connection *conn;
+ struct in6_addr src6, dst6;
LIST_HEAD(s_addr_conns);
if (optlen != sizeof(struct rds_reset))
return -EFAULT;
/* Reset all conns associated with source addr */
+ ipv6_addr_set_v4mapped(reset.src.s_addr, &src6);
if (reset.dst.s_addr == 0) {
pr_info("RDS: Reset ALL conns for Source %pI4\n",
&reset.src.s_addr);
rds_conn_laddr_list(sock_net(rds_rs_to_sk(rs)),
- reset.src.s_addr, &s_addr_conns);
+ &src6, &s_addr_conns);
if (list_empty(&s_addr_conns))
goto done;
list_for_each_entry(conn, &s_addr_conns, c_laddr_node)
if (conn)
- rds_user_conn_paths_drop(conn, 1);
+ rds_conn_drop(conn, DR_USER_RESET);
goto done;
}
- conn = rds_conn_find(sock_net(rds_rs_to_sk(rs)),
- reset.src.s_addr, reset.dst.s_addr,
- rs->rs_transport, reset.tos);
+ ipv6_addr_set_v4mapped(reset.dst.s_addr, &dst6);
+ conn = rds_conn_find(sock_net(rds_rs_to_sk(rs)), &src6, &dst6,
+ rs->rs_transport, reset.tos,
+ rs->rs_bound_scope_id);
if (conn) {
bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
printk(KERN_NOTICE "Resetting RDS/%s connection <%pI4,%pI4,%d>\n",
- is_tcp ? "tcp" : "IB",
+ is_tcp ? "TCP" : "IB",
&reset.src.s_addr,
&reset.dst.s_addr, conn->c_tos);
rds_user_conn_paths_drop(conn, DR_USER_RESET);
int addr_len, int flags)
{
struct sock *sk = sock->sk;
- struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
+ struct sockaddr_in *sin;
struct rds_sock *rs = rds_sk_to_rs(sk);
int ret = 0;
lock_sock(sk);
- if (addr_len != sizeof(struct sockaddr_in)) {
- ret = -EINVAL;
- goto out;
- }
+ switch (addr_len) {
+ case sizeof(struct sockaddr_in):
+ sin = (struct sockaddr_in *)uaddr;
+ if (sin->sin_family != AF_INET) {
+ ret = -EAFNOSUPPORT;
+ break;
+ }
+ if (sin->sin_addr.s_addr == INADDR_ANY) {
+ ret = -EDESTADDRREQ;
+ break;
+ }
+ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) ||
+ sin->sin_addr.s_addr == INADDR_BROADCAST) {
+ ret = -EINVAL;
+ break;
+ }
+ ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr);
+ rs->rs_conn_port = sin->sin_port;
+ break;
- if (sin->sin_family != AF_INET) {
- ret = -EAFNOSUPPORT;
- goto out;
- }
+ case sizeof(struct sockaddr_in6):
+ ret = -EPROTONOSUPPORT;
+ break;
- if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
- ret = -EDESTADDRREQ;
- goto out;
+ default:
+ ret = -EINVAL;
+ break;
}
- rs->rs_conn_addr = sin->sin_addr.s_addr;
- rs->rs_conn_port = sin->sin_port;
-
-out:
release_sock(sk);
return ret;
}
rs->rs_netfilter_enabled = 0;
rs->rs_rx_traces = 0;
- if (rs->rs_bound_addr)
- printk(KERN_CRIT "bound addr %x at create\n", rs->rs_bound_addr);
+ if (!ipv6_addr_any(&rs->rs_bound_addr)) {
+ printk(KERN_CRIT "bound addr %pI6c at create\n",
+ &rs->rs_bound_addr);
+ }
spin_lock_bh(&rds_sock_lock);
list_add_tail(&rs->rs_item, &rds_sock_list);
list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
total++;
if (total <= len)
- rds_inc_info_copy(inc, iter, inc->i_saddr,
- rs->rs_bound_addr, 1);
+ rds_inc_info_copy(inc, iter,
+ inc->i_saddr.s6_addr32[3],
+ rs->rs_bound_addr_v4,
+ 1);
}
read_unlock(&rs->rs_recv_lock);
list_for_each_entry(rs, &rds_sock_list, rs_item) {
sinfo.sndbuf = rds_sk_sndbuf(rs);
sinfo.rcvbuf = rds_sk_rcvbuf(rs);
- sinfo.bound_addr = rs->rs_bound_addr;
- sinfo.connected_addr = rs->rs_conn_addr;
+ sinfo.bound_addr = rs->rs_bound_addr_v4;
+ sinfo.connected_addr = rs->rs_conn_addr_v4;
sinfo.bound_port = rs->rs_bound_port;
sinfo.connected_port = rs->rs_conn_port;
sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
rds_page_exit();
rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
-
}
+
module_exit(rds_exit);
u32 rds_gen_num;
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
#include <linux/kernel.h>
#include <net/sock.h>
#include <linux/in.h>
+#include <linux/ipv6.h>
#include <linux/if_arp.h>
#include <linux/jhash.h>
#include "rds.h"
#define BIND_HASH_SIZE 8192
static struct bind_bucket bind_hash_table[BIND_HASH_SIZE];
-static struct bind_bucket *hash_to_bucket(__be32 addr, __be16 port)
+/* Fold all four 32-bit words of the IPv6 address plus the port into one
+ * jhash so v4-mapped and native v6 addresses spread over the same table.
+ */
+static struct bind_bucket *hash_to_bucket(struct in6_addr *addr, __be16 port)
{
- return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
- (BIND_HASH_SIZE - 1));
+ return bind_hash_table +
+ (jhash_3words(addr->s6_addr32[0] ^ addr->s6_addr32[1],
+ addr->s6_addr32[2] ^ addr->s6_addr32[3],
+ (u32)port, 0) & (BIND_HASH_SIZE - 1));
}
/*
* must hold either read or write lock (write lock for insert != NULL)
*/
static struct rds_sock *rds_bind_lookup(struct bind_bucket *bucket,
- __be32 addr, __be16 port,
- struct rds_sock *insert)
+ const struct in6_addr *addr,
+ __be16 port,
+ struct rds_sock *insert,
+ __u32 scope_id)
{
struct rds_sock *rs;
struct hlist_head *head = &bucket->head;
- u64 cmp;
- u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
+ u16 lport = be16_to_cpu(port);
hlist_for_each_entry(rs, head, rs_bound_node) {
- cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
- be16_to_cpu(rs->rs_bound_port);
-
- if (cmp == needle) {
+ if (lport == be16_to_cpu(rs->rs_bound_port) &&
+ ipv6_addr_equal(addr, &rs->rs_bound_addr) &&
+ rs->rs_bound_scope_id == scope_id) {
rds_sock_addref(rs);
return rs;
}
* make sure our addr and port are set before
* we are added to the list.
*/
- insert->rs_bound_addr = addr;
+ insert->rs_bound_addr = *addr;
insert->rs_bound_port = port;
+ insert->rs_bound_scope_id = scope_id;
rds_sock_addref(insert);
hlist_add_head(&insert->rs_bound_node, head);
* The rx path can race with rds_release. We notice if rds_release() has
* marked this socket and don't return a rs ref to the rx path.
*/
-struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
+/* Look up the socket bound to addr/port (and, for link-local style
+ * addresses, scope_id).  Returns a referenced rds_sock or NULL; a socket
+ * already marked SOCK_DEAD by rds_release() is not returned.
+ */
+struct rds_sock *rds_find_bound(struct in6_addr *addr, __be16 port,
+ __u32 scope_id)
{
struct rds_sock *rs;
unsigned long flags;
struct bind_bucket *bucket = hash_to_bucket(addr, port);
read_lock_irqsave(&bucket->lock, flags);
- rs = rds_bind_lookup(bucket, addr, port, NULL);
+ rs = rds_bind_lookup(bucket, addr, port, NULL, scope_id);
read_unlock_irqrestore(&bucket->lock, flags);
if (rs && sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) {
rs = NULL;
}
- rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
- ntohs(port));
+ rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr,
+ ntohs(port));
return rs;
}
/* returns -ve errno or +ve port */
-static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
+static int rds_add_bound(struct rds_sock *rs, struct in6_addr *addr,
+ __be16 *port, __u32 scope_id)
{
unsigned long flags;
int ret = -EADDRINUSE;
bucket = hash_to_bucket(addr, cpu_to_be16(rover));
write_lock_irqsave(&bucket->lock, flags);
- rrs = rds_bind_lookup(bucket, addr, cpu_to_be16(rover), rs);
+ rrs = rds_bind_lookup(bucket, addr, cpu_to_be16(rover), rs,
+ scope_id);
write_unlock_irqrestore(&bucket->lock, flags);
if (!rrs) {
*port = rs->rs_bound_port;
ret = 0;
- rdsdebug("rs %p binding to %pI4:%d\n",
- rs, &addr, (int)ntohs(*port));
+ rdsdebug("rs %p binding to %pI6c:%d\n",
+ rs, addr, (int)ntohs(*port));
break;
} else
rds_sock_put(rrs);
{
unsigned long flags;
struct bind_bucket *bucket =
- hash_to_bucket(rs->rs_bound_addr, rs->rs_bound_port);
+ hash_to_bucket(&rs->rs_bound_addr, rs->rs_bound_port);
write_lock_irqsave(&bucket->lock, flags);
- if (rs->rs_bound_addr) {
- rdsdebug("rs %p unbinding from %pI4:%d\n",
- rs, &rs->rs_bound_addr,
- ntohs(rs->rs_bound_port));
+ if (!ipv6_addr_any(&rs->rs_bound_addr)) {
+ rdsdebug("rs %p unbinding from %pI6c:%d\n",
+ rs, &rs->rs_bound_addr,
+ ntohs(rs->rs_bound_port));
hlist_del_init(&rs->rs_bound_node);
rds_sock_put(rs);
- rs->rs_bound_addr = 0;
+ rs->rs_bound_addr = in6addr_any;
}
write_unlock_irqrestore(&bucket->lock, flags);
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
- struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
struct rds_sock *rs = rds_sk_to_rs(sk);
+ struct in6_addr v6addr, *binding_addr;
struct rds_transport *trans;
+ __u32 scope_id = 0;
int ret = 0;
+ __be16 port;
+
+ if (addr_len == sizeof(struct sockaddr_in)) {
+ struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
+
+ if (sin->sin_family != AF_INET ||
+ sin->sin_addr.s_addr == INADDR_ANY)
+ return -EINVAL;
+ ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
+ binding_addr = &v6addr;
+ port = sin->sin_port;
+ } else if (addr_len == sizeof(struct sockaddr_in6)) {
+ return -EPROTONOSUPPORT;
+ } else {
+ return -EINVAL;
+ }
lock_sock(sk);
- if (addr_len != sizeof(struct sockaddr_in) ||
- sin->sin_family != AF_INET ||
- rs->rs_bound_addr ||
- sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port);
+ ret = rds_add_bound(rs, binding_addr, &port, scope_id);
if (ret)
goto out;
if (rs->rs_transport) { /* previously bound */
trans = rs->rs_transport;
if (trans->laddr_check(sock_net(sock->sk),
- sin->sin_addr.s_addr) != 0) {
+ binding_addr, scope_id) != 0) {
ret = -ENOPROTOOPT;
rds_remove_bound(rs);
} else {
}
goto out;
}
- trans = rds_trans_get_preferred(sock_net(sock->sk),
- sin->sin_addr.s_addr);
+ trans = rds_trans_get_preferred(sock_net(sock->sk), binding_addr,
+ scope_id);
if (!trans) {
ret = -EADDRNOTAVAIL;
rds_remove_bound(rs);
if (printk_ratelimit())
- printk(KERN_INFO "RDS: rds_bind() could not find a transport for %pI4, "
- "load rds_tcp or rds_rdma?\n", &sin->sin_addr.s_addr);
+ printk(KERN_INFO "RDS: rds_bind() could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n",
+ binding_addr);
goto out;
}
/*
- * Copyright (c) 2007 Oracle. All rights reserved.
+ * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
static DEFINE_SPINLOCK(rds_cong_lock);
static struct rb_root rds_cong_tree = RB_ROOT;
-static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
+static struct rds_cong_map *rds_cong_tree_walk(struct in6_addr *addr,
struct rds_cong_map *insert)
{
struct rb_node **p = &rds_cong_tree.rb_node;
struct rds_cong_map *map;
while (*p) {
+ int diff;
parent = *p;
map = rb_entry(parent, struct rds_cong_map, m_rb_node);
- if (addr < map->m_addr)
+ diff = rds_addr_cmp(addr, &map->m_addr);
+ if (diff < 0)
p = &(*p)->rb_left;
- else if (addr > map->m_addr)
+ else if (diff > 0)
p = &(*p)->rb_right;
else
return map;
* these bitmaps in the process getting pointers to them. The bitmaps are only
* ever freed as the module is removed after all connections have been freed.
*/
-static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
+/* Find or create the congestion map for addr.  The address is now stored
+ * by value as a full in6_addr inside the map.
+ */
+static struct rds_cong_map *rds_cong_from_addr(struct in6_addr *addr)
{
struct rds_cong_map *map;
struct rds_cong_map *ret = NULL;
if (!map)
return NULL;
- map->m_addr = addr;
+ map->m_addr = *addr;
init_waitqueue_head(&map->m_waitq);
INIT_LIST_HEAD(&map->m_conn_list);
kfree(map);
}
- rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr));
+ rdsdebug("map %p for addr %pI6c\n", ret, addr);
return ret;
}
int rds_cong_get_maps(struct rds_connection *conn)
{
- conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
- conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
+ conn->c_lcong = rds_cong_from_addr(&conn->c_laddr);
+ conn->c_fcong = rds_cong_from_addr(&conn->c_faddr);
if (!(conn->c_lcong && conn->c_fcong))
return -ENOMEM;
void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask)
{
- rdsdebug("waking map %p for %pI4\n",
- map, &map->m_addr);
+ rdsdebug("waking map %p for %pI6c\n",
+ map, &map->m_addr);
rds_stats_inc(s_cong_update_received);
atomic_inc(&rds_cong_generation);
if (waitqueue_active(&map->m_waitq))
unsigned long i;
unsigned long off;
- rdsdebug("setting congestion for %pI4:%u in map %p\n",
- &map->m_addr, ntohs(port), map);
+ rdsdebug("setting congestion for %pI6c:%u in map %p\n",
+ &map->m_addr, ntohs(port), map);
i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
unsigned long i;
unsigned long off;
- rdsdebug("clearing congestion for %pI4:%u in map %p\n",
- &map->m_addr, ntohs(port), map);
+ rdsdebug("clearing congestion for %pI6c:%u in map %p\n",
+ &map->m_addr, ntohs(port), map);
i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
/* update congestion map for now-closed port */
spin_lock_irqsave(&rds_cong_lock, flags);
- map = rds_cong_tree_walk(rs->rs_bound_addr, NULL);
+ map = rds_cong_tree_walk(&rs->rs_bound_addr, NULL);
spin_unlock_irqrestore(&rds_cong_lock, flags);
if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
*/
#include <linux/kernel.h>
#include <linux/list.h>
-#include <net/inet_hashtables.h>
+#include <net/ipv6.h>
+#include <net/inet6_hashtables.h>
#include "rds.h"
#include "loop.h"
static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES];
static struct kmem_cache *rds_conn_slab;
-static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
+/* Hash the (laddr, faddr) pair to a connection-hash bucket.  The local
+ * side uses only the last 32 bits (sufficient for v4-mapped addresses);
+ * the foreign address is jhash'ed over all 128 bits with its own secret.
+ */
+static struct hlist_head *rds_conn_bucket(const struct in6_addr *laddr,
+ const struct in6_addr *faddr)
{
+ static u32 rds6_hash_secret __read_mostly;
static u32 rds_hash_secret __read_mostly;
- unsigned long hash;
+ u32 lhash, fhash, hash;
net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret));
+ net_get_random_once(&rds6_hash_secret, sizeof(rds6_hash_secret));
+
+ lhash = (__force u32)laddr->s6_addr32[3];
+ fhash = __ipv6_addr_jhash(faddr, rds6_hash_secret);
+ hash = __inet6_ehashfn(lhash, 0, fhash, 0, rds_hash_secret);
- /* Pass NULL, don't need struct net for hash */
- hash = __inet_ehashfn(be32_to_cpu(laddr), 0,
- be32_to_cpu(faddr), 0,
- rds_hash_secret);
return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK];
}
/* rcu read lock must be held or the connection spinlock */
static struct rds_connection *rds_conn_lookup(struct net *net,
struct hlist_head *head,
- __be32 laddr, __be32 faddr,
+ const struct in6_addr *laddr,
+ const struct in6_addr *faddr,
struct rds_transport *trans,
- u8 tos)
+ u8 tos,
+ int dev_if)
{
struct rds_connection *conn, *ret = NULL;
+ /* A connection now also matches on the bound interface (dev_if),
+  * needed to disambiguate link-local IPv6 peers.
+  */
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
- if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
- conn->c_tos == tos &&
- conn->c_trans == trans &&
- net == rds_conn_net(conn)) {
+ if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
+ ipv6_addr_equal(&conn->c_laddr, laddr) &&
+ conn->c_tos == tos && conn->c_trans == trans &&
+ net == rds_conn_net(conn) &&
+ conn->c_dev_if == dev_if) {
ret = conn;
break;
}
}
- rdsdebug("returning conn %p for %pI4 -> %pI4\n", ret,
- &laddr, &faddr);
+ rdsdebug("returning conn %p for %pI6c -> %pI6c\n", ret, laddr, faddr);
return ret;
}
-void rds_conn_laddr_list(struct net *net,
- __be32 laddr, struct list_head *laddr_conns)
+void rds_conn_laddr_list(struct net *net, struct in6_addr *laddr,
+ struct list_head *laddr_conns)
{
struct rds_connection *conn;
struct hlist_head *head;
for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
i++, head++) {
hlist_for_each_entry_rcu(conn, head, c_hash_node)
- if (conn->c_laddr == laddr &&
+ if (ipv6_addr_equal(&conn->c_laddr, laddr) &&
net == rds_conn_net(conn))
list_add(&conn->c_laddr_node, laddr_conns);
}
{
struct rds_connection *conn = cp->cp_conn;
- rdsdebug("connection %pI4 to %pI4 reset\n",
- &conn->c_laddr, &conn->c_faddr);
+ rdsdebug("connection %pI6c to %pI6c reset\n",
+ &conn->c_laddr, &conn->c_faddr);
rds_stats_inc(s_conn_reset);
rds_send_path_reset(cp);
* are torn down as the module is removed, if ever.
*/
static struct rds_connection *__rds_conn_create(struct net *net,
- __be32 laddr, __be32 faddr,
- struct rds_transport *trans, gfp_t gfp,
- u8 tos,
- int is_outgoing)
+ const struct in6_addr *laddr,
+ const struct in6_addr *faddr,
+ struct rds_transport *trans,
+ gfp_t gfp, u8 tos,
+ int is_outgoing,
+ int dev_if)
{
struct rds_connection *conn, *parent = NULL;
struct hlist_head *head = rds_conn_bucket(laddr, faddr);
int npaths;
rcu_read_lock();
- conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos);
- if (conn
- && conn->c_loopback
- && conn->c_trans != &rds_loop_transport
- && laddr == faddr
- && !is_outgoing) {
+ conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if);
+ if (conn &&
+ conn->c_loopback &&
+ conn->c_trans != &rds_loop_transport &&
+ ipv6_addr_equal(laddr, faddr) &&
+ !is_outgoing) {
/* This is a looped back IB connection, and we're
* called by the code handling the incoming connect.
* We need a second connection object into which we
memset(conn, 0, sizeof(*conn));
INIT_HLIST_NODE(&conn->c_hash_node);
- conn->c_laddr = laddr;
- conn->c_faddr = faddr;
+ conn->c_laddr = *laddr;
+ conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
+ conn->c_faddr = *faddr;
+ conn->c_dev_if = dev_if;
rds_conn_net_set(conn, net);
conn->c_tos = tos;
* can bind to the destination address then we'd rather the messages
* flow through loopback rather than either transport.
*/
- loop_trans = rds_trans_get_preferred(net, faddr);
+ loop_trans = rds_trans_get_preferred(net, faddr, conn->c_dev_if);
if (loop_trans) {
rds_trans_put(loop_trans);
conn->c_loopback = 1;
goto out;
}
- rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
- conn, &laddr, &faddr,
- trans->t_name ? trans->t_name : "[unknown]",
- is_outgoing ? "(outgoing)" : "");
+ rdsdebug("allocated conn %p for %pI6c -> %pI6c over %s %s\n",
+ conn, laddr, faddr,
+ trans->t_name ? trans->t_name : "[unknown]",
+ is_outgoing ? "(outgoing)" : "");
/*
* Since we ran without holding the conn lock, someone could
/* Creating normal conn */
struct rds_connection *found;
- found = rds_conn_lookup(net, head, laddr, faddr, trans, tos);
+ found = rds_conn_lookup(net, head, laddr, faddr, trans, tos,
+ dev_if);
if (found) {
struct rds_conn_path *cp;
int i;
}
+/* Public wrapper: create (or find) a non-outgoing connection. */
struct rds_connection *rds_conn_create(struct net *net,
- __be32 laddr, __be32 faddr,
- struct rds_transport *trans,
- u8 tos, gfp_t gfp)
+ const struct in6_addr *laddr,
+ const struct in6_addr *faddr,
+ struct rds_transport *trans,
+ u8 tos, gfp_t gfp, int dev_if)
{
- return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0);
+ return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if);
}
EXPORT_SYMBOL_GPL(rds_conn_create);
+/* Public wrapper: create (or find) a connection for outgoing traffic. */
struct rds_connection *rds_conn_create_outgoing(struct net *net,
- __be32 laddr, __be32 faddr,
- struct rds_transport *trans,
- u8 tos, gfp_t gfp)
+ struct in6_addr *laddr,
+ struct in6_addr *faddr,
+ struct rds_transport *trans,
+ u8 tos, gfp_t gfp, int dev_if)
{
- return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1);
+ return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if);
}
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
-struct rds_connection *rds_conn_find(struct net *net, __be32 laddr,
- __be32 faddr, struct rds_transport *trans,
- u8 tos)
+struct rds_connection *rds_conn_find(struct net *net, struct in6_addr *laddr,
+ struct in6_addr *faddr,
+ struct rds_transport *trans, u8 tos,
+ int dev_if)
{
struct rds_connection *conn;
struct hlist_head *head = rds_conn_bucket(laddr, faddr);
rcu_read_lock();
- conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos);
+ conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if);
rcu_read_unlock();
return conn;
/* shut it down unless it's down already */
if (!rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
rds_rtd(RDS_RTD_CM_EXT,
- "RDS/%s: shutdown init <%pI4,%pI4,%d>, cn %p, cn->c_p %p\n",
+ "RDS/%s: shutdown init <%pI6c,%pI6c,%d>, cn %p, cn->c_p %p\n",
conn->c_trans->t_type == RDS_TRANS_TCP ? "TCP" : "IB",
&conn->c_laddr, &conn->c_faddr,
conn->c_tos, conn, conn->c_passive);
int i;
int npaths = (conn->c_trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
- rds_rtd(RDS_RTD_CM, "freeing conn %p <%pI4,%pI4,%d>\n",
+ rds_rtd(RDS_RTD_CM, "freeing conn %p <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr, &conn->c_faddr,
conn->c_tos);
/* XXX too lazy to maintain counts.. */
list_for_each_entry(rm, list, m_conn_item) {
+ __be32 laddr;
+ __be32 faddr;
+
total++;
+ laddr = conn->c_laddr.s6_addr32[3];
+ faddr = conn->c_faddr.s6_addr32[3];
if (total <= len)
rds_inc_info_copy(&rm->m_inc,
iter,
- conn->c_laddr,
- conn->c_faddr,
+ laddr,
+ faddr,
0);
}
struct hlist_head *head;
struct rds_connection *conn;
size_t i;
- int j;
rcu_read_lock();
i++, head++) {
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
struct rds_conn_path *cp;
- int npaths;
- npaths = (conn->c_trans->t_mp_capable ?
- RDS_MPATH_WORKERS : 1);
- for (j = 0; j < npaths; j++) {
- cp = &conn->c_path[j];
+ /* XXX We only copy the information from the first
+  * path for now.  The problem is that if there are
+  * more than one underlying paths, we cannot report
+  * information of all of them using the existing
+  * API.  For example, there is only one next_tx_seq;
+  * which path's next_tx_seq should we report?  It is
+  * a bug in the design of MPRDS.
+  */
+ cp = conn->c_path;
- /* XXX no cp_lock usage.. */
- if (!visitor(cp, buffer))
- continue;
- }
+ /* XXX no cp_lock usage.. */
+ if (!visitor(cp, buffer))
+ continue;
/* We copy as much as we can fit in the buffer,
* but we count all items so that the caller
static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
{
struct rds_info_connection *cinfo = buffer;
+ struct rds_connection *conn = cp->cp_conn;
cinfo->next_tx_seq = cp->cp_next_tx_seq;
cinfo->next_rx_seq = cp->cp_next_rx_seq;
- cinfo->laddr = cp->cp_conn->c_laddr;
- cinfo->faddr = cp->cp_conn->c_faddr;
- cinfo->tos = cp->cp_conn->c_tos;
- strncpy(cinfo->transport, cp->cp_conn->c_trans->t_name,
+ cinfo->laddr = conn->c_laddr.s6_addr32[3];
+ cinfo->faddr = conn->c_faddr.s6_addr32[3];
+ cinfo->tos = conn->c_tos;
+ strncpy(cinfo->transport, conn->c_trans->t_name,
sizeof(cinfo->transport));
cinfo->flags = 0;
rds_conn_message_info_send);
rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES,
rds_conn_message_info_retrans);
+
}
static char *conn_drop_reasons[] = {
cp->cp_reconnect_err = 0;
cp->cp_reconnect_racing = 0;
if (conn->c_trans->t_type != RDS_TRANS_TCP)
- printk(KERN_INFO "RDS/IB: connection <%pI4,%pI4,%d> dropped due to '%s'\n",
+ printk(KERN_INFO "RDS/IB: connection <%pI6c,%pI6c,%d> dropped due to '%s'\n",
&conn->c_laddr,
&conn->c_faddr,
conn->c_tos,
} else if ((cp->cp_reconnect_warn) &&
(now - cp->cp_reconnect_start > 60)) {
- printk(KERN_INFO "RDS/%s: re-connect <%pI4,%pI4,%d> stalling for more than 1 min...(drops=%u err=%d)\n",
+ printk(KERN_INFO "RDS/%s: re-connect <%pI6c,%pI6c,%d> stalling for more than 1 min...(drops=%u err=%d)\n",
conn->c_trans->t_type == RDS_TRANS_TCP ? "TCP" : "IB",
&conn->c_laddr,
&conn->c_faddr,
atomic_set(&cp->cp_state, RDS_CONN_ERROR);
rds_rtd(RDS_RTD_CM_EXT,
- "RDS/%s: queueing shutdown work, conn %p, <%pI4,%pI4,%d>\n",
+ "RDS/%s: queueing shutdown work, conn %p, <%pI6c,%pI6c,%d>\n",
conn->c_trans->t_type == RDS_TRANS_TCP ? "TCP" : "IB",
conn, &conn->c_laddr, &conn->c_faddr,
conn->c_tos);
if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
!test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) {
rds_rtd(RDS_RTD_CM_EXT,
- "queueing connect work, conn %p, <%pI4,%pI4,%d>\n",
+ "queueing connect work, conn %p, <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr, &conn->c_faddr,
conn->c_tos);
queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
/* We will only ever look at IB transports */
if (conn->c_trans != &rds_ib_transport)
return 0;
+ if (conn->c_isv6)
+ return 0;
- iinfo->src_addr = conn->c_laddr;
- iinfo->dst_addr = conn->c_faddr;
+ iinfo->src_addr = conn->c_laddr.s6_addr32[3];
+ iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
* allowed to influence which paths have priority. We could call userspace
* asserting this policy "routing".
*/
-static int rds_ib_laddr_check(struct net *net, __be32 addr)
+static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
+ __u32 scope_id)
{
int ret;
struct rdma_cm_id *cm_id;
struct sockaddr_in sin;
/* Link-local addresses don't play well with IB */
- if (ipv4_is_linklocal_169(addr)) {
+ if (ipv4_is_linklocal_169(addr->s6_addr32[3])) {
pr_info_once("\n");
pr_info_once("****************************************************\n");
pr_info_once("** WARNING WARNING WARNING WARNING WARNING **\n");
pr_info_once("** **\n");
- pr_info_once("** RDS/IB: Link local address %pI4 NOT SUPPORTED **\n",
- &addr);
+ pr_info_once("** RDS/IB: Link local address %pI6c NOT SUPPORTED **\n",
+ addr);
pr_info_once("** **\n");
pr_info_once("** HAIP IP addresses should not be used on ORACLE **\n");
pr_info_once("** engineered systems **\n");
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = addr;
+ sin.sin_addr.s_addr = addr->s6_addr32[3];
/* rdma_bind_addr will only succeed for IB & iWARP devices */
ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
if (ret || !cm_id->device || cm_id->device->node_type != RDMA_NODE_IB_CA)
ret = -EADDRNOTAVAIL;
- rdsdebug("addr %pI4 ret %d node type %d\n",
- &addr, ret,
- cm_id->device ? cm_id->device->node_type : -1);
+ rdsdebug("addr %pI6c ret %d node type %d\n",
+ addr, ret,
+ cm_id->device ? cm_id->device->node_type : -1);
rdma_destroy_id(cm_id);
struct list_head *ready;
};
+/* Fields common to the v4 and v6 CM private data exchanged at connection
+ * setup.  Wire layout: do not permute or resize existing members.
+ */
+struct rds_ib_conn_priv_cmn {
+ u8 ricpc_protocol_major;
+ u8 ricpc_protocol_minor;
+ __be16 ricpc_protocol_minor_mask; /* bitmask */
+ u8 ricpc_tos;
+ u8 ricpc_reserved1;
+ __be16 ricpc_frag_sz;
+ __be64 ricpc_ack_seq;
+ __be32 ricpc_credit; /* non-zero enables flow ctl */
+};
+
struct rds_ib_connect_private {
/* Add new fields at the end, and don't permute existing fields. */
- __be32 dp_saddr;
- __be32 dp_daddr;
- u8 dp_protocol_major;
- u8 dp_protocol_minor;
- __be16 dp_protocol_minor_mask; /* bitmask */
- u8 dp_tos;
- u8 dp_reserved1;
- __be16 dp_frag_sz;
- __be64 dp_ack_seq;
- __be32 dp_credit; /* non-zero enables flow ctl */
+ __be32 dp_saddr;
+ __be32 dp_daddr;
+ struct rds_ib_conn_priv_cmn dp_cmn;
+};
+
+struct rds6_ib_connect_private {
+ /* Add new fields at the end, and don't permute existing fields. */
+ struct in6_addr dp_saddr;
+ struct in6_addr dp_daddr;
+ struct rds_ib_conn_priv_cmn dp_cmn;
+};
+
+/* Compatibility accessors so existing code can keep using the old
+ * dp_* field names after the common members moved into dp_cmn.
+ */
+#define dp_protocol_major dp_cmn.ricpc_protocol_major
+#define dp_protocol_minor dp_cmn.ricpc_protocol_minor
+#define dp_protocol_minor_mask dp_cmn.ricpc_protocol_minor_mask
+#define dp_tos dp_cmn.ricpc_tos
+#define dp_reserved1 dp_cmn.ricpc_reserved1
+#define dp_frag_sz dp_cmn.ricpc_frag_sz
+#define dp_ack_seq dp_cmn.ricpc_ack_seq
+#define dp_credit dp_cmn.ricpc_credit
+
+/* Either address-family view of the CM private data. */
+union rds_ib_conn_priv {
+ struct rds_ib_connect_private ricp_v4;
+ struct rds6_ib_connect_private ricp_v6;
};
struct rds_ib_send_work {
int rds_ib_listen_init(void);
void rds_ib_listen_stop(void);
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
- struct rdma_cm_event *event);
-int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id);
+ struct rdma_cm_event *event, bool isv6);
+int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6);
void rds_ib_cm_connect_complete(struct rds_connection *conn,
struct rdma_cm_event *event);
void rds_ib_init_frag(unsigned int version);
void rds_ib_reset_fastreg(struct work_struct *work);
/* ib_rdma.c */
-int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
+int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,
+ struct in6_addr *ipaddr);
void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
void rds_ib_destroy_nodev_conns(void);
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
+#include <net/addrconf.h>
#include "rds.h"
#include "ib.h"
ic->i_frag_pages = ceil(ic->i_frag_sz, PAGE_SIZE);
- pr_debug("RDS/IB: conn <%pI4, %pI4,%d>, Frags <init,ic,dp>: {%d,%d,%d}, updated {%d -> %d}\n",
+ pr_debug("RDS/IB: conn <%pI6c, %pI6c,%d>, Frags <init,ic,dp>: {%d,%d,%d}, updated {%d -> %d}\n",
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
ib_init_frag_size / SZ_1K, ic->i_frag_sz / SZ_1K, dp_frag / SZ_1K,
current_frag / SZ_1K, ic->i_frag_sz / SZ_1K);
*/
void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
{
- const struct rds_ib_connect_private *dp = NULL;
struct rds_ib_connection *ic = conn->c_transport_data;
+ const union rds_ib_conn_priv *dp = NULL;
struct ib_qp_attr qp_attr;
+ __be16 frag_sz = 0;
+ __be64 ack_seq = 0;
+ __be32 credit = 0;
+ u8 major = 0;
+ u8 minor = 0;
int err;
- if (event->param.conn.private_data_len >= sizeof(*dp)) {
- dp = event->param.conn.private_data;
-
- /* make sure it isn't empty data */
- if (dp->dp_protocol_major) {
- rds_ib_set_protocol(conn,
- RDS_PROTOCOL(dp->dp_protocol_major,
- dp->dp_protocol_minor));
- rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
- rds_ib_set_frag_size(conn, be16_to_cpu(dp->dp_frag_sz));
+ dp = event->param.conn.private_data;
+ if (conn->c_isv6) {
+ if (event->param.conn.private_data_len >=
+ sizeof(struct rds6_ib_connect_private)) {
+ major = dp->ricp_v6.dp_protocol_major;
+ minor = dp->ricp_v6.dp_protocol_minor;
+ credit = dp->ricp_v6.dp_credit;
+ frag_sz = dp->ricp_v6.dp_frag_sz;
+			/* The start of the dp structure is not guaranteed
+			 * to be 8-byte aligned.  Since dp_ack_seq is 64 bits
+			 * wide, extended load operations could be used to
+			 * read it, so go through get_unaligned() to avoid
+			 * unaligned access errors.
+			 */
+ ack_seq = get_unaligned(&dp->ricp_v6.dp_ack_seq);
}
+ } else if (event->param.conn.private_data_len >=
+ sizeof(struct rds_ib_connect_private)) {
+ major = dp->ricp_v4.dp_protocol_major;
+ minor = dp->ricp_v4.dp_protocol_minor;
+ credit = dp->ricp_v4.dp_credit;
+ frag_sz = dp->ricp_v4.dp_frag_sz;
+ ack_seq = get_unaligned(&dp->ricp_v4.dp_ack_seq);
+ }
+
+ /* make sure it isn't empty data */
+ if (major) {
+ rds_ib_set_protocol(conn, RDS_PROTOCOL(major, minor));
+ rds_ib_set_flow_control(conn, be32_to_cpu(credit));
+ rds_ib_set_frag_size(conn, be16_to_cpu(frag_sz));
}
if (conn->c_version < RDS_PROTOCOL_VERSION) {
if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) {
- printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed, no longer supported\n",
+ printk(KERN_NOTICE "RDS/IB: Connection to %pI6c version %u.%u failed, no longer supported\n",
&conn->c_faddr,
RDS_PROTOCOL_MAJOR(conn->c_version),
RDS_PROTOCOL_MINOR(conn->c_version));
}
}
- printk(KERN_NOTICE "RDS/IB: %s conn %p i_cm_id %p, frag %dKB, connected <%pI4,%pI4,%d> version %u.%u%s%s\n",
+ printk(KERN_NOTICE "RDS/IB: %s conn %p i_cm_id %p, frag %dKB, connected <%pI6c,%pI6c,%d> version %u.%u%s%s\n",
ic->i_active_side ? "Active " : "Passive",
conn, ic->i_cm_id, ic->i_frag_sz / SZ_1K,
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
/* update ib_device with this local ipaddr */
- err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
+ err = rds_ib_update_ipaddr(ic->rds_ibdev, &conn->c_laddr);
if (err)
printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
err);
/* If the peer gave us the last packet it saw, process this as if
* we had received a regular ACK. */
if (dp) {
- /* dp structure start is not guaranteed to be 8 bytes aligned.
- * Since dp_ack_seq is 64-bit extended load operations can be
- * used so go through get_unaligned to avoid unaligned errors.
- */
- __be64 dp_ack_seq = get_unaligned(&dp->dp_ack_seq);
-
- if (dp_ack_seq)
- rds_send_drop_acked(conn, be64_to_cpu(dp_ack_seq),
+ if (ack_seq)
+ rds_send_drop_acked(conn, be64_to_cpu(ack_seq),
NULL);
}
}
static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
- struct rdma_conn_param *conn_param,
- struct rds_ib_connect_private *dp,
- u32 protocol_version,
- u32 max_responder_resources,
- u32 max_initiator_depth, u16 frag)
+ struct rdma_conn_param *conn_param,
+ union rds_ib_conn_priv *dp,
+ u32 protocol_version,
+ u32 max_responder_resources,
+ u32 max_initiator_depth, u16 frag,
+ bool isv6)
{
struct rds_ib_connection *ic = conn->c_transport_data;
struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
if (dp) {
memset(dp, 0, sizeof(*dp));
- dp->dp_saddr = conn->c_laddr;
- dp->dp_daddr = conn->c_faddr;
- dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
- dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
- dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
- dp->dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic));
- dp->dp_tos = conn->c_tos;
+ if (isv6) {
+ dp->ricp_v6.dp_saddr = conn->c_laddr;
+ dp->ricp_v6.dp_daddr = conn->c_faddr;
+ dp->ricp_v6.dp_protocol_major =
+ RDS_PROTOCOL_MAJOR(protocol_version);
+ dp->ricp_v6.dp_protocol_minor =
+ RDS_PROTOCOL_MINOR(protocol_version);
+ dp->ricp_v6.dp_protocol_minor_mask =
+ cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
+ dp->ricp_v6.dp_ack_seq =
+ cpu_to_be64(rds_ib_piggyb_ack(ic));
+ dp->ricp_v6.dp_tos = conn->c_tos;
+ dp->ricp_v6.dp_frag_sz = cpu_to_be16(frag);
+
+ conn_param->private_data = &dp->ricp_v6;
+ conn_param->private_data_len = sizeof(dp->ricp_v6);
+ } else {
+ dp->ricp_v4.dp_saddr = conn->c_laddr.s6_addr32[3];
+ dp->ricp_v4.dp_daddr = conn->c_faddr.s6_addr32[3];
+ dp->ricp_v4.dp_protocol_major =
+ RDS_PROTOCOL_MAJOR(protocol_version);
+ dp->ricp_v4.dp_protocol_minor =
+ RDS_PROTOCOL_MINOR(protocol_version);
+ dp->ricp_v4.dp_protocol_minor_mask =
+ cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
+ dp->ricp_v4.dp_ack_seq =
+ cpu_to_be64(rds_ib_piggyb_ack(ic));
+ dp->ricp_v4.dp_tos = conn->c_tos;
+ dp->ricp_v4.dp_frag_sz = cpu_to_be16(frag);
+
+ conn_param->private_data = &dp->ricp_v4;
+ conn_param->private_data_len = sizeof(dp->ricp_v4);
+ }
/* Advertise flow control */
if (ic->i_flowctl) {
unsigned int credits;
- credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
- dp->dp_credit = cpu_to_be32(credits);
- atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits);
+ credits = IB_GET_POST_CREDITS(
+ atomic_read(&ic->i_credits));
+ if (isv6)
+ dp->ricp_v6.dp_credit = cpu_to_be32(credits);
+ else
+ dp->ricp_v4.dp_credit = cpu_to_be32(credits);
+ atomic_sub(IB_SET_POST_CREDITS(credits),
+ &ic->i_credits);
}
-
- dp->dp_frag_sz = cpu_to_be16(frag);
- conn_param->private_data = dp;
- conn_param->private_data_len = sizeof(*dp);
}
}
while ((nr = ib_poll_cq(cq, RDS_WC_MAX, wcs)) > 0) {
for (i = 0; i < nr; i++) {
if ((++ic->i_rx_poll_cq % RDS_IB_RX_LIMIT) == 0) {
- rdsdebug("connection <%pI4,%pI4,%d> RX poll_cq processed %d\n",
+ rdsdebug("connection <%pI6c,%pI6c,%d> RX poll_cq processed %d\n",
&ic->conn->c_laddr,
&ic->conn->c_faddr,
ic->conn->c_tos,
break;
default:
rds_rtd(RDS_RTD_ERR,
- "Fatal QP Event %u (%s) - connection %pI4->%pI4 tos %d, reconnecting\n",
+ "Fatal QP Event %u (%s) - connection %pI6c->%pI6c tos %d, reconnecting\n",
event->event, rds_ib_event_str(event->event),
&conn->c_laddr, &conn->c_faddr, conn->c_tos);
rds_conn_drop(conn, DR_IB_QP_EVENT);
return ret;
}
-static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event)
+static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
{
- const struct rds_ib_connect_private *dp = event->param.conn.private_data;
- u16 common;
+ const union rds_ib_conn_priv *dp = event->param.conn.private_data;
+ u8 data_len, major, minor;
u32 version = 0;
+ __be16 mask;
+ u16 common;
/*
* rdma_cm private data is odd - when there is any private data in the
return 0;
}
+ if (isv6) {
+ data_len = sizeof(struct rds6_ib_connect_private);
+ major = dp->ricp_v6.dp_protocol_major;
+ minor = dp->ricp_v6.dp_protocol_minor;
+ mask = dp->ricp_v6.dp_protocol_minor_mask;
+ } else {
+ data_len = sizeof(struct rds_ib_connect_private);
+ major = dp->ricp_v4.dp_protocol_major;
+ minor = dp->ricp_v4.dp_protocol_minor;
+ mask = dp->ricp_v4.dp_protocol_minor_mask;
+ }
/* Even if len is crap *now* I still want to check it. -ASG */
- if (event->param.conn.private_data_len < sizeof(*dp)
- || dp->dp_protocol_major == 0)
+ if (event->param.conn.private_data_len < data_len || major == 0)
return RDS_PROTOCOL_4_0;
- common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS;
- if (dp->dp_protocol_major == 4 && common) {
+ common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS;
+ if (major == 4 && common) {
version = RDS_PROTOCOL_4_0;
while ((common >>= 1) != 0)
version++;
- } else if (RDS_PROTOCOL_COMPAT_VERSION ==
- RDS_PROTOCOL(dp->dp_protocol_major, dp->dp_protocol_minor)) {
+ } else if (RDS_PROTOCOL_COMPAT_VERSION == RDS_PROTOCOL(major, minor)) {
version = RDS_PROTOCOL_COMPAT_VERSION;
- } else if (printk_ratelimit()) {
- printk(KERN_NOTICE "RDS: Connection from %pI4 using "
- "incompatible protocol version %u.%u\n",
- &dp->dp_saddr,
- dp->dp_protocol_major,
- dp->dp_protocol_minor);
+ } else {
+ if (isv6) {
+ printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n",
+ &dp->ricp_v6.dp_saddr, major, minor);
+ } else {
+ printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n",
+ &dp->ricp_v4.dp_saddr, major, minor);
+ }
}
return version;
}
+/* Given an IPv6 address, find the IB net_device which hosts that address and
+ * return its index.  This is used by the rds_ib_cm_handle_connect() code to
+ * find the interface index of where an incoming request comes from when
+ * the request is using a link-local address.
+ *
+ * Note one problem with this search: it is possible for two interfaces to
+ * have the same link-local address.  Unfortunately, this cannot be resolved
+ * unless the underlying layer tells us which interface an incoming RDMA
+ * connect request arrived on.
+ */
+static u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr)
+{
+	struct net_device *dev;
+	int idx = 0;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
+		/* Only InfiniBand interfaces that host @addr qualify.
+		 * The final ipv6_chk_addr() argument (strict) is 0, so the
+		 * address merely needs to be configured on the device.
+		 */
+		if (dev->type == ARPHRD_INFINIBAND &&
+		    ipv6_chk_addr(net, addr, dev, 0)) {
+			idx = dev->ifindex;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	/* 0 means no matching interface was found (see header comment). */
+	return idx;
+}
+
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
- struct rdma_cm_event *event)
+ struct rdma_cm_event *event, bool isv6)
{
__be64 lguid = cm_id->route.path_rec->sgid.global.interface_id;
__be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
- const struct rds_ib_connect_private *dp = event->param.conn.private_data;
- struct rds_ib_connect_private dp_rep;
- struct rds_connection *conn = NULL;
+ const struct rds_ib_conn_priv_cmn *dp_cmn;
struct rds_ib_connection *ic = NULL;
+ struct rds_connection *conn = NULL;
struct rdma_conn_param conn_param;
- u32 version;
- int err = 1, destroy = 1;
+ const union rds_ib_conn_priv *dp;
+ union rds_ib_conn_priv dp_rep;
+ struct in6_addr s_mapped_addr;
+ struct in6_addr d_mapped_addr;
+ const struct in6_addr *saddr6;
+ const struct in6_addr *daddr6;
+ int destroy = 1;
int acl_ret = 0;
+ u32 ifindex = 0;
+ u32 version;
+ int err = 1;
u16 frag;
/* Check whether the remote protocol version matches ours. */
- version = rds_ib_protocol_compatible(event);
+ version = rds_ib_protocol_compatible(event, isv6);
if (!version)
goto out;
+ dp = event->param.conn.private_data;
+ if (isv6) {
+ dp_cmn = &dp->ricp_v6.dp_cmn;
+ saddr6 = &dp->ricp_v6.dp_saddr;
+ daddr6 = &dp->ricp_v6.dp_daddr;
+ /* If the local address is link local, need to find the
+ * interface index in order to create a proper RDS
+ * connection.
+ */
+ if (ipv6_addr_type(daddr6) & IPV6_ADDR_LINKLOCAL) {
+ /* Using init_net for now .. */
+ ifindex = __rds_find_ifindex(&init_net, daddr6);
+ /* No index found... Need to bail out. */
+ if (ifindex == 0) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ }
+ } else {
+ dp_cmn = &dp->ricp_v4.dp_cmn;
+ ipv6_addr_set_v4mapped(dp->ricp_v4.dp_saddr, &s_mapped_addr);
+ ipv6_addr_set_v4mapped(dp->ricp_v4.dp_daddr, &d_mapped_addr);
+ saddr6 = &s_mapped_addr;
+ daddr6 = &d_mapped_addr;
+ }
+
rds_rtd(RDS_RTD_CM,
- "saddr %pI4 daddr %pI4 RDSv%u.%u lguid 0x%llx fguid 0x%llx tos %d\n",
- &dp->dp_saddr, &dp->dp_daddr,
+ "saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx tos %d\n",
+ saddr6, daddr6,
RDS_PROTOCOL_MAJOR(version),
RDS_PROTOCOL_MINOR(version),
(unsigned long long)be64_to_cpu(lguid),
(unsigned long long)be64_to_cpu(fguid),
- dp->dp_tos);
+ dp_cmn->ricpc_tos);
- acl_ret = rds_ib_match_acl(cm_id, dp->dp_saddr);
+	/* XXX The IPoIB ACL currently only supports IPv4 */
+ acl_ret = rds_ib_match_acl(cm_id, saddr6->s6_addr32[3]);
if (acl_ret < 0) {
err = RDS_ACL_FAILURE;
rdsdebug("RDS: IB: passive: rds_ib_match_acl failed\n");
}
/* RDS/IB is not currently netns aware, thus init_net */
- conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
- &rds_ib_transport, dp->dp_tos, GFP_KERNEL);
+ conn = rds_conn_create(&init_net, daddr6, saddr6,
+ &rds_ib_transport, dp_cmn->ricpc_tos,
+ GFP_KERNEL, ifindex);
+
if (IS_ERR(conn)) {
rds_rtd(RDS_RTD_ERR, "rds_conn_create failed (%ld)\n",
PTR_ERR(conn));
retry = DIV_ROUND_UP(retry, 1000);
if (now > conn->c_connection_start &&
now - conn->c_connection_start > retry) {
- pr_info("RDS/IB: conn <%pI4,%pI4,%d> racing for more than %lus, retry\n",
+ pr_info("RDS/IB: conn <%pI6c,%pI6c,%d> racing for more than %lus, retry\n",
&conn->c_laddr, &conn->c_faddr,
conn->c_tos, retry);
set_bit(RDS_RECONNECT_TIMEDOUT,
*/
conn->c_connection_start = get_seconds();
- rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
+ rds_ib_set_flow_control(conn, be32_to_cpu(dp_cmn->ricpc_credit));
/* Use ic->i_flowctl as the first post credit to enable
* IB transport flow control. This first post credit is
* deducted after advertise the credit to the remote
/* If the peer gave us the last packet it saw, process this as if
* we had received a regular ACK. */
- if (dp->dp_ack_seq)
- rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
+ if (dp_cmn->ricpc_ack_seq)
+ rds_send_drop_acked(conn, be64_to_cpu(dp_cmn->ricpc_ack_seq),
+ NULL);
BUG_ON(cm_id->context);
BUG_ON(ic->i_cm_id);
err = rds_ib_setup_qp(conn);
if (err) {
- pr_warn("RDS/IB: rds_ib_setup_qp failed with err(%d) for conn <%pI4,%pI4,%d>\n",
+ pr_warn("RDS/IB: rds_ib_setup_qp failed with err(%d) for conn <%pI6c,%pI6c,%d>\n",
err, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
rds_conn_drop(conn, DR_IB_PAS_SETUP_QP_FAIL);
goto out;
}
- frag = rds_ib_set_frag_size(conn, be16_to_cpu(dp->dp_frag_sz));
+ frag = rds_ib_set_frag_size(conn, be16_to_cpu(dp_cmn->ricpc_frag_sz));
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
- event->param.conn.responder_resources,
- event->param.conn.initiator_depth,
- frag);
+ event->param.conn.responder_resources,
+ event->param.conn.initiator_depth,
+ frag, isv6);
/* rdma_accept() calls rdma_reject() internally if it fails */
err = rdma_accept(cm_id, &conn_param);
if (err) {
- pr_warn("RDS/IB: rdma_accept failed with err(%d) for conn <%pI4,%pI4,%d>\n",
+ pr_warn("RDS/IB: rdma_accept failed with err(%d) for conn <%pI6c,%pI6c,%d>\n",
err, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
rds_conn_drop(conn, DR_IB_RDMA_ACCEPT_FAIL);
}
queue_delayed_work(rds_aux_wq, &work->work, 0);
}
-int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
+int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
{
struct rds_connection *conn = cm_id->context;
struct rds_ib_connection *ic = conn->c_transport_data;
struct rdma_conn_param conn_param;
- struct rds_ib_connect_private dp;
+ union rds_ib_conn_priv dp;
u16 frag;
int ret;
- ret = rds_ib_match_acl(ic->i_cm_id, conn->c_faddr);
+ ret = rds_ib_match_acl(ic->i_cm_id, conn->c_faddr.s6_addr32[3]);
if (ret < 0) {
- pr_err("RDS: IB: active conn=%p, <%pI4,%pI4,%d> destroyed due ACL violation\n",
+ pr_err("RDS: IB: active conn=%p, <%pI6c,%pI6c,%d> destroyed due ACL violation\n",
conn, &conn->c_laddr, &conn->c_faddr,
conn->c_tos);
rds_ib_conn_destroy_init(conn);
*/
atomic_set(&ic->i_credits, IB_SET_POST_CREDITS(ic->i_flowctl));
- pr_debug("RDS/IB: Initiate conn <%pI4, %pI4,%d> with Frags <init,ic>: {%d,%d}\n",
+ pr_debug("RDS/IB: Initiate conn <%pI6c, %pI6c,%d> with Frags <init,ic>: {%d,%d}\n",
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
ib_init_frag_size / SZ_1K, ic->i_frag_sz / SZ_1K);
frag = rds_ib_set_frag_size(conn, ib_init_frag_size);
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
conn->c_proposed_version, UINT_MAX, UINT_MAX,
- frag);
+ frag, isv6);
ret = rdma_connect(cm_id, &conn_param);
if (ret) {
pr_warn("RDS/IB: rdma_connect failed (%d)\n", ret);
int rds_ib_conn_path_connect(struct rds_conn_path *cp)
{
struct rds_connection *conn = cp->cp_conn;
- struct rds_ib_connection *ic = conn->c_transport_data;
- struct sockaddr_in src, dest;
+ struct sockaddr_storage src, dest;
+ rdma_cm_event_handler handler;
+ struct rds_ib_connection *ic;
int ret;
+ ic = conn->c_transport_data;
+
/* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */
- ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
- RDMA_PS_TCP, IB_QPT_RC);
+ handler = rds_rdma_cm_event_handler;
+ ic->i_cm_id = rdma_create_id(handler, conn, RDMA_PS_TCP, IB_QPT_RC);
+
if (IS_ERR(ic->i_cm_id)) {
ret = PTR_ERR(ic->i_cm_id);
ic->i_cm_id = NULL;
}
rds_rtd(RDS_RTD_CM_EXT,
- "RDS/IB: conn init <%pI4,%pI4,%d> cm_id %p\n",
+ "RDS/IB: conn init <%pI6c,%pI6c,%d> cm_id %p\n",
&conn->c_laddr, &conn->c_faddr,
conn->c_tos, ic->i_cm_id);
- src.sin_family = AF_INET;
- src.sin_addr.s_addr = (__force u32)conn->c_laddr;
- src.sin_port = (__force u16)htons(0);
+ if (ipv6_addr_v4mapped(&conn->c_faddr)) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&src;
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
+ sin->sin_port = (__force u16)htons(0);
- dest.sin_family = AF_INET;
- dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
- dest.sin_port = (__force u16)htons(RDS_PORT);
+ sin = (struct sockaddr_in *)&dest;
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
+ sin->sin_port = (__force u16)htons(RDS_PORT);
+ } else {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&src;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_addr = conn->c_laddr;
+ sin6->sin6_port = (__force u16)htons(0);
+ sin6->sin6_scope_id = conn->c_dev_if;
+
+ sin6 = (struct sockaddr_in6 *)&dest;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_addr = conn->c_faddr;
+ sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
+ sin6->sin6_scope_id = conn->c_dev_if;
+ }
ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
(struct sockaddr *)&dest,
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
kfree_rcu(to_free, rcu_head);
}
-int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
+int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,
+ struct in6_addr *ipaddr)
{
struct rds_ib_device *rds_ibdev_old;
- rds_ibdev_old = rds_ib_get_device(ipaddr);
+ rds_ibdev_old = rds_ib_get_device(ipaddr->s6_addr32[3]);
if (!rds_ibdev_old)
- return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
+ return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]);
if (rds_ibdev_old != rds_ibdev) {
- rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
+ rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr->s6_addr32[3]);
rds_ib_dev_put(rds_ibdev_old);
- return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
+ return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]);
}
rds_ib_dev_put(rds_ibdev_old);
struct rds_ib_connection *ic = NULL;
int ret;
- rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
+ rds_ibdev = rds_ib_get_device(rs->rs_bound_addr.s6_addr32[3]);
if (!rds_ibdev) {
ret = -ENODEV;
goto out;
if (wc->status != IB_WC_SUCCESS) {
if (rds_conn_up(ic->conn)) {
- pr_warn("RDS: IB: MR completion <%pI4,%pI4,%d> status %u "
+ pr_warn("RDS: IB: MR completion <%pI6c,%pI6c,%d> status %u "
"vendor_err %u, disconnecting and reconnecting\n",
&ic->conn->c_laddr, &ic->conn->c_faddr,
ic->conn->c_tos, wc->status, wc->vendor_err);
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
rds_ib_stats_inc(s_ib_rx_total_incs);
}
INIT_LIST_HEAD(&ibinc->ii_frags);
- rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
+ rds_inc_init(&ibinc->ii_inc, ic->conn, &ic->conn->c_faddr);
return ibinc;
}
ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
if (ret) {
rds_conn_drop(conn, DR_IB_POST_RECV_FAIL);
- pr_warn("RDS/IB: recv post on %pI4 returned %d, disconnecting and reconnecting\n",
+ pr_warn("RDS/IB: recv post on %pI6c returned %d, disconnecting and reconnecting\n",
&conn->c_faddr, ret);
break;
}
if (data_len < sizeof(struct rds_header)) {
rds_conn_drop(conn, DR_IB_HEADER_MISSING);
- pr_warn("RDS/IB: incoming message from %pI4 didn't inclue a header, disconnecting and reconnecting\n",
+		pr_warn("RDS/IB: incoming message from %pI6c didn't include a header, disconnecting and reconnecting\n",
&conn->c_faddr);
return;
}
/* Validate the checksum. */
if (!rds_message_verify_checksum(ihdr)) {
rds_conn_drop(conn, DR_IB_HEADER_CORRUPTED);
- pr_warn("RDS/IB: incoming message from %pI4 has corrupted header - forcing a reconnect\n",
+ pr_warn("RDS/IB: incoming message from %pI6c has corrupted header - forcing a reconnect\n",
&conn->c_faddr);
rds_stats_inc(s_recv_drop_bad_checksum);
return;
ic->i_recv_data_rem = 0;
ic->i_ibinc = NULL;
- if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
+ if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) {
rds_ib_cong_recv(conn, ibinc);
- else {
- rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
+ } else {
+ rds_recv_incoming(conn, &conn->c_faddr, &conn->c_laddr,
&ibinc->ii_inc, GFP_ATOMIC);
state->ack_next = be64_to_cpu(hdr->h_sequence);
state->ack_next_valid = 1;
struct rds_header *ihdr, *hdr;
if (data_len < sizeof(struct rds_header)) {
- printk(KERN_WARNING "RDS: from %pI4 didn't inclue a "
+ printk(KERN_WARNING "RDS: from %pI6c didn't inclue a "
"header, disconnecting and "
"reconnecting\n",
&conn->c_faddr);
/* Validate the checksum. */
if (!rds_message_verify_checksum(ihdr)) {
- printk(KERN_WARNING "RDS: from %pI4 has corrupted header - "
+ printk(KERN_WARNING "RDS: from %pI6c has corrupted header - "
"forcing a reconnect\n",
&conn->c_faddr);
rds_stats_inc(s_recv_drop_bad_checksum);
if (!ibinc) {
ibinc = recv->r_ibinc;
- rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
+ rds_inc_init(&ibinc->ii_inc, ic->conn, &ic->conn->c_faddr);
recv->r_ibinc = NULL;
ic->i_ibinc = ibinc;
hdr = &ibinc->ii_inc.i_hdr;
if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
rds_ib_cong_recv(conn, ibinc);
else {
- rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
- &ibinc->ii_inc, GFP_ATOMIC);
+ rds_recv_incoming(conn, &conn->c_faddr, &conn->c_laddr,
+ &ibinc->ii_inc, GFP_ATOMIC);
state->ack_next = be64_to_cpu(hdr->h_sequence);
state->ack_next_valid = 1;
if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
/* Flush errors are normal while draining the QP */
if (wc->status != IB_WC_WR_FLUSH_ERR)
- pr_warn("RDS/IB: recv completion <%pI4,%pI4,%d> had status %u vendor_err 0x%x, disconnecting and reconnecting\n",
+ pr_warn("RDS/IB: recv completion <%pI6c,%pI6c,%d> had status %u vendor_err 0x%x, disconnecting and reconnecting\n",
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
wc->status, wc->vendor_err);
if (wc->status == IB_WC_LOC_LEN_ERR)
/* Flush errors are normal while draining the QP */
if (!(wc->status == IB_WC_WR_FLUSH_ERR ||
wc->status == IB_WC_RETRY_EXC_ERR))
- pr_warn("RDS/IB: send completion <%pI4,%pI4,%d> status %u vendor_err 0x%x, disconnecting and reconnecting\n",
+ pr_warn("RDS/IB: send completion <%pI6c,%pI6c,%d> status %u vendor_err 0x%x, disconnecting and reconnecting\n",
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
wc->status, wc->vendor_err);
rds_conn_drop(conn, DR_IB_SEND_COMP_ERR);
first, &first->s_wr, ret, failed_wr);
BUG_ON(failed_wr != &first->s_wr);
if (ret) {
- printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
+ printk(KERN_WARNING "RDS/IB: ib_post_send to %pI6c "
"returned %d\n", &conn->c_faddr, ret);
rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
rds_ib_sub_signaled(ic, nr_sig);
send, &send->s_wr, ret, failed_wr);
BUG_ON(failed_wr != &send->s_wr);
if (ret) {
- printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 returned %d\n",
+ printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI6c returned %d\n",
&conn->c_faddr, ret);
rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
rds_ib_sub_signaled(ic, nr_sig);
first, &first->s_wr, ret, failed_wr);
BUG_ON(failed_wr != &first->s_wr);
if (ret) {
- printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
+ printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI6c "
"returned %d\n", &conn->c_faddr, ret);
rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
rds_ib_sub_signaled(ic, nr_sig);
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
*/
#include <linux/kernel.h>
#include <linux/in.h>
+#include <linux/ipv6.h>
#include "rds.h"
#include "loop.h"
BUG_ON(hdr_off || sg || off);
- rds_inc_init(&rm->m_inc, conn, conn->c_laddr);
+ rds_inc_init(&rm->m_inc, conn, &conn->c_laddr);
/* For the embedded inc. Matching put is in loop_inc_free() */
rds_message_addref(rm);
- rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
+ rds_recv_incoming(conn, &conn->c_laddr, &conn->c_faddr, &rm->m_inc,
GFP_KERNEL);
rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence),
org = rds_nf_hdr_org(skb);
/* assuming original and dest are exactly the same then it's our own node */
- if (dst->daddr == org->daddr && dst->saddr == org->saddr &&
+ if (ipv6_addr_equal(&dst->daddr, &org->daddr) &&
+ ipv6_addr_equal(&dst->saddr, &org->saddr) &&
dst->sport == org->sport && dst->dport == org->dport) {
return 1;
}
/*
- * Copyright (c) 2007 Oracle. All rights reserved.
+ * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
long i;
int ret;
- if (rs->rs_bound_addr == 0) {
+ if (ipv6_addr_any(&rs->rs_bound_addr)) {
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
args = CMSG_DATA(cmsg);
- if (rs->rs_bound_addr == 0) {
+ if (ipv6_addr_any(&rs->rs_bound_addr)) {
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
ARRAY_SIZE(rds_cm_event_strings), type);
};
-int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
- struct rdma_cm_event *event)
+int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event,
+ bool isv6)
{
/* this can be null in the listening path */
struct rds_connection *conn = cm_id->context;
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST:
- ret = trans->cm_handle_connect(cm_id, event);
+ ret = trans->cm_handle_connect(cm_id, event, isv6);
break;
case RDMA_CM_EVENT_ADDR_RESOLVED:
if (conn) {
struct rds_ib_connection *ibic;
- printk(KERN_CRIT "rds dropping connection after rdma_resolve_route failure connection %pI4->%pI4\n",
+ printk(KERN_CRIT "rds dropping connection after rdma_resolve_route failure connection %pI6c->%pI6c\n",
&conn->c_laddr, &conn->c_faddr);
ibic = conn->c_transport_data;
if (ibic && ibic->i_cm_id == cm_id)
*/
cm_id->route.path_rec[0].sl = conn->c_tos;
cm_id->route.path_rec[0].qos_class = conn->c_tos;
- ret = trans->cm_initiate_connect(cm_id);
+ ret = trans->cm_initiate_connect(cm_id, isv6);
} else {
rds_rtd(RDS_RTD_CM,
- "ROUTE_RESOLVED: calling rds_conn_drop, conn %p <%pI4,%pI4,%d>\n",
+ "ROUTE_RESOLVED: calling rds_conn_drop, conn %p <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr,
&conn->c_faddr, conn->c_tos);
rds_conn_drop(conn, DR_IB_RDMA_CM_ID_MISMATCH);
printk(KERN_ERR "alloc_page failed .. NO MEM\n");
ret = -ENOMEM;
} else {
- r = (struct arpreq *)kmap(page);
- memset(r, 0, sizeof(struct arpreq));
- sin = (struct sockaddr_in *)&r->arp_pa;
- sin->sin_family = AF_INET;
- sin->sin_addr.s_addr = conn->c_faddr;
- inet_ioctl(rds_ib_inet_socket, SIOCDARP, (unsigned long) r);
- kunmap(page);
- __free_page(page);
+ if (ipv6_addr_v4mapped(&conn->c_faddr)) {
+ r = (struct arpreq *)kmap(page);
+ memset(r, 0, sizeof(struct arpreq));
+ sin = (struct sockaddr_in *)&r->arp_pa;
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr =
+ conn->c_faddr.s6_addr32[3];
+ inet_ioctl(rds_ib_inet_socket, SIOCDARP,
+ (unsigned long)r);
+ kunmap(page);
+ __free_page(page);
+ }
}
if (conn) {
rds_rtd(RDS_RTD_ERR,
- "ROUTE_ERROR: conn %p, calling rds_conn_drop <%pI4,%pI4,%d>\n",
+ "ROUTE_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr,
&conn->c_faddr, conn->c_tos);
rds_conn_drop(conn, DR_IB_ROUTE_ERR);
case RDMA_CM_EVENT_ADDR_ERROR:
if (conn) {
rds_rtd(RDS_RTD_ERR,
- "ADDR_ERROR: conn %p, calling rds_conn_drop <%pI4,%pI4,%d>\n",
+ "ADDR_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr,
&conn->c_faddr, conn->c_tos);
rds_conn_drop(conn, DR_IB_ADDR_ERR);
case RDMA_CM_EVENT_DEVICE_REMOVAL:
if (conn) {
rds_rtd(RDS_RTD_ERR,
- "CONN/UNREACHABLE/RMVAL ERR: conn %p, calling rds_conn_drop <%pI4,%pI4,%d>\n",
+ "CONN/UNREACHABLE/RMVAL ERR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr,
&conn->c_faddr, conn->c_tos);
rds_conn_drop(conn, DR_IB_CONNECT_ERR);
if (event->status == RDS_REJ_CONSUMER_DEFINED &&
(*err) == 0) {
/* Rejection from RDSV3.1 */
- pr_warn("Rejected: CSR_DEF err 0, calling rds_conn_drop <%pI4,%pI4,%d>\n",
+ pr_warn("Rejected: CSR_DEF err 0, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
&conn->c_laddr,
&conn->c_faddr, conn->c_tos);
if (!conn->c_tos)
} else if (event->status == RDS_REJ_CONSUMER_DEFINED &&
(*err) == RDS_ACL_FAILURE) {
/* Rejection due to ACL violation */
- pr_err("RDS: IB: conn=%p, <%pI4,%pI4,%d> destroyed due to ACL violation\n",
+ pr_err("RDS: IB: conn=%p, <%pI6c,%pI6c,%d> destroyed due to ACL violation\n",
conn, &conn->c_laddr,
&conn->c_faddr,
conn->c_tos);
rds_ib_conn_destroy_init(conn);
} else {
rds_rtd(RDS_RTD_ERR,
- "Rejected: *err %d status %d calling rds_conn_drop <%pI4,%pI4,%d>\n",
+ "Rejected: *err %d status %d calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
*err, event->status,
&conn->c_laddr,
&conn->c_faddr,
case RDMA_CM_EVENT_ADDR_CHANGE:
rds_rtd(RDS_RTD_CM_EXT,
- "ADDR_CHANGE event <%pI4,%pI4>\n",
+ "ADDR_CHANGE event <%pI6c,%pI6c>\n",
&conn->c_laddr,
&conn->c_faddr);
if (conn) {
rds_rtd(RDS_RTD_CM,
- "ADDR_CHANGE: calling rds_conn_drop <%pI4,%pI4,%d>\n",
+ "ADDR_CHANGE: calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
&conn->c_laddr, &conn->c_faddr,
conn->c_tos);
if (!rds_conn_self_loopback_passive(conn)) {
case RDMA_CM_EVENT_DISCONNECTED:
rds_rtd(RDS_RTD_CM,
- "DISCONNECT event - dropping connection %pI4->%pI4 tos %d\n",
+ "DISCONNECT event - dropping connection %pI6c->%pI6c tos %d\n",
&conn->c_laddr, &conn->c_faddr, conn->c_tos);
rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT);
break;
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
if (conn) {
- printk(KERN_INFO "TIMEWAIT_EXIT event - "
- "dropping connection "
- "%pI4->%pI4\n", &conn->c_laddr,
- &conn->c_faddr);
+ printk(KERN_INFO "TIMEWAIT_EXIT event - dropping connection %pI6c->%pI6c\n",
+ &conn->c_laddr, &conn->c_faddr);
rds_conn_drop(conn, DR_IB_TIMEWAIT_EXIT);
} else
printk(KERN_INFO "TIMEWAIT_EXIT event - conn=NULL\n");
return ret;
}
-static int rds_rdma_listen_init(void)
+/* Wrapper around rds_rdma_cm_event_handler_cmn() that passes isv6 == false,
+ * i.e. the connection private data is parsed using the IPv4
+ * (struct rds_ib_connect_private) layout.
+ */
+int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
+			      struct rdma_cm_event *event)
+{
+	return rds_rdma_cm_event_handler_cmn(cm_id, event, false);
+}
+
+static int rds_rdma_listen_init_common(rdma_cm_event_handler handler,
+ struct sockaddr *sa,
+ struct rdma_cm_id **ret_cm_id)
{
- struct sockaddr_in sin;
struct rdma_cm_id *cm_id;
int ret;
- cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP,
- IB_QPT_RC);
+ cm_id = rdma_create_id(handler, NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
- printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
- "rdma_create_id() returned %d\n", ret);
+ printk(KERN_ERR "RDS/RDMA: failed to setup listener, rdma_create_id() returned %d\n",
+ ret);
return ret;
}
- sin.sin_family = PF_INET,
- sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
- sin.sin_port = (__force u16)htons(RDS_PORT);
-
- /*
- * XXX I bet this binds the cm_id to a device. If we want to support
+ /* XXX I bet this binds the cm_id to a device. If we want to support
* fail-over we'll have to take this into consideration.
*/
- ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
+ ret = rdma_bind_addr(cm_id, sa);
if (ret) {
- printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
- "rdma_bind_addr() returned %d\n", ret);
+ printk(KERN_ERR "RDS/RDMA: failed to setup listener, rdma_bind_addr() returned %d\n",
+ ret);
goto out;
}
ret = rdma_listen(cm_id, 128);
if (ret) {
- printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
- "rdma_listen() returned %d\n", ret);
+ printk(KERN_ERR "RDS/RDMA: failed to setup listener, rdma_listen() returned %d\n",
+ ret);
goto out;
}
- rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT);
+ rdsdebug("cm %p listening on port %u\n", cm_id,
+ ntohs(((struct sockaddr_in *)sa)->sin_port));
- rds_rdma_listen_id = cm_id;
+ *ret_cm_id = cm_id;
cm_id = NULL;
out:
if (cm_id)
return ret;
}
+/* Initialize the RDS RDMA listeners. We create two listeners for
+ * compatibility reason. The one on RDS_PORT is used for IPv4
+ * requests only. The one on RDS_TCP_PORT is used for IPv6 requests
+ * only. So only IPv6 enabled RDS module will communicate using this
+ * port.
+ */
+static int rds_rdma_listen_init(void)
+{
+ int ret;
+ struct sockaddr_in sin;
+
+ sin.sin_family = PF_INET;
+ sin.sin_addr.s_addr = htonl(INADDR_ANY);
+ sin.sin_port = htons(RDS_PORT);
+ ret = rds_rdma_listen_init_common(rds_rdma_cm_event_handler,
+ (struct sockaddr *)&sin,
+ &rds_rdma_listen_id);
+ return ret;
+}
+
static void rds_rdma_listen_stop(void)
{
if (rds_rdma_listen_id) {
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <uapi/linux/rds.h>
+#include <linux/in6.h>
#include <linux/sizes.h>
#include "info.h"
struct rds_cong_map {
struct rb_node m_rb_node;
- __be32 m_addr;
+ struct in6_addr m_addr;
wait_queue_head_t m_waitq;
struct list_head m_conn_list;
unsigned long m_page_addrs[RDS_CONG_MAP_PAGES];
struct rds_connection {
struct hlist_node c_hash_node;
- __be32 c_laddr;
- __be32 c_faddr;
+ struct in6_addr c_laddr;
+ struct in6_addr c_faddr;
+ int c_dev_if; /* c_laddr's interface index */
unsigned int c_loopback:1,
+ c_isv6:1,
c_ping_triggered:1,
c_destroy_in_prog:1,
- c_pad_to_32:29;
+
+ c_pad_to_32:28;
int c_npaths;
struct rds_connection *c_passive;
struct rds_transport *c_trans;
struct rds_conn_path *i_conn_path;
struct rds_header i_hdr;
unsigned long i_rx_jiffies;
- __be32 i_saddr;
+ struct in6_addr i_saddr;
/* extension fields for dealing with netfilter */
struct rds_connection *i_oconn;
struct list_head m_conn_item;
struct rds_incoming m_inc;
u64 m_ack_seq;
- __be32 m_daddr;
+ struct in6_addr m_daddr;
unsigned long m_flags;
/* Never access m_rs without holding m_rs_lock.
t_mp_capable:1;
unsigned int t_type;
- int (*laddr_check)(struct net *net, __be32 addr);
+ int (*laddr_check)(struct net *net, const struct in6_addr *addr,
+ __u32 scope_id);
int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
void (*conn_free)(void *data);
int (*conn_path_connect)(struct rds_conn_path *cp);
int (*skb_local)(struct sk_buff *skb);
int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
- struct rdma_cm_event *event);
- int (*cm_initiate_connect)(struct rdma_cm_id *cm_id);
+ struct rdma_cm_event *event, bool isv6);
+ int (*cm_initiate_connect)(struct rdma_cm_id *cm_id, bool isv6);
void (*cm_connect_complete)(struct rds_connection *conn,
struct rdma_cm_event *event);
* support.
*/
struct hlist_node rs_bound_node;
- __be32 rs_bound_addr;
- __be32 rs_conn_addr;
- __be16 rs_bound_port;
+ struct sockaddr_in6 rs_bound_sin6;
+#define rs_bound_addr rs_bound_sin6.sin6_addr
+#define rs_bound_addr_v4 rs_bound_sin6.sin6_addr.s6_addr32[3]
+#define rs_bound_port rs_bound_sin6.sin6_port
+#define rs_bound_scope_id rs_bound_sin6.sin6_scope_id
+ struct in6_addr rs_conn_addr;
+#define rs_conn_addr_v4 rs_conn_addr.s6_addr32[3]
__be16 rs_conn_port;
struct rds_transport *rs_transport;
/* bind.c */
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
void rds_remove_bound(struct rds_sock *rs);
-struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
+struct rds_sock *rds_find_bound(struct in6_addr *addr, __be16 port,
+ __u32 scope_id);
void rds_bind_lock_init(void);
/* cong.c */
int rds_conn_init(void);
void rds_conn_exit(void);
struct rds_connection *rds_conn_create(struct net *net,
- __be32 laddr, __be32 faddr,
- struct rds_transport *trans,
- u8 tos, gfp_t gfp);
+ const struct in6_addr *laddr,
+ const struct in6_addr *faddr,
+ struct rds_transport *trans,
+ u8 tos, gfp_t gfp, int dev_if);
struct rds_connection *rds_conn_create_outgoing(struct net *net,
- __be32 laddr, __be32 faddr,
- struct rds_transport *trans,
- u8 tos, gfp_t gfp);
-struct rds_connection *rds_conn_find(struct net *net, __be32 laddr,
- __be32 faddr,
- struct rds_transport *trans, u8 tos);
+ struct in6_addr *laddr,
+ struct in6_addr *faddr,
+ struct rds_transport *trans,
+ u8 tos, gfp_t gfp, int dev_if);
+struct rds_connection *rds_conn_find(struct net *net, struct in6_addr *laddr,
+ struct in6_addr *faddr,
+ struct rds_transport *trans, u8 tos,
+ int dev_if);
void rds_conn_shutdown(struct rds_conn_path *cp);
void rds_conn_destroy(struct rds_connection *conn, int shutdown);
void rds_conn_reset(struct rds_connection *conn);
void rds_conn_drop(struct rds_connection *conn, int reason);
void rds_conn_path_drop(struct rds_conn_path *cp, int reason);
-void rds_conn_laddr_list(struct net *net,
- __be32 laddr, struct list_head *laddr_conns);
+void rds_conn_laddr_list(struct net *net, struct in6_addr *laddr,
+ struct list_head *laddr_conns);
void rds_conn_connect_if_down(struct rds_connection *conn);
void rds_conn_path_connect_if_down(struct rds_conn_path *conn);
void rds_for_each_conn_info(struct socket *sock, unsigned int len,
static inline bool
rds_conn_self_loopback_passive(struct rds_connection *conn)
{
- if (conn->c_laddr == conn->c_faddr && !conn->c_passive)
+ if (ipv6_addr_equal(&conn->c_laddr, &conn->c_faddr) && !conn->c_passive)
return true;
else
return false;
/* recv.c */
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
- __be32 saddr);
+ struct in6_addr *saddr);
void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *conn,
- __be32 saddr);
+ struct in6_addr *saddr);
void rds_inc_addref(struct rds_incoming *inc);
void rds_inc_put(struct rds_incoming *inc);
-void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
+void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
+ struct in6_addr *daddr,
struct rds_incoming *inc, gfp_t gfp);
int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int msg_flags);
int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len);
void rds_send_path_reset(struct rds_conn_path *cp);
int rds_send_xmit(struct rds_conn_path *cp);
-struct sockaddr_in;
-void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
+void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest);
typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
is_acked_func is_acked);
void rds_reconnect_timeout(struct work_struct *);
void rds_connect_path_complete(struct rds_conn_path *cp, int curr);
void rds_connect_complete(struct rds_connection *conn);
+int rds_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2);
/* transport.c */
int rds_trans_register(struct rds_transport *trans);
void rds_trans_unregister(struct rds_transport *trans);
-struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr);
+struct rds_transport *rds_trans_get_preferred(struct net *net,
+ const struct in6_addr *addr,
+ __u32 scope_id);
void rds_trans_put(struct rds_transport *trans);
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);
/* forward prototypes */
static void
-rds_recv_drop(struct rds_connection *conn, __be32 saddr, __be32 daddr,
+rds_recv_drop(struct rds_connection *conn, struct in6_addr *saddr,
+ struct in6_addr *daddr,
struct rds_incoming *inc, gfp_t gfp);
static void
gfp_t gfp);
static void
-rds_recv_local(struct rds_conn_path *cp, __be32 saddr, __be32 daddr,
+rds_recv_local(struct rds_conn_path *cp, struct in6_addr *saddr,
+ struct in6_addr *daddr,
struct rds_incoming *inc, gfp_t gfp, struct rds_sock *rs);
static int
}
void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
- __be32 saddr)
+ struct in6_addr *saddr)
{
int i;
atomic_set(&inc->i_refcount, 1);
INIT_LIST_HEAD(&inc->i_item);
inc->i_conn = conn;
- inc->i_saddr = saddr;
+ inc->i_saddr = *saddr;
inc->i_rdma_cookie = 0;
inc->i_oconn = NULL;
inc->i_skb = NULL;
EXPORT_SYMBOL_GPL(rds_inc_init);
void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp,
- __be32 saddr)
+ struct in6_addr *saddr)
{
int i;
INIT_LIST_HEAD(&inc->i_item);
inc->i_conn = cp->cp_conn;
inc->i_conn_path = cp;
- inc->i_saddr = saddr;
+ inc->i_saddr = *saddr;
inc->i_rdma_cookie = 0;
inc->i_oconn = NULL;
inc->i_skb = NULL;
rds_stats_add(s_recv_bytes_removed_from_socket, -delta);
now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);
- rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d "
+ rdsdebug("rs %p (%pI6c:%u) recv bytes %d buf %d "
"now_cong %d delta %d\n",
rs, &rs->rs_bound_addr,
ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
struct rds_conn_path *cp;
if (conn->c_npaths > 1 &&
- IS_CANONICAL(conn->c_laddr, conn->c_faddr)) {
+ rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) < 0) {
for (i = 0; i < conn->c_npaths; i++) {
cp = &conn->c_path[i];
rds_conn_path_connect_if_down(cp);
* conn. This lets loopback, who only has one conn for both directions,
* tell us which roles the addrs in the conn are playing for this message.
*/
-void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
+void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
+ struct in6_addr *daddr,
struct rds_incoming *inc, gfp_t gfp)
{
struct sk_buff *skb;
int ret;
struct rds_conn_path *cp;
- rdsdebug(KERN_ALERT "incoming: conn %p, inc %p, %pI4:%d -> %pI4:%d\n",
- conn, inc, &saddr, inc->i_hdr.h_sport, &daddr,
+ rdsdebug(KERN_ALERT "incoming: conn %p, inc %p, %pI6c : %d -> %pI6c : %d\n",
+ conn, inc, saddr, inc->i_hdr.h_sport, daddr,
inc->i_hdr.h_dport);
/* initialize some globals */
cp = &conn->c_path[0];
/* lets find a socket to which this request belongs */
- rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
+ rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_dev_if);
/* pass it on locally if there is no socket bound, or if netfilter is
* disabled for this socket */
if (NULL == skb) {
/* if we have allocation problems, then we just need to depart */
rds_rtd(RDS_RTD_ERR,
- "failure to allocate space for inc %p, %pI4 -> %pI4 tos %d\n",
- inc, &saddr, &daddr, conn->c_tos);
+ "failure to allocate space for inc %p, %pI6c -> %pI6c tos %d\n",
+ inc, saddr, daddr, conn->c_tos);
rds_recv_local(cp, saddr, daddr, inc, gfp, rs);
/* drop the reference if we had taken one */
if (NULL != rs)
org = rds_nf_hdr_org(skb);
/* now update our rds_nf_hdr for tracking locations of the request */
- dst->saddr = saddr;
- dst->daddr = daddr;
+ dst->saddr = *saddr;
+ dst->daddr = *daddr;
dst->sport = inc->i_hdr.h_sport;
dst->dport = inc->i_hdr.h_dport;
dst->flags = 0;
/* if we had a failure to convert, then just assuming to continue as local */
else {
rds_rtd(RDS_RTD_RCV_EXT,
- "failed to create skb form, conn %p, inc %p, %pI4 -> %pI4 tos %d\n",
- conn, inc, &saddr, &daddr, conn->c_tos);
+ "failed to create skb form, conn %p, inc %p, %pI6c -> %pI6c tos %d\n",
+ conn, inc, saddr, daddr, conn->c_tos);
ret = 1;
}
/* this is the normal good processed state */
else if (ret >= 0) {
/* check the original header and if changed do the needful */
- if (dst->saddr == org->saddr && dst->daddr == org->daddr &&
+ if (ipv6_addr_equal(&dst->saddr, &org->saddr) &&
+ ipv6_addr_equal(&dst->daddr, &org->daddr) &&
conn->c_trans->skb_local(skb)) {
rds_recv_local(cp, saddr, daddr, inc, gfp, NULL);
}
/* we don't really expect an error state from this call that isn't the done above */
else {
/* we don't really know how to handle this yet - just ignore for now */
- printk(KERN_ERR "unacceptible state for skb ret %d, conn %p, inc %p, %pI4 -> %pI4\n",
- ret, conn, inc, &saddr, &daddr);
+ printk(KERN_ERR "unacceptible state for skb ret %d, conn %p, inc %p, %pI6c -> %pI6c\n",
+ ret, conn, inc, saddr, daddr);
}
}
EXPORT_SYMBOL_GPL(rds_recv_incoming);
static void
-rds_recv_drop(struct rds_connection *conn, __be32 saddr, __be32 daddr,
+rds_recv_drop(struct rds_connection *conn, struct in6_addr *saddr,
+ struct in6_addr *daddr,
struct rds_incoming *inc, gfp_t gfp)
{
/* drop the existing incoming message */
- rdsdebug("dropping request on conn %p, inc %p, %pI4 -> %pI4",
- conn, inc, &saddr, &daddr);
+ rdsdebug("dropping request on conn %p, inc %p, %pI6c -> %pI6c",
+ conn, inc, saddr, daddr);
}
static void
org = rds_nf_hdr_org(inc->i_skb);
/* special case where we are swapping the message back on the same connection */
- if (dst->saddr == org->daddr && dst->daddr == org->saddr) {
+ if (ipv6_addr_equal(&dst->saddr, &org->daddr) &&
+ ipv6_addr_equal(&dst->daddr, &org->saddr)) {
nconn = conn;
} else {
/* reroute to a new conn structure, possibly the same one */
nconn = rds_conn_find(rds_conn_net(conn),
- dst->saddr, dst->daddr, conn->c_trans,
- conn->c_tos);
+ &dst->saddr, &dst->daddr, conn->c_trans,
+ conn->c_tos, conn->c_dev_if);
}
/* cannot find a matching connection so drop the request */
if (NULL == nconn) {
- printk(KERN_ALERT "cannot find matching conn for inc %p, %pI4 -> %pI4\n",
+ printk(KERN_ALERT "cannot find matching conn for inc %p, %pI6c -> %pI6c\n",
inc, &dst->saddr, &dst->daddr);
- rdsdebug("cannot find matching conn for inc %p, %pI4 -> %pI4",
+ rdsdebug("cannot find matching conn for inc %p, %pI6c -> %pI6c",
inc, &dst->saddr, &dst->daddr);
- rds_recv_drop(conn, dst->saddr, dst->daddr, inc, gfp);
+ rds_recv_drop(conn, &dst->saddr, &dst->daddr, inc, gfp);
}
/* this is a request for our local node, but potentially a different source
* either way we process it locally */
else if (conn->c_trans->skb_local(inc->i_skb)) {
WARN_ON(nconn->c_trans->t_mp_capable);
rds_recv_local(&nconn->c_path[0],
- dst->saddr, dst->daddr, inc, gfp, NULL);
+ &dst->saddr, &dst->daddr, inc, gfp, NULL);
}
/* looks like this request is going out to another node */
else {
org = rds_nf_hdr_org(inc->i_skb);
/* find the proper output socket - it should be the local one on which we originated */
- rs = rds_find_bound(dst->saddr, dst->sport);
+ rs = rds_find_bound(&dst->saddr, dst->sport, conn->c_dev_if);
if (!rs) {
rds_rtd(RDS_RTD_RCV,
- "failed to find output rds_socket dst %pI4 : %u, inc %p, conn %p tos %d\n",
+ "failed to find output rds_socket dst %pI6c : %u, inc %p, conn %p tos %d\n",
&dst->daddr, dst->dport, inc, conn,
conn->c_tos);
rds_stats_inc(s_recv_drop_no_sock);
ret = rds_send_internal(conn, rs, inc->i_skb, gfp);
if (len != ret) {
rds_rtd(RDS_RTD_RCV,
- "failed to send rds_data dst %pI4 : %u, inc %p, conn %p tos %d, len %d != ret %d\n",
+ "failed to send rds_data dst %pI6c : %u, inc %p, conn %p tos %d, len %d != ret %d\n",
&dst->daddr, dst->dport, inc, conn, conn->c_tos,
len, ret);
goto out;
NF_HOOK(PF_RDS_HOOK, NF_RDS_FORWARD_ERROR, sk, inc->i_skb, NULL, NULL, rds_recv_ok);
/* then hand the request off to normal local processing on the old connection */
- rds_recv_local(&inc->i_oconn->c_path[0], org->saddr, org->daddr,
+ rds_recv_local(&inc->i_oconn->c_path[0], &org->saddr, &org->daddr,
inc, gfp, NULL);
-
}
static void
-rds_recv_local(struct rds_conn_path *cp, __be32 saddr, __be32 daddr,
- struct rds_incoming *inc, gfp_t gfp, struct rds_sock *rs)
+rds_recv_local(struct rds_conn_path *cp, struct in6_addr *saddr,
+ struct in6_addr *daddr, struct rds_incoming *inc, gfp_t gfp,
+ struct rds_sock *rs)
{
struct sock *sk;
unsigned long flags;
if (inc_hdr_h_sequence != cp->cp_next_rx_seq) {
rds_rtd(RDS_RTD_RCV,
- "conn %p <%pI4,%pI4,%d> expect seq# %llu, recved seq# %llu, retrans bit %d\n",
+ "conn %p <%pI6c,%pI6c,%d> expect seq# %llu, recved seq# %llu, retrans bit %d\n",
conn, &conn->c_laddr, &conn->c_faddr,
conn->c_tos, cp->cp_next_rx_seq, inc_hdr_h_sequence,
inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED);
if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
if (inc->i_hdr.h_sport == 0) {
- rdsdebug("ignore ping with 0 sport from 0x%x\n", saddr);
+ rdsdebug("ignore ping with 0 sport from %pI6c\n",
+ saddr);
goto out;
}
if (inc->i_hdr.h_flags & RDS_FLAG_HB_PING) {
}
if (!rs)
- rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
+ rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_dev_if);
if (!rs) {
rds_stats_inc(s_recv_drop_no_sock);
goto out;
struct rds_sock *rs = rds_sk_to_rs(sk);
long timeo;
int ret = 0, nonblock = msg_flags & MSG_DONTWAIT;
+ struct sockaddr_in6 *sin6;
struct sockaddr_in *sin;
struct rds_incoming *inc = NULL;
break;
}
- rdsdebug("copying inc %p from %pI4:%u to user\n", inc,
+ rdsdebug("copying inc %p from %pI6c:%u to user\n", inc,
&inc->i_conn->c_faddr,
ntohs(inc->i_hdr.h_sport));
save = msg->msg_iter;
rds_stats_inc(s_recv_delivered);
- sin = (struct sockaddr_in *)msg->msg_name;
- if (sin) {
- sin->sin_family = AF_INET;
- sin->sin_port = inc->i_hdr.h_sport;
- sin->sin_addr.s_addr = inc->i_saddr;
- memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
- msg->msg_namelen = sizeof(*sin);
+ if (msg->msg_name) {
+ if (ipv6_addr_v4mapped(&inc->i_saddr)) {
+ sin = (struct sockaddr_in *)msg->msg_name;
+
+ sin->sin_family = AF_INET;
+ sin->sin_port = inc->i_hdr.h_sport;
+ sin->sin_addr.s_addr =
+ inc->i_saddr.s6_addr32[3];
+ memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+ msg->msg_namelen = sizeof(*sin);
+ } else {
+ sin6 = (struct sockaddr_in6 *)msg->msg_name;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = inc->i_hdr.h_sport;
+ sin6->sin6_addr = inc->i_saddr;
+ sin6->sin6_flowinfo = 0;
+ sin6->sin6_scope_id = rs->rs_bound_scope_id;
+ msg->msg_namelen = sizeof(*sin6);
+ }
}
break;
}
dst = rds_nf_hdr_dst(skb);
org = rds_nf_hdr_org(skb);
- /* just check to see that the destination is still the same */
- if (dst->daddr == org->daddr && dst->dport == org->dport) {
+ /* Just check to see that the destination is still the same.
+ * Otherwise, the sport/dport have likely swapped so consider
+ * it a different node.
+ */
+ if (ipv6_addr_equal(&dst->daddr, &org->daddr) &&
+ dst->dport == org->dport)
return 1;
- }
- /* otherwise, the sport/dport have likely swapped so consider
- * it a different node */
- else {
+ else
return 0;
- }
}
EXPORT_SYMBOL(rds_skb_local);
}
EXPORT_SYMBOL_GPL(rds_send_drop_acked);
-void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
+void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest)
{
struct rds_message *rm, *tmp;
struct rds_connection *conn = NULL;
spin_lock_irqsave(&rs->rs_lock, flags);
list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) {
- if (dest && (dest->sin_addr.s_addr != rm->m_daddr ||
- dest->sin_port != rm->m_inc.i_hdr.h_dport))
+ if (dest &&
+ (!ipv6_addr_equal(&dest->sin6_addr, &rm->m_daddr) ||
+ dest->sin6_port != rm->m_inc.i_hdr.h_dport))
continue;
list_move(&rm->m_sock_item, &list);
{
struct sock *sk = sock->sk;
struct rds_sock *rs = rds_sk_to_rs(sk);
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name;
struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
- __be32 daddr;
__be16 dport;
struct rds_message *rm = NULL;
struct rds_connection *conn;
long timeo = sock_sndtimeo(sk, nonblock);
size_t total_payload_len = payload_len, rdma_payload_len = 0;
struct rds_conn_path *cpath;
+ struct in6_addr daddr;
+ __u32 scope_id = 0;
+ int namelen;
/* Mirror Linux UDP mirror of BSD error message compatibility */
/* XXX: Perhaps MSG_MORE someday */
goto out;
}
- if (msg->msg_namelen) {
- /* XXX fail non-unicast destination IPs? */
- if (msg->msg_namelen < sizeof(*usin) || usin->sin_family != AF_INET) {
+ namelen = msg->msg_namelen;
+ if (namelen != 0) {
+ if (namelen < sizeof(*usin)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ switch (namelen) {
+ case sizeof(*usin):
+ if (usin->sin_family != AF_INET ||
+ usin->sin_addr.s_addr == INADDR_ANY ||
+ usin->sin_addr.s_addr == INADDR_BROADCAST ||
+ IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ipv6_addr_set_v4mapped(usin->sin_addr.s_addr, &daddr);
+ dport = usin->sin_port;
+ break;
+
+ case sizeof(*sin6): {
+ ret = -EPROTONOSUPPORT;
+ goto out;
+ }
+
+ default:
ret = -EINVAL;
goto out;
}
- daddr = usin->sin_addr.s_addr;
- dport = usin->sin_port;
} else {
/* We only care about consistency with ->connect() */
lock_sock(sk);
daddr = rs->rs_conn_addr;
dport = rs->rs_conn_port;
+ scope_id = rs->rs_bound_scope_id;
release_sock(sk);
}
lock_sock(sk);
- if (daddr == 0 || rs->rs_bound_addr == 0) {
- release_sock(sk);
- ret = -ENOTCONN; /* XXX not a great errno */
- goto out;
+ if (ipv6_addr_any(&rs->rs_bound_addr)) {
+ if (ipv6_addr_any(&daddr)) {
+ release_sock(sk);
+ ret = -ENOTCONN;
+ goto out;
+ }
+ } else if (namelen != 0) {
+ /* Cannot send to an IPv4 address using an IPv6 source
+ * address and cannot send to an IPv6 address using an
+ * IPv4 source address.
+ */
+ if (ipv6_addr_v4mapped(&daddr) ^
+ ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
+ release_sock(sk);
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
}
release_sock(sk);
/* rds_conn_create has a spinlock that runs with IRQ off.
* Caching the conn in the socket helps a lot. */
- if (rs->rs_conn && rs->rs_conn->c_faddr == daddr &&
- rs->rs_tos == rs->rs_conn->c_tos)
+ if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr) &&
+ rs->rs_tos == rs->rs_conn->c_tos)
conn = rs->rs_conn;
else {
conn = rds_conn_create_outgoing(sock_net(sock->sk),
- rs->rs_bound_addr, daddr,
- rs->rs_transport, rs->rs_tos,
- sock->sk->sk_allocation);
+ &rs->rs_bound_addr, &daddr,
+ rs->rs_transport, rs->rs_tos,
+ sock->sk->sk_allocation,
+ scope_id);
if (IS_ERR(conn)) {
ret = PTR_ERR(conn);
goto out;
queue_delayed_work(conn->c_path[0].cp_wq,
&conn->c_path[0].cp_send_w, 1);
- rdsdebug("message sent for rs %p, conn %p, len %d, %pI4:%u->%pI4:%u\n",
+ rdsdebug("message sent for rs %p, conn %p, len %d, %pI6c:%u->%pI6c:%u\n",
rs, conn, skb->len, &dst->saddr, dst->sport, &dst->daddr,
dst->dport);
ret = skb->len;
#include <net/tcp.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/addrconf.h>
#include "rds.h"
#include "tcp.h"
spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
}
-static int rds_tcp_laddr_check(struct net *net, __be32 addr)
+static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
+ __u32 scope_id)
{
- if (inet_addr_type(net, addr) == RTN_LOCAL)
+ struct net_device *dev = NULL;
+ int ret;
+
+ if (ipv6_addr_v4mapped(addr)) {
+ if (inet_addr_type(net, addr->s6_addr32[3]) == RTN_LOCAL)
+ return 0;
+ return -EADDRNOTAVAIL;
+ }
+
+ /* If the scope_id is specified, check only those addresses
+ * hosted on the specified interface.
+ */
+ if (scope_id != 0) {
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, scope_id);
+ /* scope_id is not valid... */
+ if (!dev) {
+ rcu_read_unlock();
+ return -EADDRNOTAVAIL;
+ }
+ }
+ ret = ipv6_chk_addr(net, addr, dev, 0);
+ if (scope_id != 0)
+ rcu_read_unlock();
+ if (ret)
return 0;
return -EADDRNOTAVAIL;
}
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
case TCP_SYN_RECV:
break;
case TCP_ESTABLISHED:
- if (!IS_CANONICAL(cp->cp_conn->c_laddr,
- cp->cp_conn->c_faddr) &&
- rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
- RDS_CONN_ERROR)) {
+ if (rds_addr_cmp(&cp->cp_conn->c_laddr,
+ &cp->cp_conn->c_faddr) >= 0 &&
+ rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
+ RDS_CONN_ERROR)) {
rds_conn_path_drop(cp, DR_TCP_STATE_CLOSE);
} else {
rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
{
struct socket *sock = NULL;
- struct sockaddr_in src, dest;
+ struct sockaddr_in sin;
+ struct sockaddr *addr;
+ int addrlen;
int ret;
struct rds_connection *conn = cp->cp_conn;
struct rds_tcp_connection *tc = cp->cp_transport_data;
rds_tcp_tune(sock);
- src.sin_family = AF_INET;
- src.sin_addr.s_addr = (__force u32)conn->c_laddr;
- src.sin_port = (__force u16)htons(0);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = (__force u32)conn->c_laddr.s6_addr32[3];
+ sin.sin_port = (__force u16)htons(0);
+ addr = (struct sockaddr *)&sin;
+ addrlen = sizeof(sin);
- ret = sock->ops->bind(sock, (struct sockaddr *)&src, sizeof(src));
+ ret = sock->ops->bind(sock, addr, addrlen);
if (ret) {
- rdsdebug("bind failed with %d at address %pI4\n",
+ rdsdebug("bind failed with %d at address %pI6c\n",
ret, &conn->c_laddr);
goto out;
}
- dest.sin_family = AF_INET;
- dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
- dest.sin_port = (__force u16)htons(RDS_TCP_PORT);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = (__force u32)conn->c_faddr.s6_addr32[3];
+ sin.sin_port = (__force u16)htons(RDS_TCP_PORT);
+ addr = (struct sockaddr *)&sin;
+ addrlen = sizeof(sin);
/*
* once we call connect() we can start getting callbacks and they
* own the socket
*/
rds_tcp_set_callbacks(sock, cp);
- ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest),
- O_NONBLOCK);
- rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
+ ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK);
+ rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr,
+ ret);
if (ret == -EINPROGRESS)
ret = 0;
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
{
int i;
- bool peer_is_smaller = IS_CANONICAL(conn->c_faddr, conn->c_laddr);
int npaths = max_t(int, 1, conn->c_npaths);
- if (!peer_is_smaller) {
+ if (rds_addr_cmp(&conn->c_faddr, &conn->c_laddr) >= 0) {
if (npaths <= 1)
rds_conn_path_connect_if_down(&conn->c_path[0]);
return NULL;
inet = inet_sk(new_sock->sk);
- rdsdebug("accepted tcp %pI4:%u -> %pI4:%u\n",
- &inet->inet_saddr, ntohs(inet->inet_sport),
- &inet->inet_daddr, ntohs(inet->inet_dport));
+ rdsdebug("accepted tcp %pI6c:%u -> %pI6c:%u\n",
+ &new_sock->sk->sk_v6_rcv_saddr, ntohs(inet->inet_sport),
+ &new_sock->sk->sk_v6_daddr, ntohs(inet->inet_dport));
conn = rds_conn_create(sock_net(sock->sk),
- inet->inet_saddr, inet->inet_daddr,
- &rds_tcp_transport, 0, GFP_KERNEL);
+ &new_sock->sk->sk_v6_rcv_saddr,
+ &new_sock->sk->sk_v6_daddr,
+ &rds_tcp_transport, 0, GFP_KERNEL,
+ new_sock->sk->sk_bound_dev_if);
if (IS_ERR(conn)) {
ret = PTR_ERR(conn);
goto out;
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
tc->t_tinc = tinc;
rdsdebug("alloced tinc %p\n", tinc);
rds_inc_path_init(&tinc->ti_inc, cp,
- cp->cp_conn->c_faddr);
+ &cp->cp_conn->c_faddr);
tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
local_clock();
if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
rds_tcp_cong_recv(conn, tinc);
else
- rds_recv_incoming(conn, conn->c_faddr,
- conn->c_laddr, &tinc->ti_inc,
+ rds_recv_incoming(conn, &conn->c_faddr,
+ &conn->c_laddr,
+ &tinc->ti_inc,
arg->gfp);
tc->t_tinc_hdr_rem = sizeof(struct rds_header);
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* an incoming RST.
*/
if (rds_conn_path_up(cp)) {
- pr_warn("RDS/tcp: send to %pI4 on cp [%d]"
+ pr_warn("RDS/tcp: send to %pI6c on cp [%d]"
"returned %d, "
"disconnecting and reconnecting\n",
&conn->c_faddr, cp->cp_index, ret);
struct rds_connection *conn = cp->cp_conn;
if (!rds_conn_path_transition(cp, curr, RDS_CONN_UP)) {
- pr_warn("RDS: Cannot transition conn <%pI4,%pI4,%d> to state UP, current state is %d\n",
+ pr_warn("RDS: Cannot transition conn <%pI6c,%pI6c,%d> to state UP, current state is %d\n",
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
atomic_read(&cp->cp_state));
rds_conn_path_drop(cp, DR_IB_NOT_CONNECTING_STATE);
return;
}
- rds_rtd(RDS_RTD_CM_EXT, "conn %p for %pI4 to %pI4 tos %d complete\n",
+ rds_rtd(RDS_RTD_CM_EXT, "conn %p for %pI6c to %pI6c tos %d complete\n",
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
cp->cp_reconnect_jiffies = 0;
bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
rds_rtd(RDS_RTD_CM_EXT,
- "conn %p for %pI4 to %pI4 tos %d reconnect jiffies %lu\n", conn,
- &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+ "conn %p for %pI6c to %pI6c tos %d reconnect jiffies %lu\n",
+ conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos,
cp->cp_reconnect_jiffies);
/* let peer with smaller addr initiate reconnect, to avoid duels */
- if (is_tcp && !IS_CANONICAL(conn->c_laddr, conn->c_faddr))
+ if (is_tcp && rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) >= 0)
return;
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
get_random_bytes(&rand, sizeof(rand));
rds_rtd(RDS_RTD_CM_EXT,
- "%lu delay %lu ceil conn %p for %pI4 -> %pI4 tos %d\n",
+ "%lu delay %lu ceil conn %p for %pI6c -> %pI6c tos %d\n",
rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
if (is_tcp && cp->cp_index > 0 &&
- !IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr))
rds_addr_cmp(&cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr) >= 0)
return;
clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
ret = conn->c_trans->conn_path_connect(cp);
rds_rtd(RDS_RTD_CM_EXT,
- "conn %p for %pI4 to %pI4 tos %d dispatched, ret %d\n",
+ "conn %p for %pI6c to %pI6c tos %d dispatched, ret %d\n",
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos, ret);
if (ret) {
cp->cp_hb_start = now;
} else if (now - cp->cp_hb_start > rds_conn_hb_timeout) {
rds_rtd(RDS_RTD_CM,
- "RDS/IB: connection <%pI4,%pI4,%d> timed out (0x%lx,0x%lx)..discon and recon\n",
+ "RDS/IB: connection <%pI6c,%pI6c,%d> timed out (0x%lx,0x%lx)..discon and recon\n",
&conn->c_laddr, &conn->c_faddr,
conn->c_tos, cp->cp_hb_start, now);
rds_conn_path_drop(cp, DR_HB_TIMEOUT);
struct rds_connection *conn = cp->cp_conn;
if (cp->cp_reconnect_retry_count > rds_sysctl_reconnect_max_retries) {
- pr_info("RDS: connection <%pI4,%pI4,%d> reconnect retries(%d) exceeded, stop retry\n",
+ pr_info("RDS: connection <%pI6c,%pI6c,%d> reconnect retries(%d) exceeded, stop retry\n",
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
cp->cp_reconnect_retry_count);
return;
} else {
cp->cp_reconnect_retry_count++;
rds_rtd(RDS_RTD_CM,
- "conn <%pI4,%pI4,%d> not up, retry(%d)\n",
+ "conn <%pI6c,%pI6c,%d> not up, retry(%d)\n",
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
cp->cp_reconnect_retry_count);
queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
rds_sysctl_shutdown_trace_start_time) &&
(now - cp->cp_reconnect_start <
rds_sysctl_shutdown_trace_end_time))
- pr_info("RDS/%s: connection <%pI4,%pI4,%d> shutdown init due to '%s'\n",
+ pr_info("RDS/%s: connection <%pI6c,%pI6c,%d> shutdown init due to '%s'\n",
(is_tcp ? "TCP" : "IB"),
&conn->c_laddr,
&conn->c_faddr,
return 0;
}
+
+/* Compare two IPv6 addresses as 128-bit big-endian numbers.
+ * Return 0 if the two addresses are equal.
+ * Return 1 if the first is greater.  Return -1 if the second is greater.
+ */
+int rds_addr_cmp(const struct in6_addr *addr1,
+		 const struct in6_addr *addr2)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+	const __be64 *a1, *a2;
+	u64 x, y;	/* CPU-endian values after be64_to_cpu() */
+
+	/* Fast path: compare the address as two 64-bit big-endian words.
+	 * Only valid when unaligned loads are cheap, since in6_addr is
+	 * only guaranteed 32-bit aligned.
+	 */
+	a1 = (__be64 *)addr1;
+	a2 = (__be64 *)addr2;
+
+	if (*a1 != *a2) {
+		/* High words differ; their big-endian order decides. */
+		if (be64_to_cpu(*a1) < be64_to_cpu(*a2))
+			return -1;
+		else
+			return 1;
+	} else {
+		x = be64_to_cpu(*++a1);
+		y = be64_to_cpu(*++a2);
+		if (x < y)
+			return -1;
+		else if (x > y)
+			return 1;
+		else
+			return 0;
+	}
+#else
+	u32 a, b;
+	int i;
+
+	/* Portable path: compare word by word, most significant first. */
+	for (i = 0; i < 4; i++) {
+		if (addr1->s6_addr32[i] != addr2->s6_addr32[i]) {
+			a = ntohl(addr1->s6_addr32[i]);
+			b = ntohl(addr2->s6_addr32[i]);
+			if (a < b)
+				return -1;
+			else if (a > b)
+				return 1;
+		}
+	}
+	return 0;
+#endif
+}
+EXPORT_SYMBOL_GPL(rds_addr_cmp);
/*
- * Copyright (c) 2006 Oracle. All rights reserved.
+ * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/in.h>
+#include <linux/ipv6.h>
#include "rds.h"
#include "loop.h"
module_put(trans->t_owner);
}
-struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr)
+struct rds_transport *rds_trans_get_preferred(struct net *net,
+ const struct in6_addr *addr,
+ __u32 scope_id)
{
struct rds_transport *ret = NULL;
struct rds_transport *trans;
unsigned int i;
- if (IN_LOOPBACK(ntohl(addr)))
+ if (ipv6_addr_v4mapped(addr)) {
+ if (*(u_int8_t *)&addr->s6_addr32[3] == IN_LOOPBACKNET)
+ return &rds_loop_transport;
+ } else if (ipv6_addr_loopback(addr)) {
return &rds_loop_transport;
+ }
down_read(&rds_trans_sem);
for (i = 0; i < RDS_TRANS_COUNT; i++) {
trans = transports[i];
- if (trans && (trans->laddr_check(net, addr) == 0) &&
+ if (trans && (trans->laddr_check(net, addr, scope_id) == 0) &&
(!trans->t_owner || try_module_get(trans->t_owner))) {
ret = trans;
break;