#endif
#endif
-#define inet_num(sk) inet_sk(sk)->inet_num
-#define inet_sport(sk) inet_sk(sk)->inet_sport
-#define inet_dport(sk) inet_sk(sk)->inet_dport
-#define inet_saddr(sk) inet_sk(sk)->inet_saddr
-#define sdp_inet_daddr(sk) inet_sk(sk)->inet_daddr
-#define sdp_inet_rcv_saddr(sk) inet_sk(sk)->inet_rcv_saddr
+#define sdp_inet_num(sk) (inet_sk(sk)->inet_num)
+#define sdp_inet_sport(sk) (inet_sk(sk)->inet_sport)
+#define sdp_inet_dport(sk) (inet_sk(sk)->inet_dport)
+#define sdp_inet_saddr(sk) (inet_sk(sk)->inet_saddr)
+#define sdp_inet_daddr(sk) (inet_sk(sk)->inet_daddr)
+#define sdp_inet_rcv_saddr(sk) (inet_sk(sk)->inet_rcv_saddr)
#define sdp_sk_sleep(sk) sk_sleep(sk)
#define sk_ssk(ssk) ((struct sock *)ssk)
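+/* sk_sndmsg_page/sk_sndmsg_off were replaced by the per-socket page
+ * fragment (sk->sk_frag); map the old TCP_PAGE/TCP_OFF helpers onto it.
+ */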
+#define TCP_PAGE(sk) (sk->sk_frag.page)
+#define TCP_OFF(sk) (sk->sk_frag.offset)
+
/* Interval between successive polls in the Tx routine when polling is used
   instead of interrupts (in per-core Tx rings) - should be a power of 2 */
#define SDP_TX_POLL_MODER 16
extern spinlock_t sdp_large_sockets_lock;
extern struct ib_client sdp_client;
#ifdef SDPSTATS_ON
-DECLARE_PER_CPU(struct sdpstats, sdpstats);
+DECLARE_PER_CPU(struct sdpstats_t, sdpstats);
#endif
enum sdp_mid {
skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
if (skb) {
- if ((kind == SK_MEM_RECV && sk_rmem_schedule(sk, skb->truesize)) ||
- (kind == SK_MEM_SEND && sk_wmem_schedule(sk, skb->truesize))) {
+ if ((kind == SK_MEM_RECV &&
+ sk_rmem_schedule(sk, skb, skb->truesize)) ||
+ (kind == SK_MEM_SEND &&
+ sk_wmem_schedule(sk, skb->truesize))) {
/*
* Make sure that we have exactly size bytes
* available to the caller, no more, no less.
#ifdef SDPSTATS_ON
#define SDPSTATS_MAX_HIST_SIZE 256
-struct sdpstats {
+struct sdpstats_t {
u64 rx_bytes;
u64 tx_bytes;
u32 post_send[256];
h[idx]++;
}
-#define SDPSTATS_COUNTER_INC(stat) do { __get_cpu_var(sdpstats).stat++; } while (0)
-#define SDPSTATS_COUNTER_ADD(stat, val) do { __get_cpu_var(sdpstats).stat += val; } while (0)
-#define SDPSTATS_COUNTER_MID_INC(stat, mid) do { __get_cpu_var(sdpstats).stat[mid]++; } \
- while (0)
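+/* __get_cpu_var() is gone; use the this_cpu_*()/this_cpu_ptr() accessors */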
+#define SDPSTATS_COUNTER_INC(stat) this_cpu_inc(sdpstats.stat)
+#define SDPSTATS_COUNTER_ADD(stat, val) this_cpu_add(sdpstats.stat, val)
+#define SDPSTATS_COUNTER_MID_INC(stat, mid) this_cpu_inc(sdpstats.stat[mid])
#define SDPSTATS_HIST(stat, size) \
- sdpstats_hist(__get_cpu_var(sdpstats).stat, size, ARRAY_SIZE(__get_cpu_var(sdpstats).stat) - 1, 1)
+ sdpstats_hist(this_cpu_ptr(&sdpstats)->stat, size, \
+ ARRAY_SIZE(sdpstats.stat) - 1, 1)
#define SDPSTATS_HIST_LINEAR(stat, size) \
- sdpstats_hist(__get_cpu_var(sdpstats).stat, size, ARRAY_SIZE(__get_cpu_var(sdpstats).stat) - 1, 0)
+ sdpstats_hist(this_cpu_ptr(&sdpstats)->stat, size, \
+ ARRAY_SIZE(sdpstats.stat) - 1, 0)
#else
#define SDPSTATS_COUNTER_INC(stat)
int sdp_poll_rx_cq(struct sdp_sock *ssk);
/* sdp_zcopy.c */
-int sdp_sendmsg_zcopy(struct kiocb *iocb, struct sock *sk, struct iovec *iov);
+int sdp_sendmsg_zcopy(struct sock *sk, struct iov_iter *msg_iter, int iov_idx);
int sdp_handle_srcavail(struct sdp_sock *ssk, struct sdp_srcah *srcah);
void sdp_handle_sendsm(struct sdp_sock *ssk, u32 mseq_ack);
void sdp_handle_rdma_read_compl(struct sdp_sock *ssk, u32 mseq_ack,
u32 bytes_completed);
int sdp_handle_rdma_read_cqe(struct sdp_sock *ssk);
-int sdp_rdma_to_iovec(struct sock *sk, struct iovec *iov, int msg_iovlen,
- struct sk_buff *skb, unsigned long *used, u32 offset);
+int sdp_rdma_to_iter(struct sock *sk, struct iov_iter *msg_iter,
+ struct sk_buff *skb, unsigned long *used, u32 offset);
int sdp_post_rdma_rd_compl(struct sock *sk,
struct rx_srcavail_state *rx_sa);
int sdp_post_sendsm(struct sock *sk);
sk->sk_send_head = skb->next;
if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) {
sk->sk_send_head = NULL;
- page = sk->sk_sndmsg_page;
+ page = TCP_PAGE(sk);
if (page) {
put_page(page);
- sk->sk_sndmsg_page = NULL;
+ TCP_PAGE(sk) = NULL;
}
}
}
#define SDP_MAJV_MINV 0x22
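+/* ipv6_addr_copy() was removed from the kernel headers; a plain struct
+ * assignment is equivalent.
+ */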
+#define ipv6_addr_copy(a, b) (*(a) = *(b))
+
SDP_MODPARAM_INT(sdp_rx_size, 0x40, "HW rx queue size (max num of credits)."
" Must be power of 2.");
if (!h->max_adverts)
return -EINVAL;
- child = sk_clone(sk, GFP_KERNEL);
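+	/* sk_clone() became sk_clone_lock(); the child is returned locked */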
+ child = sk_clone_lock(sk, GFP_KERNEL);
if (!child)
return -ENOMEM;
sdp_init_sock(child);
dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
- inet_dport(child) = dst_addr->sin_port;
+ sdp_inet_dport(child) = dst_addr->sin_port;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
if (inet6_sk(sk)) {
if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) {
/* V6 mapped */
sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr;
- ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
- h->src_addr.ip4.addr);
+ ipv6_addr_set(&child->sk_v6_daddr, 0, 0,
+ htonl(0x0000FFFF), h->src_addr.ip4.addr);
ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
h->dst_addr.ip4.addr);
- ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+ ipv6_addr_copy(&child->sk_v6_rcv_saddr, &newnp->saddr);
} else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) {
struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst_addr;
struct sockaddr_in6 *src_addr6 =
(struct sockaddr_in6 *)&id->route.addr.src_addr;
- ipv6_addr_copy(&newnp->daddr, &dst_addr6->sin6_addr);
+ ipv6_addr_copy(&child->sk_v6_daddr,
+ &dst_addr6->sin6_addr);
+ ipv6_addr_copy(&child->sk_v6_rcv_saddr,
+ &src_addr6->sin6_addr);
ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr);
- ipv6_addr_copy(&newnp->rcv_saddr, &src_addr6->sin6_addr);
} else {
sdp_warn(child, "Bad IPV field: 0x%x\n", h->ipv_cap & HH_IPV_MASK);
}
- sdp_inet_daddr(child) = inet_saddr(child) = sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6;
+ sdp_inet_daddr(child) = sdp_inet_saddr(child) =
+ sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6;
} else
#endif
{
/* child->sk_write_space(child); */
/* child->sk_data_ready(child, 0); */
- sk->sk_data_ready(sk, 0);
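+	/* ->sk_data_ready() no longer takes a byte count */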
+ sk->sk_data_ready(sk);
return 0;
}
sk_wake_async(sk, 0, POLL_OUT);
dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
- inet_dport(sk) = dst_addr->sin_port;
+ sdp_inet_dport(sk) = dst_addr->sin_port;
sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr;
#ifdef SDP_SOCK_HISTORY
if (sdp_link_layer_ib_only &&
rdma_node_get_transport(id->device->node_type) ==
RDMA_TRANSPORT_IB &&
- rdma_port_link_layer(id->device, id->port_num) !=
+ rdma_port_get_link_layer(id->device, id->port_num) !=
IB_LINK_LAYER_INFINIBAND) {
sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
"is allowed\n",
- rdma_port_link_layer(id->device, id->port_num));
+ rdma_port_get_link_layer(id->device,
+ id->port_num));
rc = -ENETUNREACH;
break;
}
if (src_addr->sa_family == AF_INET) {
/* IPv4 over IPv6 */
- ipv6_addr_set(&inet6_sk(sk)->rcv_saddr, 0, 0, htonl(0xFFFF),
- addr4->sin_addr.s_addr);
+ ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0,
+ htonl(0xFFFF),
+ addr4->sin_addr.s_addr);
} else {
- inet6_sk(sk)->rcv_saddr = addr6->sin6_addr;
+ sk->sk_v6_rcv_saddr = addr6->sin6_addr;
}
- inet6_sk(sk)->saddr = inet6_sk(sk)->rcv_saddr;
+ inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr;
}
else
#endif
{
- inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
+ sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
}
memset(&conn_param, 0, sizeof conn_param);
rc = -ECONNREFUSED;
break;
case RDMA_CM_EVENT_ESTABLISHED:
- inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
+ sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) =
((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
rc = sdp_connected_handler(sk);
break;
printk(level "%s:%d sdp_sock(%5d:%d %d:%d): " format, \
func, line, \
current->pid, smp_processor_id(), \
- (sk) ? inet_num(sk) : -1, \
- (sk) ? ntohs(inet_dport(sk)) : -1, ## arg); \
+ (sk) ? sdp_inet_num(sk) : -1, \
+ (sk) ? ntohs(sdp_inet_dport(sk)) : -1, ## arg); \
preempt_enable(); \
} while (0)
#define sdp_printk(level, sk, format, arg...) \
int idx;
int pid;
int cpu;
- int sk_num;
- int sk_dport;
+ int sdpprf_sk_num;
+ int sdpprf_sk_dport;
struct sk_buff *skb;
char msg[256];
preempt_disable(); \
l->idx = idx; \
l->pid = current->pid; \
- l->sk_num = (sk) ? inet_num(sk) : -1; \
- l->sk_dport = (sk) ? ntohs(inet_dport(sk)) : -1; \
+ l->sdpprf_sk_num = (sk) ? sdp_inet_num(sk) : -1; \
+ l->sdpprf_sk_dport = (sk) ? ntohs(sdp_inet_dport(sk)) : -1; \
l->cpu = smp_processor_id(); \
l->skb = s; \
snprintf(l->msg, sizeof(l->msg) - 1, format, ## arg); \
#include <rdma/sdp_socket.h>
#include "sdp.h"
#include <linux/delay.h>
+#include <linux/module.h>
MODULE_AUTHOR("Michael S. Tsirkin");
MODULE_DESCRIPTION("InfiniBand SDP module");
MODULE_LICENSE("Dual BSD/GPL");
+#define ipv6_addr_copy(a, b) (*(a) = *(b))
+
#ifdef CONFIG_INFINIBAND_SDP_DEBUG
SDP_MODPARAM_INT(sdp_debug_level, 0, "Enable debug tracing if > 0.");
#endif
"Enable data path debug tracing if > 0.");
#endif
-SDP_MODPARAM_INT(sdp_apm_enable, 1, "Enable APM.");
+SDP_MODPARAM_INT(sdp_apm_enable, 0, "Enable APM.");
SDP_MODPARAM_SINT(sdp_fmr_pool_size, 20, "Number of FMRs to allocate for pool");
SDP_MODPARAM_SINT(sdp_fmr_dirty_wm, 5, "Watermark to flush fmr pool");
sdp_add_to_history(sk, __func__);
if (!ssk->id)
- ssk->id = rdma_create_id(sdp_cma_handler, sk, RDMA_PS_SDP);
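+		/* rdma_create_id() now also takes the QP type */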
+ ssk->id = rdma_create_id(sdp_cma_handler, sk, RDMA_PS_SDP,
+ IB_QPT_RC);
if (!ssk->id)
return -ENOMEM;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
if (inet6_sk(sk)) {
- int addr_type = ipv6_addr_type(&inet6_sk(sk)->rcv_saddr);
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
if (addr_type == IPV6_ADDR_MAPPED) {
addr4->sin_family = AF_INET;
addr4->sin_port = htons(snum);
- addr4->sin_addr.s_addr = inet6_sk(sk)->rcv_saddr.s6_addr32[3];
+ addr4->sin_addr.s_addr =
+ sk->sk_v6_rcv_saddr.s6_addr32[3];
addr_len = sizeof(*addr4);
} else {
addr6->sin6_family = AF_INET6;
addr6->sin6_port = htons(snum);
addr6->sin6_scope_id = sk->sk_bound_dev_if;
- ipv6_addr_copy(&addr6->sin6_addr, &inet6_sk(sk)->rcv_saddr);
+ ipv6_addr_copy(&addr6->sin6_addr, &sk->sk_v6_rcv_saddr);
addr_len = sizeof(*addr6);
}
}
}
src_addr = (struct sockaddr_in *)&(ssk->id->route.addr.src_addr);
- inet_num(sk) = ntohs(src_addr->sin_port);
+ sdp_inet_num(sk) = ntohs(src_addr->sin_port);
#ifdef SDP_SOCK_HISTORY
sdp_ssk_hist_rename(sk);
#endif
sk->sk_send_head = NULL;
skb_queue_purge(&sk->sk_write_queue);
- /*
- * If sendmsg cached page exists, toss it.
- */
- if (sk->sk_sndmsg_page) {
- __free_page(sk->sk_sndmsg_page);
- sk->sk_sndmsg_page = NULL;
- }
id = ssk->id;
if (ssk->id) {
sk->sk_bound_dev_if = usin->sin6_scope_id;
src_addr->sin6_family = AF_INET6;
- src_addr->sin6_port = htons(inet_sport(sk));
+ src_addr->sin6_port = htons(sdp_inet_sport(sk));
src_addr->sin6_addr = inet6_sk(sk)->saddr;
- if (ssk->id && (addr_type != ipv6_addr_type(&inet6_sk(sk)->rcv_saddr))) {
+ if (ssk->id && (addr_type != ipv6_addr_type(&sk->sk_v6_rcv_saddr))) {
sdp_dbg(sk, "Existing address type is different for the "
"requested. rebinding socket\n");
rdma_destroy_id(ssk->id);
if (!ssk->id) {
/* If IPv4 over IPv6, make sure rdma_bind will expect ipv4 address */
if (addr_type == IPV6_ADDR_MAPPED)
- ipv6_addr_set(&inet6_sk(sk)->rcv_saddr, 0, 0, htonl(0x0000FFFF), 0);
+ ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0,
+ htonl(0x0000FFFF), 0);
- rc = sdp_get_port(sk, htons(inet_sport(sk)));
+ rc = sdp_get_port(sk, htons(sdp_inet_sport(sk)));
if (rc)
return rc;
- inet_sport(sk) = htons(inet_num(sk));
+ sdp_inet_sport(sk) = htons(sdp_inet_num(sk));
}
- ipv6_addr_copy(&inet6_sk(sk)->daddr, &usin->sin6_addr);
+ ipv6_addr_copy(&sk->sk_v6_daddr, &usin->sin6_addr);
if (addr_type == IPV6_ADDR_MAPPED) {
struct sockaddr_in *addr4 = (struct sockaddr_in *)uaddr;
return -EAFNOSUPPORT;
if (!ssk->id) {
- rc = sdp_get_port(sk, htons(inet_num(sk)));
+ rc = sdp_get_port(sk, htons(sdp_inet_num(sk)));
if (rc)
return rc;
- inet_sport(sk) = htons(inet_num(sk));
+ sdp_inet_sport(sk) = htons(sdp_inet_num(sk));
}
src_addr->sin_family = AF_INET;
- src_addr->sin_port = htons(inet_sport(sk));
- src_addr->sin_addr.s_addr = inet_saddr(sk);
+ src_addr->sin_port = htons(sdp_inet_sport(sk));
+ src_addr->sin_addr.s_addr = sdp_inet_saddr(sk);
sdp_dbg(sk, "%s " NIPQUAD_FMT ":%hu -> " NIPQUAD_FMT ":%hu\n", __func__,
NIPQUAD(src_addr->sin_addr.s_addr),
lockdep_set_class(&sk->sk_callback_lock,
&ib_sdp_sk_callback_lock_key);
- sk->sk_route_caps |= NETIF_F_SG | NETIF_F_NO_CSUM;
+ sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
skb_queue_head_init(&ssk->rx_ctl_q);
if (len > 0) {
if (!(flags & MSG_TRUNC))
- err = memcpy_toiovec(msg->msg_iov, &c, 1);
+ err = memcpy_to_msg(msg, &c, 1);
len = 1;
} else
msg->msg_flags |= MSG_TRUNC;
sdp_sk(sk)->nonagle &= ~TCP_NAGLE_PUSH;
}
-#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
-#define TCP_OFF(sk) (sk->sk_sndmsg_off)
static inline int sdp_bcopy_get(struct sock *sk, struct sk_buff *skb,
- char __user *from, int copy)
+ struct iov_iter *from, int copy)
{
int err;
struct sdp_sock *ssk = sdp_sk(sk);
/* We have some space in skb head. Superb! */
if (copy > skb_tailroom(skb))
copy = skb_tailroom(skb);
- if ((err = skb_add_data(skb, from, copy)) != 0)
+ if (skb_add_data_nocache(sk, skb, from, copy))
return SDP_ERR_FAULT;
} else {
/* Put data in skb->frags */
int i = skb_shinfo(skb)->nr_frags;
struct page *page = TCP_PAGE(sk);
int off = TCP_OFF(sk);
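+		/* sk_stream_alloc_page() no longer exists; use the per-socket
+		 * page_frag, refilled via sk_page_frag_refill(), instead.
+		 */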
+ struct page_frag *pfrag = &sk->sk_frag;
+
+ if (!sk_page_frag_refill(sk, pfrag))
+ return SDP_DO_WAIT_MEM;
if (skb_can_coalesce(skb, i, page, off) &&
off != PAGE_SIZE) {
if (!page) {
/* Allocate new cache page. */
- page = sk_stream_alloc_page(sk);
+ pfrag = &sk->sk_frag;
+ page = pfrag->page;
+ if (!sk_page_frag_refill(sk, pfrag))
+ return SDP_DO_WAIT_MEM;
if (!page)
return SDP_DO_WAIT_MEM;
}
/* Time to copy data. We are close to
* the end! */
SDPSTATS_COUNTER_ADD(memcpy_count, copy);
- err = skb_copy_to_page(sk, from, skb, page,
- off, copy);
+ err = skb_copy_to_page_nocache(sk, from, skb, page, off, copy);
if (err) {
- /* If this page was new, give it to the
- * socket so it does not get leaked.
- */
- if (!TCP_PAGE(sk)) {
- TCP_PAGE(sk) = page;
- TCP_OFF(sk) = 0;
- }
return SDP_ERR_ERROR;
}
DEFINE_WAIT(wait);
if (sk_stream_memory_free(sk))
- current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
+ current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
while (1) {
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
/* Like tcp_sendmsg */
/* TODO: check locking */
-static int sdp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t size)
+static int sdp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
int i;
struct sdp_sock *ssk = sdp_sk(sk);
int flags;
const int size_goal = MIN(ssk->xmit_size_goal, SDP_MAX_PAYLOAD);
int err, copied;
+ int iov_num = msg->msg_iter.nr_segs;
long timeo;
int zcopy_thresh =
-1 != ssk->zcopy_thresh ? ssk->zcopy_thresh : sdp_zcopy_thresh;
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
- for (i = 0; i < msg->msg_iovlen; i++) {
- struct iovec *iov = &msg->msg_iov[i];
+ for (i = 0; i < iov_num; i++) {
+ const struct iovec *iov = &msg->msg_iter.iov[i];
int seglen = iov->iov_len;
- char __user *from = iov->iov_base;
+ struct iov_iter *from = &msg->msg_iter;
- sdp_dbg_data(sk, "Sending iov: 0x%x/0x%zx %p\n", i, msg->msg_iovlen, from);
+ sdp_dbg_data(sk, "Sending iov: 0x%x/0x%zx %p\n", i,
+ msg->msg_iter.nr_segs, from);
SDPSTATS_HIST(sendmsg_seglen, seglen);
ssk->sdp_dev->fmr_pool && !(flags & MSG_OOB)) {
int zcopied = 0;
- zcopied = sdp_sendmsg_zcopy(iocb, sk, iov);
+ zcopied = sdp_sendmsg_zcopy(sk, &msg->msg_iter, i);
if (zcopied < 0) {
sdp_dbg_data(sk, "ZCopy send err: %d\n", zcopied);
copied += zcopied;
seglen = iov->iov_len;
- from = iov->iov_base;
sdp_dbg_data(sk, "ZCopied: 0x%x/0x%x\n", zcopied, seglen);
}
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps &
- (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
- NETIF_F_HW_CSUM))
+ (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))
skb->ip_summed = CHECKSUM_PARTIAL;
sdp_skb_entail(sk, skb);
SDP_SKB_CB(skb)->end_seq += copy;
/*unused: skb_shinfo(skb)->gso_segs = 0;*/
- from += copy;
copied += copy;
seglen -= copy;
continue;
/* Like tcp_recvmsg */
/* Maybe use skb_recv_datagram here? */
/* Note this does not seem to handle vectored messages. Relevant? */
-static int sdp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags,
- int *addr_len)
+static int sdp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int noblock, int flags, int *addr_len)
{
struct sk_buff *skb = NULL;
struct sdp_sock *ssk = sdp_sk(sk);
lock_sock(sk);
ssk->cpu = smp_processor_id();
sdp_dbg_data(sk, "iovlen: %zd iov_len: 0x%zx flags: 0x%x peek: 0x%x\n",
- msg->msg_iovlen, msg->msg_iov[0].iov_len, flags,
- MSG_PEEK);
+ msg->msg_iter.nr_segs, msg->msg_iter.iov[0].iov_len,
+ flags, MSG_PEEK);
posts_handler_get(ssk);
if (!(flags & MSG_TRUNC)) {
if (rx_sa && offset >= skb->len) {
/* No more payload - start rdma copy */
- sdp_dbg_data(sk, "RDMA copy of 0x%lx bytes\n", used);
- err = sdp_rdma_to_iovec(sk, msg->msg_iov, msg->msg_iovlen, skb,
- &used, offset);
+ sdp_dbg_data(sk, "RDMA copy of 0x%lx bytes\n",
+ used);
+ err = sdp_rdma_to_iter(sk, &msg->msg_iter, skb,
+ &used, offset);
if (unlikely(err)) {
/* ssk->rx_sa might had been freed when
* we slept. */
}
} else {
sdp_dbg_data(sk, "memcpy 0x%lx bytes +0x%x -> %p\n",
- used, offset, msg->msg_iov[0].iov_base);
+ used, offset,
+ msg->msg_iter.iov[0].iov_base);
- err = skb_copy_datagram_iovec(skb, offset,
+ err = skb_copy_datagram_msg(skb, offset,
/* TODO: skip header? */
- msg->msg_iov, used);
+ msg, used);
if (rx_sa && !(flags & MSG_PEEK)) {
rx_sa->copied += used;
rx_sa->reported += used;
rc = sdp_get_port(sk, 0);
if (rc)
return rc;
- inet_sport(sk) = htons(inet_num(sk));
+ sdp_inet_sport(sk) = htons(sdp_inet_num(sk));
}
rc = rdma_listen(ssk->id, backlog);
BUG();
ssk->urg_data = TCP_URG_VALID | tmp;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
}
static struct percpu_counter *sockets_allocated;
.name = "SDP",
};
-static struct proto_ops sdp_ipv4_proto_ops = {
+static const struct proto_ops sdp_ipv4_proto_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
};
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static struct proto_ops sdp_ipv6_proto_ops = {
+static const struct proto_ops sdp_ipv6_proto_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
.release = inet6_release,
};
#endif
-static int sdp_create_ipvx_socket(struct net *net, struct socket *sock, int protocol,
- struct proto_ops *proto_ops)
+static int sdp_create_ipvx_socket(struct net *net, struct socket *sock,
+ int protocol,
+ const struct proto_ops *proto_ops,
+ int kern)
{
struct sock *sk;
int rc;
return -EPROTONOSUPPORT;
}
- sk = sk_alloc(net, PF_INET_SDP, GFP_KERNEL, &sdp_proto);
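+	/* sk_alloc() now also takes the 'kern' flag */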
+ sk = sk_alloc(net, PF_INET_SDP, GFP_KERNEL, &sdp_proto, kern);
if (!sk) {
sdp_warn(NULL, "SDP: failed to allocate socket.\n");
return -ENOMEM;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static int sdp_create_v6_socket(struct net *net, struct socket *sock, int protocol,
- int kern)
+static int sdp_create_v6_socket(struct net *net, struct socket *sock,
+ int protocol, int kern)
{
- return sdp_create_ipvx_socket(net, sock, protocol, &sdp_ipv6_proto_ops);
+ return sdp_create_ipvx_socket(net, sock, protocol, &sdp_ipv6_proto_ops,
+ kern);
}
#endif
-static int sdp_create_v4_socket(struct net *net, struct socket *sock, int protocol,
- int kern)
+static int sdp_create_v4_socket(struct net *net, struct socket *sock,
+ int protocol, int kern)
{
- return sdp_create_ipvx_socket(net, sock, protocol, &sdp_ipv4_proto_ops);
+ return sdp_create_ipvx_socket(net, sock, protocol, &sdp_ipv4_proto_ops,
+ kern);
}
static void sdp_add_device(struct ib_device *device)
if (!orphan_count)
goto no_mem_orphan_count;
- percpu_counter_init(sockets_allocated, 0);
- percpu_counter_init(orphan_count, 0);
+	/* percpu_counter_init() takes a gfp_t argument since
+	 * commit 908c7f1949cb7cc6e92ba8f18f2998e87e265b8e
+	 */
+ percpu_counter_init(sockets_allocated, 0, GFP_KERNEL);
+ percpu_counter_init(orphan_count, 0, GFP_KERNEL);
sdp_proto.sockets_allocated = sockets_allocated;
sdp_proto.orphan_count = orphan_count;
char tmpbuf[TMPSZ + 1];
unsigned int dest;
unsigned int src;
- int uid;
+ kuid_t uid;
unsigned long inode;
__u16 destp;
__u16 srcp;
dest = sdp_inet_daddr(sk);
src = sdp_inet_rcv_saddr(sk);
- destp = ntohs(inet_dport(sk));
- srcp = ntohs(inet_sport(sk));
+ destp = ntohs(sdp_inet_dport(sk));
+ srcp = ntohs(sdp_inet_sport(sk));
uid = sock_i_uid(sk);
inode = sock_i_ino(sk);
rx_queue = rcv_nxt(sdp_sk(sk)) - sdp_sk(sk)->copied_seq;
tx_queue = sdp_sk(sk)->write_seq - sdp_sk(sk)->tx_ring.una_seq;
- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %5d %lu %08X:%08X %X",
- num, src, srcp, dest, destp, uid, inode,
+ sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %5u %lu %08X:%08X %X",
+ num, src, srcp, dest, destp, uid.val, inode,
rx_queue, tx_queue, sk->sk_state);
seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
char tmpbuf[TMPSZ + 1];
struct in6_addr *src;
struct in6_addr *dest;
- int uid;
+ kuid_t uid;
unsigned long inode;
__u16 destp;
__u16 srcp;
__u32 rx_queue, tx_queue;
- dest = &inet6_sk(sk)->daddr;
- src = &inet6_sk(sk)->rcv_saddr;
- destp = ntohs(inet_dport(sk));
- srcp = ntohs(inet_sport(sk));
+ dest = &sk->sk_v6_daddr;
+ src = &sk->sk_v6_rcv_saddr;
+ destp = ntohs(sdp_inet_dport(sk));
+ srcp = ntohs(sdp_inet_sport(sk));
uid = sock_i_uid(sk);
inode = sock_i_ino(sk);
rx_queue = rcv_nxt(sdp_sk(sk)) - sdp_sk(sk)->copied_seq;
tx_queue = sdp_sk(sk)->write_seq - sdp_sk(sk)->tx_ring.una_seq;
sprintf(tmpbuf,
- "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%5d %lu %08X:%08X %X",
- num,
- src->s6_addr32[0], src->s6_addr32[1],
- src->s6_addr32[2], src->s6_addr32[3],
- srcp,
- dest->s6_addr32[0], dest->s6_addr32[1],
- dest->s6_addr32[2], dest->s6_addr32[3],
- destp,
- uid, inode,
- rx_queue, tx_queue, sk->sk_state);
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X %5u %lu %08X:%08X %X",
+ num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3],
+ srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3],
+ destp,
+ uid.val, inode,
+ rx_queue, tx_queue, sk->sk_state);
seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
static int sdp_seq_open(struct inode *inode, struct file *file)
{
- struct sdp_seq_afinfo *afinfo = PDE(inode)->data;
+ struct sdp_seq_afinfo *afinfo = PDE_DATA(inode);
struct seq_file *seq;
struct sdp_iter_state *s;
int rc;
};
#ifdef SDPSTATS_ON
-DEFINE_PER_CPU(struct sdpstats, sdpstats);
+DEFINE_PER_CPU(struct sdpstats_t, sdpstats);
static void sdpstats_seq_hist(struct seq_file *seq, char *str, u32 *h, int n,
int is_log)
})
#define __sdpstats_seq_hist(seq, msg, hist, is_log) ({ \
- int hist_len = ARRAY_SIZE(__get_cpu_var(sdpstats).hist);\
+ int hist_len = ARRAY_SIZE(sdpstats.hist);\
memset(h, 0, sizeof(*h) * h_len); \
SDPSTATS_HIST_GET(hist, hist_len, h); \
sdpstats_seq_hist(seq, msg, h, hist_len, is_log);\
seq_printf(seq, "memcpy_count \t\t: %u\n",
SDPSTATS_COUNTER_GET(memcpy_count));
- for (i = 0; i < ARRAY_SIZE(__get_cpu_var(sdpstats).post_send); i++) {
- if (mid2str(i)) {
- seq_printf(seq, "post_send %-20s\t: %u\n",
- mid2str(i),
- SDPSTATS_COUNTER_GET(post_send[i]));
- }
- }
+ for (i = 0; i < ARRAY_SIZE(sdpstats.post_send); i++) {
+ if (mid2str(i)) {
+ seq_printf(seq, "post_send %-20s\t: %u\n", mid2str(i),
+ SDPSTATS_COUNTER_GET(post_send[i]));
+ }
+ }
seq_printf(seq, "\n");
seq_printf(seq, "sdp_recvmsg() calls\t\t: %u\n",
int i;
for_each_possible_cpu(i)
- memset(&per_cpu(sdpstats, i), 0, sizeof(struct sdpstats));
+ memset(&per_cpu(sdpstats, i), 0, sizeof(struct sdpstats_t));
printk(KERN_WARNING "Cleared sdp statistics\n");
return count;
seq_printf(m, "%-6d: [%5lu.%06lu] %-50s - [%d{%d} %d:%d] "
"skb: %p %s:%d\n",
l->idx, t, usec_rem,
- l->msg, l->pid, l->cpu, l->sk_num, l->sk_dport,
- l->skb, l->func, l->line);
+ l->msg, l->pid, l->cpu, l->sdpprf_sk_num,
+ l->sdpprf_sk_dport, l->skb, l->func, l->line);
out:
return 0;
}
static void sdp_ssk_hist_name(char *sk_name, int len, struct sock *sk)
{
- int lport = inet_num(sk);
- int rport = ntohs(inet_dport(sk));
+ int lport = sdp_inet_num(sk);
+ int rport = ntohs(sdp_inet_dport(sk));
snprintf(sk_name, len, "%05x_%d:%d",
sdp_sk(sk)->sk_id, lport, rport);
}
#endif
- p = proc_net_fops_create(&init_net, sdp_seq_afinfo.name, S_IRUGO,
- sdp_seq_afinfo.seq_fops);
- if (p)
- p->data = &sdp_seq_afinfo;
- else
+ p = proc_create_data(sdp_seq_afinfo.name, S_IRUGO, init_net.proc_net,
+ sdp_seq_afinfo.seq_fops, &sdp_seq_afinfo);
+ if (!p)
goto no_mem;
#ifdef SDPSTATS_ON
- stats = proc_net_fops_create(&init_net, PROC_SDP_STATS,
- S_IRUGO | S_IWUGO, &sdpstats_fops);
+ stats = proc_create(PROC_SDP_STATS, S_IRUGO | S_IWUGO,
+ init_net.proc_net, &sdpstats_fops);
if (!stats)
goto no_mem_stats;
#endif
#ifdef SDPSTATS_ON
- proc_net_remove(&init_net, PROC_SDP_STATS);
+ remove_proc_entry(PROC_SDP_STATS, init_net.proc_net);
no_mem_stats:
#endif
- proc_net_remove(&init_net, sdp_seq_afinfo.name);
+ remove_proc_entry(sdp_seq_afinfo.name, init_net.proc_net);
no_mem:
return -ENOMEM;
void sdp_proc_unregister(void)
{
- proc_net_remove(&init_net, sdp_seq_afinfo.name);
+ remove_proc_entry(sdp_seq_afinfo.name, init_net.proc_net);
memset(sdp_seq_afinfo.seq_fops, 0, sizeof(*sdp_seq_afinfo.seq_fops));
#ifdef SDPSTATS_ON
- proc_net_remove(&init_net, PROC_SDP_STATS);
+ remove_proc_entry(PROC_SDP_STATS, init_net.proc_net);
#endif
#ifdef SDP_PROFILING
debugfs_remove(sdp_prof_file);
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/rcupdate.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include "sdp.h"
pages_alloced++;
}
frag = &skb_shinfo(skb)->frags[i];
- frag->page = page;
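+		/* skb_frag_t now wraps the page in a struct; assigning .p
+		 * directly works, though __skb_frag_set_page() is the usual
+		 * helper.
+		 */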
+ frag->page.p = page;
frag->page_offset = 0;
frag->size = min(PAGE_SIZE, SDP_MAX_PAYLOAD);
++skb_shinfo(skb)->nr_frags;
}
skb->truesize += ssk->recv_frags * min(PAGE_SIZE, SDP_MAX_PAYLOAD);
- if (!sk_rmem_schedule(sk, ssk->recv_frags * min(PAGE_SIZE, SDP_MAX_PAYLOAD))) {
+ if (!sk_rmem_schedule(sk, skb, ssk->recv_frags * min(PAGE_SIZE,
+ SDP_MAX_PAYLOAD))) {
sdp_dbg(sk, "RX couldn't post, rx posted = %d.",
rx_ring_posted(sdp_sk(sk)));
sdp_dbg(sk, "Out of memory\n");
if (rx_req->mapping[i + 1]) {
addr = rx_req->mapping[i + 1];
} else {
- addr = ib_dma_map_page(dev, skb_shinfo(skb)->frags[i].page,
- skb_shinfo(skb)->frags[i].page_offset,
- skb_shinfo(skb)->frags[i].size,
- DMA_FROM_DEVICE);
+ addr = ib_dma_map_page(dev,
+ skb_shinfo(skb)->frags[i].page.p,
+ skb_shinfo(skb)->frags[i].page_offset,
+ skb_shinfo(skb)->frags[i].size,
+ DMA_FROM_DEVICE);
BUG_ON(ib_dma_mapping_error(dev, addr));
rx_req->mapping[i + 1] = addr;
}
skb_queue_tail(&sk->sk_receive_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb_len);
+ sk->sk_data_ready(sk);
return skb;
}
skb_shinfo(skb)->nr_frags = pagesz / PAGE_SIZE;
for (i = skb_shinfo(skb)->nr_frags; i < frags; ++i) {
- put_page(skb_shinfo(skb)->frags[i].page);
+ put_page(skb_shinfo(skb)->frags[i].page.p);
}
if (unlikely(h->flags & SDP_OOB_PEND))
sdp_arm_rx_cq(sk_ssk(ssk));
}
+/* SHAMIR - TODO: remove this!!! */
+#define IB_CQ_VECTOR_LEAST_ATTACHED 0
+
int sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device)
{
struct ib_cq *rx_cq;
*/
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include "sdp.h"
#define sdp_cnt(var) do { (var)++; } while (0)
SDP_MODPARAM_SINT(sdp_keepalive_probes_sent, 0,
- "Total number of keepalive probes sent.");
+ "Total number of keepalive probes sent.");
static int sdp_process_tx_cq(struct sdp_sock *ssk);
frags = skb_shinfo(skb)->nr_frags;
for (i = 0; i < frags; ++i) {
++sge;
- addr = ib_dma_map_page(dev, skb_shinfo(skb)->frags[i].page,
+ addr = ib_dma_map_page(dev,
+ skb_shinfo(skb)->frags[i].page.p,
skb_shinfo(skb)->frags[i].page_offset,
skb_shinfo(skb)->frags[i].size,
DMA_TO_DEVICE);
{
}
+/* SHAMIR - TODO: remove this!!! */
+#define IB_CQ_VECTOR_LEAST_ATTACHED 0
+
int sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device)
{
struct ib_cq *tx_cq;
int payload_len;
struct page *payload_pg;
int off, len;
- struct ib_umem_chunk *chunk;
if (ssk->tx_sa) {
/* ssk->tx_sa might already be there in a case of
BUG_ON(!tx_sa);
BUG_ON(!tx_sa->fmr || !tx_sa->fmr->fmr->lkey);
BUG_ON(!tx_sa->umem);
- BUG_ON(!tx_sa->umem->chunk_list.next);
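+	/* ib_umem now exposes a scatterlist (sg_head) instead of a chunk list */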
+ BUG_ON(!tx_sa->umem->sg_head.sgl);
- chunk = list_entry(tx_sa->umem->chunk_list.next, struct ib_umem_chunk, list);
- BUG_ON(!chunk->nmap);
-
- off = tx_sa->umem->offset;
+ off = ib_umem_offset(tx_sa->umem);
len = tx_sa->umem->length;
tx_sa->bytes_sent = tx_sa->bytes_acked = 0;
/* must have payload inlined in SrcAvail packet in combined mode */
payload_len = MIN(tx_sa->umem->page_size - off, len);
payload_len = MIN(payload_len, ssk->xmit_size_goal - sizeof(struct sdp_srcah));
- payload_pg = sg_page(&chunk->page_list[0]);
+ payload_pg = sg_page(tx_sa->umem->sg_head.sgl);
get_page(payload_pg);
sdp_dbg_data(sk, "payload: off: 0x%x, pg: %p, len: 0x%x\n",
return 0;
}
-static int sdp_update_iov_used(struct sock *sk, struct iovec *iov, int len)
+static int sdp_update_iov_used(struct sock *sk, struct iov_iter *msg_iter,
+ int len)
{
sdp_dbg_data(sk, "updating consumed 0x%x bytes from iov\n", len);
- while (len > 0) {
- if (iov->iov_len) {
- int copy = min_t(unsigned int, iov->iov_len, len);
- len -= copy;
- iov->iov_len -= copy;
- iov->iov_base += copy;
- }
- iov++;
- }
+
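+	/* the iov_iter tracks its own position, so simply advance it */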
+ iov_iter_advance(msg_iter, len);
return 0;
}
struct ib_umem *umem;
struct ib_device *dev = sdp_sk(sk)->ib_device;
u64 *pages;
- struct ib_umem_chunk *chunk;
- int n = 0, j, k;
+ struct scatterlist *sg;
+ int n = 0, k, i;
int rc = 0;
unsigned long max_lockable_bytes;
goto err_umem_get;
}
- sdp_dbg_data(sk, "umem->offset = 0x%x, length = 0x%zx\n",
- umem->offset, umem->length);
+ sdp_dbg_data(sk, "ib_umem_ofset(umem) = 0x%x, length = 0x%zx\n",
+ ib_umem_offset(umem), umem->length);
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages) {
goto err_pages_alloc;
}
- list_for_each_entry(chunk, &umem->chunk_list, list) {
- for (j = 0; j < chunk->nmap; ++j) {
- unsigned len2;
- len2 = ib_sg_dma_len(dev,
- &chunk->page_list[j]) >> PAGE_SHIFT;
+ for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
+ unsigned len2;
- SDP_WARN_ON(len2 > len);
- len -= len2;
+ len2 = ib_sg_dma_len(dev, sg) >> PAGE_SHIFT;
- for (k = 0; k < len2; ++k) {
- pages[n++] = ib_sg_dma_address(dev,
- &chunk->page_list[j]) +
- umem->page_size * k;
- BUG_ON(n >= SDP_FMR_SIZE);
- }
+ SDP_WARN_ON(len2 > len);
+ len -= len2;
+ for (k = 0; k < len2; ++k) {
+ pages[n++] = ib_sg_dma_address(dev, sg) +
+ umem->page_size * k;
+ BUG_ON(n >= SDP_FMR_SIZE);
}
}
- fmr = ib_fmr_pool_map_phys(sdp_sk(sk)->sdp_dev->fmr_pool, pages, n, 0);
+ fmr = ib_fmr_pool_map_phys(sdp_sk(sk)->sdp_dev->fmr_pool, pages, n, 0,
+ NULL);
if (IS_ERR(fmr)) {
sdp_dbg_data(sk, "Error allocating fmr: %ld\n", PTR_ERR(fmr));
SDPSTATS_COUNTER_INC(fmr_alloc_error);
ssk->tx_ring.rdma_inflight = rx_sa;
- sge.addr = rx_sa->umem->offset;
+ sge.addr = ib_umem_offset(rx_sa->umem);
sge.length = rx_sa->umem->length;
sge.lkey = rx_sa->fmr->fmr->lkey;
return rc;
}
-int sdp_rdma_to_iovec(struct sock *sk, struct iovec *iov, int msg_iovlen,
- struct sk_buff *skb, unsigned long *used, u32 offset)
+int sdp_rdma_to_iter(struct sock *sk, struct iov_iter *msg_iter,
+ struct sk_buff *skb, unsigned long *used, u32 offset)
{
struct sdp_sock *ssk = sdp_sk(sk);
struct rx_srcavail_state *rx_sa = RX_SRCAVAIL_STATE(skb);
int rc = 0;
+ const struct iovec *iov = msg_iter->iov;
+ int msg_iovlen = msg_iter->nr_segs;
int len = *used;
int copied;
int i = 0;
copied = rx_sa->umem->length;
- sdp_update_iov_used(sk, iov, copied);
+ sdp_update_iov_used(sk, msg_iter, copied);
atomic_add(copied, &ssk->rcv_nxt);
*used = copied;
rx_sa->copied += copied;
return ret;
}
-static int do_sdp_sendmsg_zcopy(struct sock *sk, struct tx_srcavail_state *tx_sa,
- struct iovec *iov, long *timeo)
+static int do_sdp_sendmsg_zcopy(struct sock *sk,
+ struct tx_srcavail_state *tx_sa,
+ struct iov_iter *msg_iter, int iov_idx,
+ long *timeo)
{
struct sdp_sock *ssk = sdp_sk(sk);
int rc = 0;
unsigned long lock_flags;
+ const struct iovec *iov = &msg_iter->iov[iov_idx];
rc = sdp_alloc_fmr(sk, iov->iov_base, iov->iov_len,
&tx_sa->fmr, &tx_sa->umem, IB_ACCESS_REMOTE_READ, sdp_zcopy_thresh);
spin_unlock_irqrestore(&ssk->tx_sa_lock, lock_flags);
err_abort_send:
- sdp_update_iov_used(sk, iov, tx_sa->bytes_acked);
+ sdp_update_iov_used(sk, msg_iter, tx_sa->bytes_acked);
err_no_tx_slots:
sdp_free_fmr(sk, &tx_sa->fmr, &tx_sa->umem);
return rc;
}
-int sdp_sendmsg_zcopy(struct kiocb *iocb, struct sock *sk, struct iovec *iov)
+int sdp_sendmsg_zcopy(struct sock *sk, struct iov_iter *msg_iter, int iov_idx)
{
struct sdp_sock *ssk = sdp_sk(sk);
int rc = 0;
long timeo = SDP_SRCAVAIL_ADV_TIMEOUT;
struct tx_srcavail_state *tx_sa;
+ const struct iovec *iov = &msg_iter->iov[iov_idx];
size_t bytes_to_copy = iov->iov_len;
int copied = 0;
do {
tx_sa_reset(tx_sa);
- rc = do_sdp_sendmsg_zcopy(sk, tx_sa, iov, &timeo);
+ rc = do_sdp_sendmsg_zcopy(sk, tx_sa, msg_iter, iov_idx, &timeo);
if (iov->iov_len && iov->iov_len < sdp_zcopy_thresh) {
sdp_dbg_data(sk, "0x%zx bytes left, switching to bcopy\n",