From: Qing Huang Date: Fri, 25 Sep 2015 22:45:36 +0000 (-0700) Subject: ib_sdp: porting sdp from uek2 to uek-4.1 X-Git-Tag: v4.1.12-92~264^2~5^2 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=8554bc7dbdb84e424920dab4562791535489a7ff;p=users%2Fjedix%2Flinux-maple.git ib_sdp: porting sdp from uek2 to uek-4.1 Perf result: [root@ca-ibdev10 src]# ./iperf -c 192.168.220.117 -P 8 -p 6001 -l 4k -t 300 -w 16m ------------------------------------------------------------ Client connecting to 192.168.220.117, TCP port 6001 TCP window size: 2.10 MByte (WARNING: requested 16.0 MByte) ------------------------------------------------------------ [ 7] local 192.168.220.110 port 28275 connected with 192.168.220.117 port 6001 [ 9] local 192.168.220.110 port 16839 connected with 192.168.220.117 port 6001 [ 4] local 192.168.220.110 port 13889 connected with 192.168.220.117 port 6001 [ 12] local 192.168.220.110 port 22535 connected with 192.168.220.117 port 6001 [ 13] local 192.168.220.110 port 33341 connected with 192.168.220.117 port 6001 [ 6] local 192.168.220.110 port 9645 connected with 192.168.220.117 port 6001 [ 16] local 192.168.220.110 port 54091 connected with 192.168.220.117 port 6001 [ 19] local 192.168.220.110 port 22246 connected with 192.168.220.117 port 6001 [ ID] Interval Transfer Bandwidth [ 7] 0.0-300.0 sec 126 GBytes 3.59 Gbits/sec [ 9] 0.0-300.0 sec 126 GBytes 3.60 Gbits/sec [ 4] 0.0-300.0 sec 126 GBytes 3.60 Gbits/sec [ 12] 0.0-300.0 sec 126 GBytes 3.59 Gbits/sec [ 13] 0.0-300.0 sec 126 GBytes 3.60 Gbits/sec [ 6] 0.0-300.0 sec 126 GBytes 3.60 Gbits/sec [ 16] 0.0-300.0 sec 126 GBytes 3.60 Gbits/sec [ 19] 0.0-300.0 sec 126 GBytes 3.59 Gbits/sec [SUM] 0.0-300.0 sec 1004 GBytes 28.8 Gbits/sec Signed-off-by: Qing Huang --- diff --git a/drivers/infiniband/ulp/sdp/sdp.h b/drivers/infiniband/ulp/sdp/sdp.h index 0017384d71af..17f817de98cd 100644 --- a/drivers/infiniband/ulp/sdp/sdp.h +++ b/drivers/infiniband/ulp/sdp/sdp.h @@ -42,16 +42,19 @@ #endif #endif 
-#define inet_num(sk) inet_sk(sk)->inet_num -#define inet_sport(sk) inet_sk(sk)->inet_sport -#define inet_dport(sk) inet_sk(sk)->inet_dport -#define inet_saddr(sk) inet_sk(sk)->inet_saddr -#define sdp_inet_daddr(sk) inet_sk(sk)->inet_daddr -#define sdp_inet_rcv_saddr(sk) inet_sk(sk)->inet_rcv_saddr +#define sdp_inet_num(sk) (inet_sk(sk)->inet_num) +#define sdp_inet_sport(sk) (inet_sk(sk)->inet_sport) +#define sdp_inet_dport(sk) (inet_sk(sk)->inet_dport) +#define sdp_inet_saddr(sk) (inet_sk(sk)->inet_saddr) +#define sdp_inet_daddr(sk) (inet_sk(sk)->inet_daddr) +#define sdp_inet_rcv_saddr(sk) (inet_sk(sk)->inet_rcv_saddr) #define sdp_sk_sleep(sk) sk_sleep(sk) #define sk_ssk(ssk) ((struct sock *)ssk) +#define TCP_PAGE(sk) (sk->sk_frag.page) +#define TCP_OFF(sk) (sk->sk_frag.offset) + /* Interval between sucessive polls in the Tx routine when polling is used instead of interrupts (in per-core Tx rings) - should be power of 2 */ #define SDP_TX_POLL_MODER 16 @@ -168,7 +171,7 @@ extern struct workqueue_struct *rx_comp_wq; extern spinlock_t sdp_large_sockets_lock; extern struct ib_client sdp_client; #ifdef SDPSTATS_ON -DECLARE_PER_CPU(struct sdpstats, sdpstats); +DECLARE_PER_CPU(struct sdpstats_t, sdpstats); #endif enum sdp_mid { @@ -628,8 +631,10 @@ static inline struct sk_buff *sdp_stream_alloc_skb(struct sock *sk, int size, skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); if (skb) { - if ((kind == SK_MEM_RECV && sk_rmem_schedule(sk, skb->truesize)) || - (kind == SK_MEM_SEND && sk_wmem_schedule(sk, skb->truesize))) { + if ((kind == SK_MEM_RECV && + sk_rmem_schedule(sk, skb, skb->truesize)) || + (kind == SK_MEM_SEND && + sk_wmem_schedule(sk, skb->truesize))) { /* * Make sure that we have exactly size bytes * available to the caller, no more, no less. 
@@ -772,7 +777,7 @@ static inline int credit_update_needed(struct sdp_sock *ssk) #ifdef SDPSTATS_ON #define SDPSTATS_MAX_HIST_SIZE 256 -struct sdpstats { +struct sdpstats_t { u64 rx_bytes; u64 tx_bytes; u32 post_send[256]; @@ -830,15 +835,16 @@ static inline void sdpstats_hist(u32 *h, u32 val, u32 maxidx, int is_log) h[idx]++; } -#define SDPSTATS_COUNTER_INC(stat) do { __get_cpu_var(sdpstats).stat++; } while (0) -#define SDPSTATS_COUNTER_ADD(stat, val) do { __get_cpu_var(sdpstats).stat += val; } while (0) -#define SDPSTATS_COUNTER_MID_INC(stat, mid) do { __get_cpu_var(sdpstats).stat[mid]++; } \ - while (0) +#define SDPSTATS_COUNTER_INC(stat) this_cpu_inc(sdpstats.stat) +#define SDPSTATS_COUNTER_ADD(stat, val) this_cpu_add(sdpstats.stat, val) +#define SDPSTATS_COUNTER_MID_INC(stat, mid) this_cpu_inc(sdpstats.stat[mid]) #define SDPSTATS_HIST(stat, size) \ - sdpstats_hist(__get_cpu_var(sdpstats).stat, size, ARRAY_SIZE(__get_cpu_var(sdpstats).stat) - 1, 1) + sdpstats_hist(this_cpu_ptr(&sdpstats)->stat, size, \ + ARRAY_SIZE(sdpstats.stat) - 1, 1) #define SDPSTATS_HIST_LINEAR(stat, size) \ - sdpstats_hist(__get_cpu_var(sdpstats).stat, size, ARRAY_SIZE(__get_cpu_var(sdpstats).stat) - 1, 0) + sdpstats_hist(this_cpu_ptr(&sdpstats)->stat, size, \ + ARRAY_SIZE(sdpstats.stat) - 1, 0) #else #define SDPSTATS_COUNTER_INC(stat) @@ -980,14 +986,14 @@ void sdp_handle_disconn(struct sock *sk); int sdp_poll_rx_cq(struct sdp_sock *ssk); /* sdp_zcopy.c */ -int sdp_sendmsg_zcopy(struct kiocb *iocb, struct sock *sk, struct iovec *iov); +int sdp_sendmsg_zcopy(struct sock *sk, struct iov_iter *msg_iter, int iov_idx); int sdp_handle_srcavail(struct sdp_sock *ssk, struct sdp_srcah *srcah); void sdp_handle_sendsm(struct sdp_sock *ssk, u32 mseq_ack); void sdp_handle_rdma_read_compl(struct sdp_sock *ssk, u32 mseq_ack, u32 bytes_completed); int sdp_handle_rdma_read_cqe(struct sdp_sock *ssk); -int sdp_rdma_to_iovec(struct sock *sk, struct iovec *iov, int msg_iovlen, - struct sk_buff *skb, unsigned 
long *used, u32 offset); +int sdp_rdma_to_iter(struct sock *sk, struct iov_iter *msg_iter, + struct sk_buff *skb, unsigned long *used, u32 offset); int sdp_post_rdma_rd_compl(struct sock *sk, struct rx_srcavail_state *rx_sa); int sdp_post_sendsm(struct sock *sk); diff --git a/drivers/infiniband/ulp/sdp/sdp_bcopy.c b/drivers/infiniband/ulp/sdp/sdp_bcopy.c index 508ac8490887..77ea0f22a1c7 100644 --- a/drivers/infiniband/ulp/sdp/sdp_bcopy.c +++ b/drivers/infiniband/ulp/sdp/sdp_bcopy.c @@ -107,10 +107,10 @@ static inline void update_send_head(struct sock *sk, struct sk_buff *skb) sk->sk_send_head = skb->next; if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) { sk->sk_send_head = NULL; - page = sk->sk_sndmsg_page; + page = TCP_PAGE(sk); if (page) { put_page(page); - sk->sk_sndmsg_page = NULL; + TCP_PAGE(sk) = NULL; } } } diff --git a/drivers/infiniband/ulp/sdp/sdp_cma.c b/drivers/infiniband/ulp/sdp/sdp_cma.c index fa7a71218d69..40ac58000d03 100644 --- a/drivers/infiniband/ulp/sdp/sdp_cma.c +++ b/drivers/infiniband/ulp/sdp/sdp_cma.c @@ -52,6 +52,8 @@ #define SDP_MAJV_MINV 0x22 +#define ipv6_addr_copy(a, b) (*(a) = *(b)) + SDP_MODPARAM_INT(sdp_rx_size, 0x40, "HW rx queue size (max num of credits)." 
" Must be power of 2."); @@ -185,14 +187,14 @@ static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id, if (!h->max_adverts) return -EINVAL; - child = sk_clone(sk, GFP_KERNEL); + child = sk_clone_lock(sk, GFP_KERNEL); if (!child) return -ENOMEM; sdp_init_sock(child); dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr; - inet_dport(child) = dst_addr->sin_port; + sdp_inet_dport(child) = dst_addr->sin_port; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (inet6_sk(sk)) { @@ -205,26 +207,29 @@ static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id, if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV4) { /* V6 mapped */ sdp_inet_daddr(child) = dst_addr->sin_addr.s_addr; - ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF), - h->src_addr.ip4.addr); + ipv6_addr_set(&child->sk_v6_daddr, 0, 0, + htonl(0x0000FFFF), h->src_addr.ip4.addr); ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF), h->dst_addr.ip4.addr); - ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); + ipv6_addr_copy(&child->sk_v6_rcv_saddr, &newnp->saddr); } else if ((h->ipv_cap & HH_IPV_MASK) == HH_IPV6) { struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst_addr; struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *)&id->route.addr.src_addr; - ipv6_addr_copy(&newnp->daddr, &dst_addr6->sin6_addr); + ipv6_addr_copy(&child->sk_v6_daddr, + &dst_addr6->sin6_addr); + ipv6_addr_copy(&child->sk_v6_rcv_saddr, + &src_addr6->sin6_addr); ipv6_addr_copy(&newnp->saddr, &src_addr6->sin6_addr); - ipv6_addr_copy(&newnp->rcv_saddr, &src_addr6->sin6_addr); } else { sdp_warn(child, "Bad IPV field: 0x%x\n", h->ipv_cap & HH_IPV_MASK); } - sdp_inet_daddr(child) = inet_saddr(child) = sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6; + sdp_inet_daddr(child) = sdp_inet_saddr(child) = + sdp_inet_rcv_saddr(child) = LOOPBACK4_IPV6; } else #endif { @@ -275,7 +280,7 @@ static int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id, /* child->sk_write_space(child); */ /* 
child->sk_data_ready(child, 0); */ - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); return 0; } @@ -313,7 +318,7 @@ static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id, sk_wake_async(sk, 0, POLL_OUT); dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr; - inet_dport(sk) = dst_addr->sin_port; + sdp_inet_dport(sk) = dst_addr->sin_port; sdp_inet_daddr(sk) = dst_addr->sin_addr.s_addr; #ifdef SDP_SOCK_HISTORY @@ -423,11 +428,12 @@ int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) if (sdp_link_layer_ib_only && rdma_node_get_transport(id->device->node_type) == RDMA_TRANSPORT_IB && - rdma_port_link_layer(id->device, id->port_num) != + rdma_port_get_link_layer(id->device, id->port_num) != IB_LINK_LAYER_INFINIBAND) { sdp_dbg(sk, "Link layer is: %d. Only IB link layer " "is allowed\n", - rdma_port_link_layer(id->device, id->port_num)); + rdma_port_get_link_layer(id->device, + id->port_num)); rc = -ENETUNREACH; break; } @@ -461,17 +467,18 @@ int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) if (src_addr->sa_family == AF_INET) { /* IPv4 over IPv6 */ - ipv6_addr_set(&inet6_sk(sk)->rcv_saddr, 0, 0, htonl(0xFFFF), - addr4->sin_addr.s_addr); + ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0, + htonl(0xFFFF), + addr4->sin_addr.s_addr); } else { - inet6_sk(sk)->rcv_saddr = addr6->sin6_addr; + sk->sk_v6_rcv_saddr = addr6->sin6_addr; } - inet6_sk(sk)->saddr = inet6_sk(sk)->rcv_saddr; + inet6_sk(sk)->saddr = sk->sk_v6_rcv_saddr; } else #endif { - inet_saddr(sk) = sdp_inet_rcv_saddr(sk) = + sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) = ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr; } memset(&conn_param, 0, sizeof conn_param); @@ -578,7 +585,7 @@ int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) rc = -ECONNREFUSED; break; case RDMA_CM_EVENT_ESTABLISHED: - inet_saddr(sk) = sdp_inet_rcv_saddr(sk) = + sdp_inet_saddr(sk) = sdp_inet_rcv_saddr(sk) = ((struct sockaddr_in 
*)&id->route.addr.src_addr)->sin_addr.s_addr; rc = sdp_connected_handler(sk); break; diff --git a/drivers/infiniband/ulp/sdp/sdp_dbg.h b/drivers/infiniband/ulp/sdp/sdp_dbg.h index aa8c38b63669..5824e4fb1f4d 100644 --- a/drivers/infiniband/ulp/sdp/sdp_dbg.h +++ b/drivers/infiniband/ulp/sdp/sdp_dbg.h @@ -21,8 +21,8 @@ static inline struct sdp_sock *sdp_sk(const struct sock *sk); printk(level "%s:%d sdp_sock(%5d:%d %d:%d): " format, \ func, line, \ current->pid, smp_processor_id(), \ - (sk) ? inet_num(sk) : -1, \ - (sk) ? ntohs(inet_dport(sk)) : -1, ## arg); \ + (sk) ? sdp_inet_num(sk) : -1, \ + (sk) ? ntohs(sdp_inet_dport(sk)) : -1, ## arg); \ preempt_enable(); \ } while (0) #define sdp_printk(level, sk, format, arg...) \ @@ -49,8 +49,8 @@ struct sdpprf_log { int idx; int pid; int cpu; - int sk_num; - int sk_dport; + int sdpprf_sk_num; + int sdpprf_sk_dport; struct sk_buff *skb; char msg[256]; @@ -72,8 +72,8 @@ extern atomic_t sdpprf_log_count; preempt_disable(); \ l->idx = idx; \ l->pid = current->pid; \ - l->sk_num = (sk) ? inet_num(sk) : -1; \ - l->sk_dport = (sk) ? ntohs(inet_dport(sk)) : -1; \ + l->sdpprf_sk_num = (sk) ? sdp_inet_num(sk) : -1; \ + l->sdpprf_sk_dport = (sk) ? ntohs(sdp_inet_dport(sk)) : -1; \ l->cpu = smp_processor_id(); \ l->skb = s; \ snprintf(l->msg, sizeof(l->msg) - 1, format, ## arg); \ diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c b/drivers/infiniband/ulp/sdp/sdp_main.c index 1067f21ced78..be3b93f19521 100644 --- a/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/drivers/infiniband/ulp/sdp/sdp_main.c @@ -75,11 +75,14 @@ unsigned int csum_partial_copy_from_user_new (const char *src, char *dst, #include #include "sdp.h" #include +#include MODULE_AUTHOR("Michael S. 
Tsirkin"); MODULE_DESCRIPTION("InfiniBand SDP module"); MODULE_LICENSE("Dual BSD/GPL"); +#define ipv6_addr_copy(a, b) (*(a) = *(b)) + #ifdef CONFIG_INFINIBAND_SDP_DEBUG SDP_MODPARAM_INT(sdp_debug_level, 0, "Enable debug tracing if > 0."); #endif @@ -88,7 +91,7 @@ SDP_MODPARAM_INT(sdp_data_debug_level, 0, "Enable data path debug tracing if > 0."); #endif -SDP_MODPARAM_INT(sdp_apm_enable, 1, "Enable APM."); +SDP_MODPARAM_INT(sdp_apm_enable, 0, "Enable APM."); SDP_MODPARAM_SINT(sdp_fmr_pool_size, 20, "Number of FMRs to allocate for pool"); SDP_MODPARAM_SINT(sdp_fmr_dirty_wm, 5, "Watermark to flush fmr pool"); @@ -160,24 +163,26 @@ static int sdp_get_port(struct sock *sk, unsigned short snum) sdp_add_to_history(sk, __func__); if (!ssk->id) - ssk->id = rdma_create_id(sdp_cma_handler, sk, RDMA_PS_SDP); + ssk->id = rdma_create_id(sdp_cma_handler, sk, RDMA_PS_SDP, + IB_QPT_RC); if (!ssk->id) return -ENOMEM; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (inet6_sk(sk)) { - int addr_type = ipv6_addr_type(&inet6_sk(sk)->rcv_saddr); + int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); if (addr_type == IPV6_ADDR_MAPPED) { addr4->sin_family = AF_INET; addr4->sin_port = htons(snum); - addr4->sin_addr.s_addr = inet6_sk(sk)->rcv_saddr.s6_addr32[3]; + addr4->sin_addr.s_addr = + sk->sk_v6_rcv_saddr.s6_addr32[3]; addr_len = sizeof(*addr4); } else { addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(snum); addr6->sin6_scope_id = sk->sk_bound_dev_if; - ipv6_addr_copy(&addr6->sin6_addr, &inet6_sk(sk)->rcv_saddr); + ipv6_addr_copy(&addr6->sin6_addr, &sk->sk_v6_rcv_saddr); addr_len = sizeof(*addr6); } } @@ -209,7 +214,7 @@ static int sdp_get_port(struct sock *sk, unsigned short snum) } src_addr = (struct sockaddr_in *)&(ssk->id->route.addr.src_addr); - inet_num(sk) = ntohs(src_addr->sin_port); + sdp_inet_num(sk) = ntohs(src_addr->sin_port); #ifdef SDP_SOCK_HISTORY sdp_ssk_hist_rename(sk); #endif @@ -511,13 +516,6 @@ static void sdp_destroy_resources(struct sock *sk) 
sk->sk_send_head = NULL; skb_queue_purge(&sk->sk_write_queue); - /* - * If sendmsg cached page exists, toss it. - */ - if (sk->sk_sndmsg_page) { - __free_page(sk->sk_sndmsg_page); - sk->sk_sndmsg_page = NULL; - } id = ssk->id; if (ssk->id) { @@ -828,10 +826,10 @@ static int sdp_ipv6_connect(struct sock *sk, struct sockaddr_storage *saddr, sk->sk_bound_dev_if = usin->sin6_scope_id; src_addr->sin6_family = AF_INET6; - src_addr->sin6_port = htons(inet_sport(sk)); + src_addr->sin6_port = htons(sdp_inet_sport(sk)); src_addr->sin6_addr = inet6_sk(sk)->saddr; - if (ssk->id && (addr_type != ipv6_addr_type(&inet6_sk(sk)->rcv_saddr))) { + if (ssk->id && (addr_type != ipv6_addr_type(&sk->sk_v6_rcv_saddr))) { sdp_dbg(sk, "Existing address type is different for the " "requested. rebinding socket\n"); rdma_destroy_id(ssk->id); @@ -841,15 +839,16 @@ static int sdp_ipv6_connect(struct sock *sk, struct sockaddr_storage *saddr, if (!ssk->id) { /* If IPv4 over IPv6, make sure rdma_bind will expect ipv4 address */ if (addr_type == IPV6_ADDR_MAPPED) - ipv6_addr_set(&inet6_sk(sk)->rcv_saddr, 0, 0, htonl(0x0000FFFF), 0); + ipv6_addr_set(&sk->sk_v6_rcv_saddr, 0, 0, + htonl(0x0000FFFF), 0); - rc = sdp_get_port(sk, htons(inet_sport(sk))); + rc = sdp_get_port(sk, htons(sdp_inet_sport(sk))); if (rc) return rc; - inet_sport(sk) = htons(inet_num(sk)); + sdp_inet_sport(sk) = htons(sdp_inet_num(sk)); } - ipv6_addr_copy(&inet6_sk(sk)->daddr, &usin->sin6_addr); + ipv6_addr_copy(&sk->sk_v6_daddr, &usin->sin6_addr); if (addr_type == IPV6_ADDR_MAPPED) { struct sockaddr_in *addr4 = (struct sockaddr_in *)uaddr; @@ -889,15 +888,15 @@ static int sdp_ipv4_connect(struct sock *sk, struct sockaddr_storage *saddr, return -EAFNOSUPPORT; if (!ssk->id) { - rc = sdp_get_port(sk, htons(inet_num(sk))); + rc = sdp_get_port(sk, htons(sdp_inet_num(sk))); if (rc) return rc; - inet_sport(sk) = htons(inet_num(sk)); + sdp_inet_sport(sk) = htons(sdp_inet_num(sk)); } src_addr->sin_family = AF_INET; - src_addr->sin_port = 
htons(inet_sport(sk)); - src_addr->sin_addr.s_addr = inet_saddr(sk); + src_addr->sin_port = htons(sdp_inet_sport(sk)); + src_addr->sin_addr.s_addr = sdp_inet_saddr(sk); sdp_dbg(sk, "%s " NIPQUAD_FMT ":%hu -> " NIPQUAD_FMT ":%hu\n", __func__, NIPQUAD(src_addr->sin_addr.s_addr), @@ -1277,7 +1276,7 @@ int sdp_init_sock(struct sock *sk) lockdep_set_class(&sk->sk_callback_lock, &ib_sdp_sk_callback_lock_key); - sk->sk_route_caps |= NETIF_F_SG | NETIF_F_NO_CSUM; + sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; skb_queue_head_init(&ssk->rx_ctl_q); @@ -1600,7 +1599,7 @@ static int sdp_recv_urg(struct sock *sk, long timeo, if (len > 0) { if (!(flags & MSG_TRUNC)) - err = memcpy_toiovec(msg->msg_iov, &c, 1); + err = memcpy_to_msg(msg, &c, 1); len = 1; } else msg->msg_flags |= MSG_TRUNC; @@ -1646,10 +1645,8 @@ void sdp_skb_entail(struct sock *sk, struct sk_buff *skb) sdp_sk(sk)->nonagle &= ~TCP_NAGLE_PUSH; } -#define TCP_PAGE(sk) (sk->sk_sndmsg_page) -#define TCP_OFF(sk) (sk->sk_sndmsg_off) static inline int sdp_bcopy_get(struct sock *sk, struct sk_buff *skb, - char __user *from, int copy) + struct iov_iter *from, int copy) { int err; struct sdp_sock *ssk = sdp_sk(sk); @@ -1659,7 +1656,7 @@ static inline int sdp_bcopy_get(struct sock *sk, struct sk_buff *skb, /* We have some space in skb head. Superb! 
*/ if (copy > skb_tailroom(skb)) copy = skb_tailroom(skb); - if ((err = skb_add_data(skb, from, copy)) != 0) + if (skb_add_data_nocache(sk, skb, from, copy)) return SDP_ERR_FAULT; } else { /* Put data in skb->frags */ @@ -1667,6 +1664,10 @@ static inline int sdp_bcopy_get(struct sock *sk, struct sk_buff *skb, int i = skb_shinfo(skb)->nr_frags; struct page *page = TCP_PAGE(sk); int off = TCP_OFF(sk); + struct page_frag *pfrag = &sk->sk_frag; + + if (!sk_page_frag_refill(sk, pfrag)) + return SDP_DO_WAIT_MEM; if (skb_can_coalesce(skb, i, page, off) && off != PAGE_SIZE) { @@ -1696,7 +1697,10 @@ static inline int sdp_bcopy_get(struct sock *sk, struct sk_buff *skb, if (!page) { /* Allocate new cache page. */ - page = sk_stream_alloc_page(sk); + pfrag = &sk->sk_frag; + page = pfrag->page; + if (!sk_page_frag_refill(sk, pfrag)) + return SDP_DO_WAIT_MEM; if (!page) return SDP_DO_WAIT_MEM; } @@ -1704,16 +1708,8 @@ static inline int sdp_bcopy_get(struct sock *sk, struct sk_buff *skb, /* Time to copy data. We are close to * the end! */ SDPSTATS_COUNTER_ADD(memcpy_count, copy); - err = skb_copy_to_page(sk, from, skb, page, - off, copy); + err = skb_copy_to_page_nocache(sk, from, skb, page, off, copy); if (err) { - /* If this page was new, give it to the - * socket so it does not get leaked. 
- */ - if (!TCP_PAGE(sk)) { - TCP_PAGE(sk) = page; - TCP_OFF(sk) = 0; - } return SDP_ERR_ERROR; } @@ -1750,7 +1746,7 @@ int sdp_tx_wait_memory(struct sdp_sock *ssk, long *timeo_p, int *credits_needed) DEFINE_WAIT(wait); if (sk_stream_memory_free(sk)) - current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2; + current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; while (1) { set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -1839,8 +1835,7 @@ do_interrupted: /* Like tcp_sendmsg */ /* TODO: check locking */ -static int sdp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size) +static int sdp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { int i; struct sdp_sock *ssk = sdp_sk(sk); @@ -1848,6 +1843,7 @@ static int sdp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int flags; const int size_goal = MIN(ssk->xmit_size_goal, SDP_MAX_PAYLOAD); int err, copied; + int iov_num = msg->msg_iter.nr_segs; long timeo; int zcopy_thresh = -1 != ssk->zcopy_thresh ? 
ssk->zcopy_thresh : sdp_zcopy_thresh; @@ -1879,12 +1875,13 @@ static int sdp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; - for (i = 0; i < msg->msg_iovlen; i++) { - struct iovec *iov = &msg->msg_iov[i]; + for (i = 0; i < iov_num; i++) { + const struct iovec *iov = &msg->msg_iter.iov[i]; int seglen = iov->iov_len; - char __user *from = iov->iov_base; + struct iov_iter *from = &msg->msg_iter; - sdp_dbg_data(sk, "Sending iov: 0x%x/0x%zx %p\n", i, msg->msg_iovlen, from); + sdp_dbg_data(sk, "Sending iov: 0x%x/0x%zx %p\n", i, + msg->msg_iter.nr_segs, from); SDPSTATS_HIST(sendmsg_seglen, seglen); @@ -1894,7 +1891,7 @@ static int sdp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ssk->sdp_dev->fmr_pool && !(flags & MSG_OOB)) { int zcopied = 0; - zcopied = sdp_sendmsg_zcopy(iocb, sk, iov); + zcopied = sdp_sendmsg_zcopy(sk, &msg->msg_iter, i); if (zcopied < 0) { sdp_dbg_data(sk, "ZCopy send err: %d\n", zcopied); @@ -1904,7 +1901,6 @@ static int sdp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, copied += zcopied; seglen = iov->iov_len; - from = iov->iov_base; sdp_dbg_data(sk, "ZCopied: 0x%x/0x%x\n", zcopied, seglen); } @@ -1943,8 +1939,7 @@ new_segment: * Check whether we can use HW checksum. */ if (sk->sk_route_caps & - (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | - NETIF_F_HW_CSUM)) + (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) skb->ip_summed = CHECKSUM_PARTIAL; sdp_skb_entail(sk, skb); @@ -1991,7 +1986,6 @@ new_segment: SDP_SKB_CB(skb)->end_seq += copy; /*unused: skb_shinfo(skb)->gso_segs = 0;*/ - from += copy; copied += copy; seglen -= copy; continue; @@ -2091,9 +2085,8 @@ int sdp_abort_rx_srcavail(struct sock *sk, int post_sendsm) /* Like tcp_recvmsg */ /* Maybe use skb_recv_datagram here? */ /* Note this does not seem to handle vectored messages. Relevant? 
*/ -static int sdp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, - int *addr_len) +static int sdp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct sk_buff *skb = NULL; struct sdp_sock *ssk = sdp_sk(sk); @@ -2111,8 +2104,8 @@ static int sdp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, lock_sock(sk); ssk->cpu = smp_processor_id(); sdp_dbg_data(sk, "iovlen: %zd iov_len: 0x%zx flags: 0x%x peek: 0x%x\n", - msg->msg_iovlen, msg->msg_iov[0].iov_len, flags, - MSG_PEEK); + msg->msg_iter.nr_segs, msg->msg_iter.iov[0].iov_len, + flags, MSG_PEEK); posts_handler_get(ssk); @@ -2345,9 +2338,10 @@ sdp_mid_data: if (!(flags & MSG_TRUNC)) { if (rx_sa && offset >= skb->len) { /* No more payload - start rdma copy */ - sdp_dbg_data(sk, "RDMA copy of 0x%lx bytes\n", used); - err = sdp_rdma_to_iovec(sk, msg->msg_iov, msg->msg_iovlen, skb, - &used, offset); + sdp_dbg_data(sk, "RDMA copy of 0x%lx bytes\n", + used); + err = sdp_rdma_to_iter(sk, &msg->msg_iter, skb, + &used, offset); if (unlikely(err)) { /* ssk->rx_sa might had been freed when * we slept. */ @@ -2362,11 +2356,12 @@ sdp_mid_data: } } else { sdp_dbg_data(sk, "memcpy 0x%lx bytes +0x%x -> %p\n", - used, offset, msg->msg_iov[0].iov_base); + used, offset, + msg->msg_iter.iov[0].iov_base); - err = skb_copy_datagram_iovec(skb, offset, + err = skb_copy_datagram_msg(skb, offset, /* TODO: skip header? 
*/ - msg->msg_iov, used); + msg, used); if (rx_sa && !(flags & MSG_PEEK)) { rx_sa->copied += used; rx_sa->reported += used; @@ -2482,7 +2477,7 @@ static int sdp_listen(struct sock *sk, int backlog) rc = sdp_get_port(sk, 0); if (rc) return rc; - inet_sport(sk) = htons(inet_num(sk)); + sdp_inet_sport(sk) = htons(sdp_inet_num(sk)); } rc = rdma_listen(ssk->id, backlog); @@ -2595,7 +2590,7 @@ void sdp_urg(struct sdp_sock *ssk, struct sk_buff *skb) BUG(); ssk->urg_data = TCP_URG_VALID | tmp; if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); } static struct percpu_counter *sockets_allocated; @@ -2629,7 +2624,7 @@ struct proto sdp_proto = { .name = "SDP", }; -static struct proto_ops sdp_ipv4_proto_ops = { +static const struct proto_ops sdp_ipv4_proto_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -2653,7 +2648,7 @@ static struct proto_ops sdp_ipv4_proto_ops = { }; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static struct proto_ops sdp_ipv6_proto_ops = { +static const struct proto_ops sdp_ipv6_proto_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -2677,8 +2672,10 @@ static struct proto_ops sdp_ipv6_proto_ops = { }; #endif -static int sdp_create_ipvx_socket(struct net *net, struct socket *sock, int protocol, - struct proto_ops *proto_ops) +static int sdp_create_ipvx_socket(struct net *net, struct socket *sock, + int protocol, + const struct proto_ops *proto_ops, + int kern) { struct sock *sk; int rc; @@ -2699,7 +2696,7 @@ static int sdp_create_ipvx_socket(struct net *net, struct socket *sock, int prot return -EPROTONOSUPPORT; } - sk = sk_alloc(net, PF_INET_SDP, GFP_KERNEL, &sdp_proto); + sk = sk_alloc(net, PF_INET_SDP, GFP_KERNEL, &sdp_proto, kern); if (!sk) { sdp_warn(NULL, "SDP: failed to allocate socket.\n"); return -ENOMEM; @@ -2734,17 +2731,19 @@ static int sdp_create_ipvx_socket(struct net *net, struct socket *sock, int prot } #if defined(CONFIG_IPV6) || 
defined(CONFIG_IPV6_MODULE) -static int sdp_create_v6_socket(struct net *net, struct socket *sock, int protocol, - int kern) +static int sdp_create_v6_socket(struct net *net, struct socket *sock, + int protocol, int kern) { - return sdp_create_ipvx_socket(net, sock, protocol, &sdp_ipv6_proto_ops); + return sdp_create_ipvx_socket(net, sock, protocol, &sdp_ipv6_proto_ops, + kern); } #endif -static int sdp_create_v4_socket(struct net *net, struct socket *sock, int protocol, - int kern) +static int sdp_create_v4_socket(struct net *net, struct socket *sock, + int protocol, int kern) { - return sdp_create_ipvx_socket(net, sock, protocol, &sdp_ipv4_proto_ops); + return sdp_create_ipvx_socket(net, sock, protocol, &sdp_ipv4_proto_ops, + kern); } static void sdp_add_device(struct ib_device *device) @@ -2910,8 +2909,11 @@ static int __init sdp_init(void) if (!orphan_count) goto no_mem_orphan_count; - percpu_counter_init(sockets_allocated, 0); - percpu_counter_init(orphan_count, 0); + /* percpu_counter_init func definition changed in + * git id: 908c7f1949cb7cc6e92ba8f18f2998e87e265b8e + */ + percpu_counter_init(sockets_allocated, 0, GFP_KERNEL); + percpu_counter_init(orphan_count, 0, GFP_KERNEL); sdp_proto.sockets_allocated = sockets_allocated; sdp_proto.orphan_count = orphan_count; diff --git a/drivers/infiniband/ulp/sdp/sdp_proc.c b/drivers/infiniband/ulp/sdp/sdp_proc.c index 5dbd3fe94d3d..07739a0254fc 100644 --- a/drivers/infiniband/ulp/sdp/sdp_proc.c +++ b/drivers/infiniband/ulp/sdp/sdp_proc.c @@ -144,7 +144,7 @@ static int sdp_v4_seq_show(struct seq_file *seq, int num, struct sock *sk) char tmpbuf[TMPSZ + 1]; unsigned int dest; unsigned int src; - int uid; + kuid_t uid; unsigned long inode; __u16 destp; __u16 srcp; @@ -152,15 +152,15 @@ static int sdp_v4_seq_show(struct seq_file *seq, int num, struct sock *sk) dest = sdp_inet_daddr(sk); src = sdp_inet_rcv_saddr(sk); - destp = ntohs(inet_dport(sk)); - srcp = ntohs(inet_sport(sk)); + destp = ntohs(sdp_inet_dport(sk)); + 
srcp = ntohs(sdp_inet_sport(sk)); uid = sock_i_uid(sk); inode = sock_i_ino(sk); rx_queue = rcv_nxt(sdp_sk(sk)) - sdp_sk(sk)->copied_seq; tx_queue = sdp_sk(sk)->write_seq - sdp_sk(sk)->tx_ring.una_seq; - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %5d %lu %08X:%08X %X", - num, src, srcp, dest, destp, uid, inode, + sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %5u %lu %08X:%08X %X", + num, src, srcp, dest, destp, uid.val, inode, rx_queue, tx_queue, sk->sk_state); seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf); @@ -173,33 +173,32 @@ static int sdp_v6_seq_show(struct seq_file *seq, int num, struct sock *sk) char tmpbuf[TMPSZ + 1]; struct in6_addr *src; struct in6_addr *dest; - int uid; + kuid_t uid; unsigned long inode; __u16 destp; __u16 srcp; __u32 rx_queue, tx_queue; - dest = &inet6_sk(sk)->daddr; - src = &inet6_sk(sk)->rcv_saddr; - destp = ntohs(inet_dport(sk)); - srcp = ntohs(inet_sport(sk)); + dest = &sk->sk_v6_daddr; + src = &sk->sk_v6_rcv_saddr; + destp = ntohs(sdp_inet_dport(sk)); + srcp = ntohs(sdp_inet_sport(sk)); uid = sock_i_uid(sk); inode = sock_i_ino(sk); rx_queue = rcv_nxt(sdp_sk(sk)) - sdp_sk(sk)->copied_seq; tx_queue = sdp_sk(sk)->write_seq - sdp_sk(sk)->tx_ring.una_seq; sprintf(tmpbuf, - "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%5d %lu %08X:%08X %X", - num, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], - srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], - destp, - uid, inode, - rx_queue, tx_queue, sk->sk_state); + "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X %5u %lu %08X:%08X %X", + num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], + srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], + destp, + uid.val, inode, + rx_queue, tx_queue, sk->sk_state); seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf); @@ -232,7 +231,7 @@ out: static int sdp_seq_open(struct inode *inode, struct file *file) { - struct 
sdp_seq_afinfo *afinfo = PDE(inode)->data; + struct sdp_seq_afinfo *afinfo = PDE_DATA(inode); struct seq_file *seq; struct sdp_iter_state *s; int rc; @@ -276,7 +275,7 @@ static struct sdp_seq_afinfo sdp_seq_afinfo = { }; #ifdef SDPSTATS_ON -DEFINE_PER_CPU(struct sdpstats, sdpstats); +DEFINE_PER_CPU(struct sdpstats_t, sdpstats); static void sdpstats_seq_hist(struct seq_file *seq, char *str, u32 *h, int n, int is_log) @@ -336,7 +335,7 @@ static void sdpstats_seq_hist(struct seq_file *seq, char *str, u32 *h, int n, }) #define __sdpstats_seq_hist(seq, msg, hist, is_log) ({ \ - int hist_len = ARRAY_SIZE(__get_cpu_var(sdpstats).hist);\ + int hist_len = ARRAY_SIZE(sdpstats.hist);\ memset(h, 0, sizeof(*h) * h_len); \ SDPSTATS_HIST_GET(hist, hist_len, h); \ sdpstats_seq_hist(seq, msg, h, hist_len, is_log);\ @@ -387,13 +386,12 @@ static int sdpstats_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "memcpy_count \t\t: %u\n", SDPSTATS_COUNTER_GET(memcpy_count)); - for (i = 0; i < ARRAY_SIZE(__get_cpu_var(sdpstats).post_send); i++) { - if (mid2str(i)) { - seq_printf(seq, "post_send %-20s\t: %u\n", - mid2str(i), - SDPSTATS_COUNTER_GET(post_send[i])); - } - } + for (i = 0; i < ARRAY_SIZE(sdpstats.post_send); i++) { + if (mid2str(i)) { + seq_printf(seq, "post_send %-20s\t: %u\n", mid2str(i), + SDPSTATS_COUNTER_GET(post_send[i])); + } + } seq_printf(seq, "\n"); seq_printf(seq, "sdp_recvmsg() calls\t\t: %u\n", @@ -455,7 +453,7 @@ static ssize_t sdpstats_write(struct file *file, const char __user *buf, int i; for_each_possible_cpu(i) - memset(&per_cpu(sdpstats, i), 0, sizeof(struct sdpstats)); + memset(&per_cpu(sdpstats, i), 0, sizeof(struct sdpstats_t)); printk(KERN_WARNING "Cleared sdp statistics\n"); return count; @@ -519,8 +517,8 @@ static int sdpprf_show(struct seq_file *m, void *v) seq_printf(m, "%-6d: [%5lu.%06lu] %-50s - [%d{%d} %d:%d] " "skb: %p %s:%d\n", l->idx, t, usec_rem, - l->msg, l->pid, l->cpu, l->sk_num, l->sk_dport, - l->skb, l->func, l->line); + l->msg, 
l->pid, l->cpu, l->sdpprf_sk_num, + l->sdpprf_sk_dport, l->skb, l->func, l->line); out: return 0; } @@ -762,8 +760,8 @@ static struct file_operations ssk_hist_fops = { static void sdp_ssk_hist_name(char *sk_name, int len, struct sock *sk) { - int lport = inet_num(sk); - int rport = ntohs(inet_dport(sk)); + int lport = sdp_inet_num(sk); + int rport = ntohs(sdp_inet_dport(sk)); snprintf(sk_name, len, "%05x_%d:%d", sdp_sk(sk)->sk_id, lport, rport); @@ -842,17 +840,15 @@ int __init sdp_proc_init(void) } #endif - p = proc_net_fops_create(&init_net, sdp_seq_afinfo.name, S_IRUGO, - sdp_seq_afinfo.seq_fops); - if (p) - p->data = &sdp_seq_afinfo; - else + p = proc_create_data(sdp_seq_afinfo.name, S_IRUGO, init_net.proc_net, + sdp_seq_afinfo.seq_fops, &sdp_seq_afinfo); + if (!p) goto no_mem; #ifdef SDPSTATS_ON - stats = proc_net_fops_create(&init_net, PROC_SDP_STATS, - S_IRUGO | S_IWUGO, &sdpstats_fops); + stats = proc_create(PROC_SDP_STATS, S_IRUGO | S_IWUGO, + init_net.proc_net, &sdpstats_fops); if (!stats) goto no_mem_stats; @@ -872,11 +868,11 @@ no_mem_prof: #endif #ifdef SDPSTATS_ON - proc_net_remove(&init_net, PROC_SDP_STATS); + remove_proc_entry(PROC_SDP_STATS, init_net.proc_net); no_mem_stats: #endif - proc_net_remove(&init_net, sdp_seq_afinfo.name); + remove_proc_entry(sdp_seq_afinfo.name, init_net.proc_net); no_mem: return -ENOMEM; @@ -884,11 +880,11 @@ no_mem: void sdp_proc_unregister(void) { - proc_net_remove(&init_net, sdp_seq_afinfo.name); + remove_proc_entry(sdp_seq_afinfo.name, init_net.proc_net); memset(sdp_seq_afinfo.seq_fops, 0, sizeof(*sdp_seq_afinfo.seq_fops)); #ifdef SDPSTATS_ON - proc_net_remove(&init_net, PROC_SDP_STATS); + remove_proc_entry(PROC_SDP_STATS, init_net.proc_net); #endif #ifdef SDP_PROFILING debugfs_remove(sdp_prof_file); diff --git a/drivers/infiniband/ulp/sdp/sdp_rx.c b/drivers/infiniband/ulp/sdp/sdp_rx.c index 7fa392d5955a..92cb1f8fdf9e 100644 --- a/drivers/infiniband/ulp/sdp/sdp_rx.c +++ b/drivers/infiniband/ulp/sdp/sdp_rx.c @@ -32,6 
+32,8 @@ #include #include #include +#include +#include #include #include #include "sdp.h" @@ -195,13 +197,14 @@ static int sdp_post_recv(struct sdp_sock *ssk) pages_alloced++; } frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; + frag->page.p = page; frag->page_offset = 0; frag->size = min(PAGE_SIZE, SDP_MAX_PAYLOAD); ++skb_shinfo(skb)->nr_frags; } skb->truesize += ssk->recv_frags * min(PAGE_SIZE, SDP_MAX_PAYLOAD); - if (!sk_rmem_schedule(sk, ssk->recv_frags * min(PAGE_SIZE, SDP_MAX_PAYLOAD))) { + if (!sk_rmem_schedule(sk, skb, ssk->recv_frags * min(PAGE_SIZE, + SDP_MAX_PAYLOAD))) { sdp_dbg(sk, "RX couldn't post, rx posted = %d.", rx_ring_posted(sdp_sk(sk))); sdp_dbg(sk, "Out of memory\n"); @@ -224,10 +227,11 @@ static int sdp_post_recv(struct sdp_sock *ssk) if (rx_req->mapping[i + 1]) { addr = rx_req->mapping[i + 1]; } else { - addr = ib_dma_map_page(dev, skb_shinfo(skb)->frags[i].page, - skb_shinfo(skb)->frags[i].page_offset, - skb_shinfo(skb)->frags[i].size, - DMA_FROM_DEVICE); + addr = ib_dma_map_page(dev, + skb_shinfo(skb)->frags[i].page.p, + skb_shinfo(skb)->frags[i].page_offset, + skb_shinfo(skb)->frags[i].size, + DMA_FROM_DEVICE); BUG_ON(ib_dma_mapping_error(dev, addr)); rx_req->mapping[i + 1] = addr; } @@ -352,7 +356,7 @@ mid_data: skb_queue_tail(&sk->sk_receive_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb_len); + sk->sk_data_ready(sk); return skb; } @@ -561,7 +565,7 @@ static int sdp_process_rx_skb(struct sdp_sock *ssk, struct sk_buff *skb) skb_shinfo(skb)->nr_frags = pagesz / PAGE_SIZE; for (i = skb_shinfo(skb)->nr_frags; i < frags; ++i) { - put_page(skb_shinfo(skb)->frags[i].page); + put_page(skb_shinfo(skb)->frags[i].page.p); } if (unlikely(h->flags & SDP_OOB_PEND)) @@ -885,6 +889,9 @@ static void sdp_arm_cq_timer(unsigned long data) sdp_arm_rx_cq(sk_ssk(ssk)); } +/* SHAMIR - TODO: remove this!!! 
*/ +#define IB_CQ_VECTOR_LEAST_ATTACHED 0 + int sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device) { struct ib_cq *rx_cq; diff --git a/drivers/infiniband/ulp/sdp/sdp_tx.c b/drivers/infiniband/ulp/sdp/sdp_tx.c index f99b87ec99ce..a7b68de2a468 100644 --- a/drivers/infiniband/ulp/sdp/sdp_tx.c +++ b/drivers/infiniband/ulp/sdp/sdp_tx.c @@ -31,6 +31,8 @@ */ #include #include +#include +#include #include #include #include "sdp.h" @@ -38,7 +40,7 @@ #define sdp_cnt(var) do { (var)++; } while (0) SDP_MODPARAM_SINT(sdp_keepalive_probes_sent, 0, - "Total number of keepalive probes sent."); + "Total number of keepalive probes sent."); static int sdp_process_tx_cq(struct sdp_sock *ssk); @@ -150,7 +152,8 @@ void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb) frags = skb_shinfo(skb)->nr_frags; for (i = 0; i < frags; ++i) { ++sge; - addr = ib_dma_map_page(dev, skb_shinfo(skb)->frags[i].page, + addr = ib_dma_map_page(dev, + skb_shinfo(skb)->frags[i].page.p, skb_shinfo(skb)->frags[i].page_offset, skb_shinfo(skb)->frags[i].size, DMA_TO_DEVICE); @@ -452,6 +455,9 @@ static void sdp_tx_cq_event_handler(struct ib_event *event, void *data) { } +/* SHAMIR - TODO: remove this!!! 
*/ +#define IB_CQ_VECTOR_LEAST_ATTACHED 0 + int sdp_tx_ring_create(struct sdp_sock *ssk, struct ib_device *device) { struct ib_cq *tx_cq; diff --git a/drivers/infiniband/ulp/sdp/sdp_zcopy.c b/drivers/infiniband/ulp/sdp/sdp_zcopy.c index 6abbca160bba..7700a55d26a5 100644 --- a/drivers/infiniband/ulp/sdp/sdp_zcopy.c +++ b/drivers/infiniband/ulp/sdp/sdp_zcopy.c @@ -53,7 +53,6 @@ static int sdp_post_srcavail(struct sock *sk, struct tx_srcavail_state *tx_sa) int payload_len; struct page *payload_pg; int off, len; - struct ib_umem_chunk *chunk; if (ssk->tx_sa) { /* ssk->tx_sa might already be there in a case of @@ -70,12 +69,9 @@ static int sdp_post_srcavail(struct sock *sk, struct tx_srcavail_state *tx_sa) BUG_ON(!tx_sa); BUG_ON(!tx_sa->fmr || !tx_sa->fmr->fmr->lkey); BUG_ON(!tx_sa->umem); - BUG_ON(!tx_sa->umem->chunk_list.next); + BUG_ON(!tx_sa->umem->sg_head.sgl); - chunk = list_entry(tx_sa->umem->chunk_list.next, struct ib_umem_chunk, list); - BUG_ON(!chunk->nmap); - - off = tx_sa->umem->offset; + off = ib_umem_offset(tx_sa->umem); len = tx_sa->umem->length; tx_sa->bytes_sent = tx_sa->bytes_acked = 0; @@ -93,7 +89,7 @@ static int sdp_post_srcavail(struct sock *sk, struct tx_srcavail_state *tx_sa) /* must have payload inlined in SrcAvail packet in combined mode */ payload_len = MIN(tx_sa->umem->page_size - off, len); payload_len = MIN(payload_len, ssk->xmit_size_goal - sizeof(struct sdp_srcah)); - payload_pg = sg_page(&chunk->page_list[0]); + payload_pg = sg_page(tx_sa->umem->sg_head.sgl); get_page(payload_pg); sdp_dbg_data(sk, "payload: off: 0x%x, pg: %p, len: 0x%x\n", @@ -287,18 +283,12 @@ int sdp_post_sendsm(struct sock *sk) return 0; } -static int sdp_update_iov_used(struct sock *sk, struct iovec *iov, int len) +static int sdp_update_iov_used(struct sock *sk, struct iov_iter *msg_iter, + int len) { sdp_dbg_data(sk, "updating consumed 0x%x bytes from iov\n", len); - while (len > 0) { - if (iov->iov_len) { - int copy = min_t(unsigned int, iov->iov_len, len); - len 
-= copy; - iov->iov_len -= copy; - iov->iov_base += copy; - } - iov++; - } + + iov_iter_advance(msg_iter, len); return 0; } @@ -397,8 +387,8 @@ static int sdp_alloc_fmr(struct sock *sk, void *uaddr, size_t len, struct ib_umem *umem; struct ib_device *dev = sdp_sk(sk)->ib_device; u64 *pages; - struct ib_umem_chunk *chunk; - int n = 0, j, k; + struct scatterlist *sg; + int n = 0, k, i; int rc = 0; unsigned long max_lockable_bytes; @@ -434,8 +424,8 @@ static int sdp_alloc_fmr(struct sock *sk, void *uaddr, size_t len, goto err_umem_get; } - sdp_dbg_data(sk, "umem->offset = 0x%x, length = 0x%zx\n", - umem->offset, umem->length); + sdp_dbg_data(sk, "ib_umem_ofset(umem) = 0x%x, length = 0x%zx\n", + ib_umem_offset(umem), umem->length); pages = (u64 *) __get_free_page(GFP_KERNEL); if (!pages) { @@ -443,25 +433,22 @@ static int sdp_alloc_fmr(struct sock *sk, void *uaddr, size_t len, goto err_pages_alloc; } - list_for_each_entry(chunk, &umem->chunk_list, list) { - for (j = 0; j < chunk->nmap; ++j) { - unsigned len2; - len2 = ib_sg_dma_len(dev, - &chunk->page_list[j]) >> PAGE_SHIFT; + for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { + unsigned len2; - SDP_WARN_ON(len2 > len); - len -= len2; + len2 = ib_sg_dma_len(dev, sg) >> PAGE_SHIFT; - for (k = 0; k < len2; ++k) { - pages[n++] = ib_sg_dma_address(dev, - &chunk->page_list[j]) + - umem->page_size * k; - BUG_ON(n >= SDP_FMR_SIZE); - } + SDP_WARN_ON(len2 > len); + len -= len2; + for (k = 0; k < len2; ++k) { + pages[n++] = ib_sg_dma_address(dev, sg) + + umem->page_size * k; + BUG_ON(n >= SDP_FMR_SIZE); } } - fmr = ib_fmr_pool_map_phys(sdp_sk(sk)->sdp_dev->fmr_pool, pages, n, 0); + fmr = ib_fmr_pool_map_phys(sdp_sk(sk)->sdp_dev->fmr_pool, pages, n, 0, + NULL); if (IS_ERR(fmr)) { sdp_dbg_data(sk, "Error allocating fmr: %ld\n", PTR_ERR(fmr)); SDPSTATS_COUNTER_INC(fmr_alloc_error); @@ -518,7 +505,7 @@ static int sdp_post_rdma_read(struct sock *sk, struct rx_srcavail_state *rx_sa, ssk->tx_ring.rdma_inflight = rx_sa; - sge.addr = 
rx_sa->umem->offset; + sge.addr = ib_umem_offset(rx_sa->umem); sge.length = rx_sa->umem->length; sge.lkey = rx_sa->fmr->fmr->lkey; @@ -538,12 +525,14 @@ static int sdp_post_rdma_read(struct sock *sk, struct rx_srcavail_state *rx_sa, return rc; } -int sdp_rdma_to_iovec(struct sock *sk, struct iovec *iov, int msg_iovlen, - struct sk_buff *skb, unsigned long *used, u32 offset) +int sdp_rdma_to_iter(struct sock *sk, struct iov_iter *msg_iter, + struct sk_buff *skb, unsigned long *used, u32 offset) { struct sdp_sock *ssk = sdp_sk(sk); struct rx_srcavail_state *rx_sa = RX_SRCAVAIL_STATE(skb); int rc = 0; + const struct iovec *iov = msg_iter->iov; + int msg_iovlen = msg_iter->nr_segs; int len = *used; int copied; int i = 0; @@ -591,7 +580,7 @@ int sdp_rdma_to_iovec(struct sock *sk, struct iovec *iov, int msg_iovlen, copied = rx_sa->umem->length; - sdp_update_iov_used(sk, iov, copied); + sdp_update_iov_used(sk, msg_iter, copied); atomic_add(copied, &ssk->rcv_nxt); *used = copied; rx_sa->copied += copied; @@ -630,12 +619,15 @@ static inline int wait_for_sndbuf(struct sock *sk, long *timeo_p) return ret; } -static int do_sdp_sendmsg_zcopy(struct sock *sk, struct tx_srcavail_state *tx_sa, - struct iovec *iov, long *timeo) +static int do_sdp_sendmsg_zcopy(struct sock *sk, + struct tx_srcavail_state *tx_sa, + struct iov_iter *msg_iter, int iov_idx, + long *timeo) { struct sdp_sock *ssk = sdp_sk(sk); int rc = 0; unsigned long lock_flags; + const struct iovec *iov = &msg_iter->iov[iov_idx]; rc = sdp_alloc_fmr(sk, iov->iov_base, iov->iov_len, &tx_sa->fmr, &tx_sa->umem, IB_ACCESS_REMOTE_READ, sdp_zcopy_thresh); @@ -694,7 +686,7 @@ static int do_sdp_sendmsg_zcopy(struct sock *sk, struct tx_srcavail_state *tx_sa spin_unlock_irqrestore(&ssk->tx_sa_lock, lock_flags); err_abort_send: - sdp_update_iov_used(sk, iov, tx_sa->bytes_acked); + sdp_update_iov_used(sk, msg_iter, tx_sa->bytes_acked); err_no_tx_slots: sdp_free_fmr(sk, &tx_sa->fmr, &tx_sa->umem); @@ -703,12 +695,13 @@ 
err_alloc_fmr: return rc; } -int sdp_sendmsg_zcopy(struct kiocb *iocb, struct sock *sk, struct iovec *iov) +int sdp_sendmsg_zcopy(struct sock *sk, struct iov_iter *msg_iter, int iov_idx) { struct sdp_sock *ssk = sdp_sk(sk); int rc = 0; long timeo = SDP_SRCAVAIL_ADV_TIMEOUT; struct tx_srcavail_state *tx_sa; + const struct iovec *iov = &msg_iter->iov[iov_idx]; size_t bytes_to_copy = iov->iov_len; int copied = 0; @@ -737,7 +730,7 @@ int sdp_sendmsg_zcopy(struct kiocb *iocb, struct sock *sk, struct iovec *iov) do { tx_sa_reset(tx_sa); - rc = do_sdp_sendmsg_zcopy(sk, tx_sa, iov, &timeo); + rc = do_sdp_sendmsg_zcopy(sk, tx_sa, msg_iter, iov_idx, &timeo); if (iov->iov_len && iov->iov_len < sdp_zcopy_thresh) { sdp_dbg_data(sk, "0x%zx bytes left, switching to bcopy\n",