To reduce SGEs, commit 23f90cc ("RDS: fix the sg allocation
based on actual message size") used the buddy allocator to
allocate large pages based on message size.
This change, however, seems to create an issue for the TCP
transport, most likely triggering a memory leak somewhere in the
RDS TCP driver path.
The same core code with large pages seems to work just fine with
IB transport.
This patch avoids the huge-page allocation for RDS TCP sockets.
Orabug: 23635336
Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
rs->rs_conn = 0;
rs->rs_netfilter_enabled = 0;
rs->rs_rx_traces = 0;
+ rs->rs_large_page = true;
if (rs->rs_bound_addr)
printk(KERN_CRIT "bound addr %x at create\n", rs->rs_bound_addr);
}
rs->rs_transport = trans;
+ if (rs->rs_transport->t_type == RDS_TRANS_TCP)
+ rs->rs_large_page = false;
+
ret = 0;
out:
sg_init_table(&frag->f_sg, 1);
ret = rds_page_remainder_alloc(&frag->f_sg,
- ic->i_frag_sz, page_mask);
+ ic->i_frag_sz, page_mask, true);
if (ret) {
kmem_cache_free(rds_ib_frag_slab, frag);
atomic_sub(ic->i_frag_pages, &rds_ib_allocation);
sg_init_table(&recv->r_frag->f_sg, 1);
if (recv->r_ic)
ret = rds_page_remainder_alloc(&recv->r_frag->f_sg,
- recv->r_ic->i_frag_sz, page_mask);
+ recv->r_ic->i_frag_sz, page_mask, true);
else
ret = rds_page_remainder_alloc(&recv->r_frag->f_sg,
- RDS_FRAG_SIZE, page_mask);
+ RDS_FRAG_SIZE, page_mask, true);
if (ret) {
kmem_cache_free(rds_ib_frag_slab, recv->r_frag);
goto out;
}
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
- gfp_t gfp)
+ gfp_t gfp, bool large_page)
{
unsigned long to_copy, nbytes;
unsigned long sg_off;
if (!sg_page(sg)) {
ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
GFP_ATOMIC == gfp ?
- gfp : GFP_HIGHUSER);
+ gfp : GFP_HIGHUSER,
+ large_page);
if (ret)
return ret;
* reference until they are done with the region.
*/
int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
- gfp_t gfp)
+ gfp_t gfp, bool large_page)
{
struct rds_page_remainder *rem;
unsigned long flags;
struct page *page;
int ret;
+ unsigned int order, size;
gfp |= __GFP_HIGHMEM;
/* jump straight to allocation if we're trying for a huge page */
if (bytes >= PAGE_SIZE) {
- page = alloc_pages(gfp, get_order(bytes));
+ if (large_page) {
+ order = get_order(bytes);
+ size = bytes;
+ } else {
+ order = 0;
+ size = PAGE_SIZE;
+ }
+
+ page = alloc_pages(gfp, order);
if (!page) {
ret = -ENOMEM;
} else {
- sg_set_page(scat, page, bytes, 0);
+ sg_set_page(scat, page, size, 0);
ret = 0;
}
goto out;
/* Socket receive path trace points*/
u8 rs_rx_traces;
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+
+ bool rs_large_page;
};
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
- gfp_t gfp);
+ gfp_t gfp, bool n);
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
__be16 dport, u64 seq);
int rds_message_add_extension(struct rds_header *hdr,
/* page.c */
int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
- gfp_t gfp);
+ gfp_t gfp, bool n);
int rds_page_copy_user(struct page *page, unsigned long offset,
void __user *ptr, unsigned long bytes,
int to_user);
int nonblock = msg->msg_flags & MSG_DONTWAIT;
long timeo = sock_sndtimeo(sk, nonblock);
size_t total_payload_len = payload_len;
+ bool large_page;
/* Mirror Linux UDP mirror of BSD error message compatibility */
/* XXX: Perhaps MSG_MORE someday */
}
lock_sock(sk);
+ large_page = rs->rs_large_page;
if (daddr == 0 || rs->rs_bound_addr == 0) {
release_sock(sk);
ret = -ENOTCONN; /* XXX not a great errno */
/* Attach data to the rm */
if (payload_len) {
rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
- ret = rds_message_copy_from_user(rm, &msg->msg_iter, GFP_KERNEL);
+ ret = rds_message_copy_from_user(rm, &msg->msg_iter, GFP_KERNEL,
+ large_page);
if (ret)
goto out;
}