From: Linus Torvalds Date: Tue, 2 May 2017 23:40:27 +0000 (-0700) Subject: Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next X-Git-Tag: v4.12-rc1~129 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=8d65b08debc7e62b2c6032d7fe7389d895b92cbc;p=linux-platform-drivers-x86.git Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next Pull networking updates from David Miller: "Here are some highlights from the 2065 networking commits that happened this development cycle: 1) XDP support for IXGBE (John Fastabend) and thunderx (Sunil Kowuri) 2) Add a generic XDP driver, so that anyone can test XDP even if they lack a networking device whose driver has explicit XDP support (me). 3) Sparc64 now has an eBPF JIT too (me) 4) Add a BPF program testing framework via BPF_PROG_TEST_RUN (Alexei Starovoitov) 5) Make netfilter network namespace teardown less expensive (Florian Westphal) 6) Add symmetric hashing support to nft_hash (Laura Garcia Liebana) 7) Implement NAPI and GRO in netvsc driver (Stephen Hemminger) 8) Support TC flower offload statistics in mlxsw (Arkadi Sharshevsky) 9) Multiqueue support in stmmac driver (Joao Pinto) 10) Remove TCP timewait recycling, it never really could possibly work well in the real world and timestamp randomization really zaps any hint of usability this feature had (Soheil Hassas Yeganeh) 11) Support level3 vs level4 ECMP route hashing in ipv4 (Nikolay Aleksandrov) 12) Add socket busy poll support to epoll (Sridhar Samudrala) 13) Netlink extended ACK support (Johannes Berg, Pablo Neira Ayuso, and several others) 14) IPSEC hw offload infrastructure (Steffen Klassert)" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2065 commits) tipc: refactor function tipc_sk_recv_stream() tipc: refactor function tipc_sk_recvmsg() net: thunderx: Optimize page recycling for XDP net: thunderx: Support for XDP header adjustment net: thunderx: Add support for XDP_TX net: thunderx: Add support for XDP_DROP 
net: thunderx: Add basic XDP support net: thunderx: Cleanup receive buffer allocation net: thunderx: Optimize CQE_TX handling net: thunderx: Optimize RBDR descriptor handling net: thunderx: Support for page recycling ipx: call ipxitf_put() in ioctl error path net: sched: add helpers to handle extended actions qed*: Fix issues in the ptp filter config implementation. qede: Fix concurrency issue in PTP Tx path processing. stmmac: Add support for SIMATIC IOT2000 platform net: hns: fix ethtool_get_strings overflow in hns driver tcp: fix wraparound issue in tcp_lp bpf, arm64: fix jit branch offset related to ldimm64 bpf, arm64: implement jiting of BPF_XADD ... --- 8d65b08debc7e62b2c6032d7fe7389d895b92cbc diff --cc drivers/block/nbd.c index 56efb0444b4d,d8a23561b4cb..9b482baa869e --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@@ -1500,535 -1138,10 +1500,535 @@@ out return err; } -/* - * And here should be modules and kernel interface - * (Just smiley confuses emacs :-) +static int find_free_cb(int id, void *ptr, void *data) +{ + struct nbd_device *nbd = ptr; + struct nbd_device **found = data; + + if (!refcount_read(&nbd->config_refs)) { + *found = nbd; + return 1; + } + return 0; +} + +/* Netlink interface. */ +static struct nla_policy nbd_attr_policy[NBD_ATTR_MAX + 1] = { + [NBD_ATTR_INDEX] = { .type = NLA_U32 }, + [NBD_ATTR_SIZE_BYTES] = { .type = NLA_U64 }, + [NBD_ATTR_BLOCK_SIZE_BYTES] = { .type = NLA_U64 }, + [NBD_ATTR_TIMEOUT] = { .type = NLA_U64 }, + [NBD_ATTR_SERVER_FLAGS] = { .type = NLA_U64 }, + [NBD_ATTR_CLIENT_FLAGS] = { .type = NLA_U64 }, + [NBD_ATTR_SOCKETS] = { .type = NLA_NESTED}, + [NBD_ATTR_DEAD_CONN_TIMEOUT] = { .type = NLA_U64 }, + [NBD_ATTR_DEVICE_LIST] = { .type = NLA_NESTED}, +}; + +static struct nla_policy nbd_sock_policy[NBD_SOCK_MAX + 1] = { + [NBD_SOCK_FD] = { .type = NLA_U32 }, +}; + +/* We don't use this right now since we don't parse the incoming list, but we + * still want it here so userspace knows what to expect. 
*/ +static struct nla_policy __attribute__((unused)) +nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = { + [NBD_DEVICE_INDEX] = { .type = NLA_U32 }, + [NBD_DEVICE_CONNECTED] = { .type = NLA_U8 }, +}; + +static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd = NULL; + struct nbd_config *config; + int index = -1; + int ret; + bool put_dev = false; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (info->attrs[NBD_ATTR_INDEX]) + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + if (!info->attrs[NBD_ATTR_SOCKETS]) { + printk(KERN_ERR "nbd: must specify at least one socket\n"); + return -EINVAL; + } + if (!info->attrs[NBD_ATTR_SIZE_BYTES]) { + printk(KERN_ERR "nbd: must specify a size in bytes for the device\n"); + return -EINVAL; + } +again: + mutex_lock(&nbd_index_mutex); + if (index == -1) { + ret = idr_for_each(&nbd_index_idr, &find_free_cb, &nbd); + if (ret == 0) { + int new_index; + new_index = nbd_dev_add(-1); + if (new_index < 0) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: failed to add new device\n"); + return ret; + } + nbd = idr_find(&nbd_index_idr, new_index); + } + } else { + nbd = idr_find(&nbd_index_idr, index); + } + if (!nbd) { + printk(KERN_ERR "nbd: couldn't find device at index %d\n", + index); + mutex_unlock(&nbd_index_mutex); + return -EINVAL; + } + if (!refcount_inc_not_zero(&nbd->refs)) { + mutex_unlock(&nbd_index_mutex); + if (index == -1) + goto again; + printk(KERN_ERR "nbd: device at index %d is going down\n", + index); + return -EINVAL; + } + mutex_unlock(&nbd_index_mutex); + + mutex_lock(&nbd->config_lock); + if (refcount_read(&nbd->config_refs)) { + mutex_unlock(&nbd->config_lock); + nbd_put(nbd); + if (index == -1) + goto again; + printk(KERN_ERR "nbd: nbd%d already in use\n", index); + return -EBUSY; + } + if (WARN_ON(nbd->config)) { + mutex_unlock(&nbd->config_lock); + nbd_put(nbd); + return -EINVAL; + } + config = nbd->config = nbd_alloc_config(); + if 
(!nbd->config) { + mutex_unlock(&nbd->config_lock); + nbd_put(nbd); + printk(KERN_ERR "nbd: couldn't allocate config\n"); + return -ENOMEM; + } + refcount_set(&nbd->config_refs, 1); + set_bit(NBD_BOUND, &config->runtime_flags); + + if (info->attrs[NBD_ATTR_SIZE_BYTES]) { + u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); + nbd_size_set(nbd, config->blksize, + div64_u64(bytes, config->blksize)); + } + if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { + u64 bsize = + nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); + nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize)); + } + if (info->attrs[NBD_ATTR_TIMEOUT]) { + u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); + nbd->tag_set.timeout = timeout * HZ; + blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); + } + if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { + config->dead_conn_timeout = + nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); + config->dead_conn_timeout *= HZ; + } + if (info->attrs[NBD_ATTR_SERVER_FLAGS]) + config->flags = + nla_get_u64(info->attrs[NBD_ATTR_SERVER_FLAGS]); + if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { + u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); + if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { + set_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags); + put_dev = true; + } + } + + if (info->attrs[NBD_ATTR_SOCKETS]) { + struct nlattr *attr; + int rem, fd; + + nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], + rem) { + struct nlattr *socks[NBD_SOCK_MAX+1]; + + if (nla_type(attr) != NBD_SOCK_ITEM) { + printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); + ret = -EINVAL; + goto out; + } + ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, - nbd_sock_policy); ++ nbd_sock_policy, info->extack); + if (ret != 0) { + printk(KERN_ERR "nbd: error processing sock list\n"); + ret = -EINVAL; + goto out; + } + if (!socks[NBD_SOCK_FD]) + continue; + fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); + ret = nbd_add_socket(nbd, fd, true); 
+ if (ret) + goto out; + } + } + ret = nbd_start_device(nbd); +out: + mutex_unlock(&nbd->config_lock); + if (!ret) { + set_bit(NBD_HAS_CONFIG_REF, &config->runtime_flags); + refcount_inc(&nbd->config_refs); + nbd_connect_reply(info, nbd->index); + } + nbd_config_put(nbd); + if (put_dev) + nbd_put(nbd); + return ret; +} + +static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd; + int index; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (!info->attrs[NBD_ATTR_INDEX]) { + printk(KERN_ERR "nbd: must specify an index to disconnect\n"); + return -EINVAL; + } + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + mutex_lock(&nbd_index_mutex); + nbd = idr_find(&nbd_index_idr, index); + if (!nbd) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: couldn't find device at index %d\n", + index); + return -EINVAL; + } + if (!refcount_inc_not_zero(&nbd->refs)) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: device at index %d is going down\n", + index); + return -EINVAL; + } + mutex_unlock(&nbd_index_mutex); + if (!refcount_inc_not_zero(&nbd->config_refs)) { + nbd_put(nbd); + return 0; + } + mutex_lock(&nbd->config_lock); + nbd_disconnect(nbd); + mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); + nbd_config_put(nbd); + nbd_put(nbd); + return 0; +} + +static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) +{ + struct nbd_device *nbd = NULL; + struct nbd_config *config; + int index; + int ret = -EINVAL; + bool put_dev = false; + + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + + if (!info->attrs[NBD_ATTR_INDEX]) { + printk(KERN_ERR "nbd: must specify a device to reconfigure\n"); + return -EINVAL; + } + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + mutex_lock(&nbd_index_mutex); + nbd = idr_find(&nbd_index_idr, index); + if (!nbd) { + mutex_unlock(&nbd_index_mutex); + 
printk(KERN_ERR "nbd: couldn't find a device at index %d\n", + index); + return -EINVAL; + } + if (!refcount_inc_not_zero(&nbd->refs)) { + mutex_unlock(&nbd_index_mutex); + printk(KERN_ERR "nbd: device at index %d is going down\n", + index); + return -EINVAL; + } + mutex_unlock(&nbd_index_mutex); + + if (!refcount_inc_not_zero(&nbd->config_refs)) { + dev_err(nbd_to_dev(nbd), + "not configured, cannot reconfigure\n"); + nbd_put(nbd); + return -EINVAL; + } + + mutex_lock(&nbd->config_lock); + config = nbd->config; + if (!test_bit(NBD_BOUND, &config->runtime_flags) || + !nbd->task_recv) { + dev_err(nbd_to_dev(nbd), + "not configured, cannot reconfigure\n"); + goto out; + } + + if (info->attrs[NBD_ATTR_TIMEOUT]) { + u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]); + nbd->tag_set.timeout = timeout * HZ; + blk_queue_rq_timeout(nbd->disk->queue, timeout * HZ); + } + if (info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]) { + config->dead_conn_timeout = + nla_get_u64(info->attrs[NBD_ATTR_DEAD_CONN_TIMEOUT]); + config->dead_conn_timeout *= HZ; + } + if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { + u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); + if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { + if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags)) + put_dev = true; + } else { + if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, + &config->runtime_flags)) + refcount_inc(&nbd->refs); + } + } + + if (info->attrs[NBD_ATTR_SOCKETS]) { + struct nlattr *attr; + int rem, fd; + + nla_for_each_nested(attr, info->attrs[NBD_ATTR_SOCKETS], + rem) { + struct nlattr *socks[NBD_SOCK_MAX+1]; + + if (nla_type(attr) != NBD_SOCK_ITEM) { + printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n"); + ret = -EINVAL; + goto out; + } + ret = nla_parse_nested(socks, NBD_SOCK_MAX, attr, - nbd_sock_policy); ++ nbd_sock_policy, info->extack); + if (ret != 0) { + printk(KERN_ERR "nbd: error processing sock list\n"); + ret = -EINVAL; + goto out; + } + if 
(!socks[NBD_SOCK_FD]) + continue; + fd = (int)nla_get_u32(socks[NBD_SOCK_FD]); + ret = nbd_reconnect_socket(nbd, fd); + if (ret) { + if (ret == -ENOSPC) + ret = 0; + goto out; + } + dev_info(nbd_to_dev(nbd), "reconnected socket\n"); + } + } +out: + mutex_unlock(&nbd->config_lock); + nbd_config_put(nbd); + nbd_put(nbd); + if (put_dev) + nbd_put(nbd); + return ret; +} + +static const struct genl_ops nbd_connect_genl_ops[] = { + { + .cmd = NBD_CMD_CONNECT, + .policy = nbd_attr_policy, + .doit = nbd_genl_connect, + }, + { + .cmd = NBD_CMD_DISCONNECT, + .policy = nbd_attr_policy, + .doit = nbd_genl_disconnect, + }, + { + .cmd = NBD_CMD_RECONFIGURE, + .policy = nbd_attr_policy, + .doit = nbd_genl_reconfigure, + }, + { + .cmd = NBD_CMD_STATUS, + .policy = nbd_attr_policy, + .doit = nbd_genl_status, + }, +}; + +static const struct genl_multicast_group nbd_mcast_grps[] = { + { .name = NBD_GENL_MCAST_GROUP_NAME, }, +}; + +static struct genl_family nbd_genl_family __ro_after_init = { + .hdrsize = 0, + .name = NBD_GENL_FAMILY_NAME, + .version = NBD_GENL_VERSION, + .module = THIS_MODULE, + .ops = nbd_connect_genl_ops, + .n_ops = ARRAY_SIZE(nbd_connect_genl_ops), + .maxattr = NBD_ATTR_MAX, + .mcgrps = nbd_mcast_grps, + .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), +}; + +static int populate_nbd_status(struct nbd_device *nbd, struct sk_buff *reply) +{ + struct nlattr *dev_opt; + u8 connected = 0; + int ret; + + /* This is a little racey, but for status it's ok. The + * reason we don't take a ref here is because we can't + * take a ref in the index == -1 case as we would need + * to put under the nbd_index_mutex, which could + * deadlock if we are configured to remove ourselves + * once we're disconnected. 
+ */ + if (refcount_read(&nbd->config_refs)) + connected = 1; + dev_opt = nla_nest_start(reply, NBD_DEVICE_ITEM); + if (!dev_opt) + return -EMSGSIZE; + ret = nla_put_u32(reply, NBD_DEVICE_INDEX, nbd->index); + if (ret) + return -EMSGSIZE; + ret = nla_put_u8(reply, NBD_DEVICE_CONNECTED, + connected); + if (ret) + return -EMSGSIZE; + nla_nest_end(reply, dev_opt); + return 0; +} + +static int status_cb(int id, void *ptr, void *data) +{ + struct nbd_device *nbd = ptr; + return populate_nbd_status(nbd, (struct sk_buff *)data); +} + +static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *dev_list; + struct sk_buff *reply; + void *reply_head; + size_t msg_size; + int index = -1; + int ret = -ENOMEM; + + if (info->attrs[NBD_ATTR_INDEX]) + index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + + mutex_lock(&nbd_index_mutex); + + msg_size = nla_total_size(nla_attr_size(sizeof(u32)) + + nla_attr_size(sizeof(u8))); + msg_size *= (index == -1) ? nbd_total_devices : 1; + + reply = genlmsg_new(msg_size, GFP_KERNEL); + if (!reply) + goto out; + reply_head = genlmsg_put_reply(reply, info, &nbd_genl_family, 0, + NBD_CMD_STATUS); + if (!reply_head) { + nlmsg_free(reply); + goto out; + } + + dev_list = nla_nest_start(reply, NBD_ATTR_DEVICE_LIST); + if (index == -1) { + ret = idr_for_each(&nbd_index_idr, &status_cb, reply); + if (ret) { + nlmsg_free(reply); + goto out; + } + } else { + struct nbd_device *nbd; + nbd = idr_find(&nbd_index_idr, index); + if (nbd) { + ret = populate_nbd_status(nbd, reply); + if (ret) { + nlmsg_free(reply); + goto out; + } + } + } + nla_nest_end(reply, dev_list); + genlmsg_end(reply, reply_head); + genlmsg_reply(reply, info); + ret = 0; +out: + mutex_unlock(&nbd_index_mutex); + return ret; +} + +static void nbd_connect_reply(struct genl_info *info, int index) +{ + struct sk_buff *skb; + void *msg_head; + int ret; + + skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); + if (!skb) + return; + msg_head = 
genlmsg_put_reply(skb, info, &nbd_genl_family, 0, + NBD_CMD_CONNECT); + if (!msg_head) { + nlmsg_free(skb); + return; + } + ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); + if (ret) { + nlmsg_free(skb); + return; + } + genlmsg_end(skb, msg_head); + genlmsg_reply(skb, info); +} + +static void nbd_mcast_index(int index) +{ + struct sk_buff *skb; + void *msg_head; + int ret; + + skb = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); + if (!skb) + return; + msg_head = genlmsg_put(skb, 0, 0, &nbd_genl_family, 0, + NBD_CMD_LINK_DEAD); + if (!msg_head) { + nlmsg_free(skb); + return; + } + ret = nla_put_u32(skb, NBD_ATTR_INDEX, index); + if (ret) { + nlmsg_free(skb); + return; + } + genlmsg_end(skb, msg_head); + genlmsg_multicast(&nbd_genl_family, skb, 0, 0, GFP_KERNEL); +} + +static void nbd_dead_link_work(struct work_struct *work) +{ + struct link_dead_args *args = container_of(work, struct link_dead_args, + work); + nbd_mcast_index(args->index); + kfree(args); +} static int __init nbd_init(void) {