From f70a5ac6ed7eb212ec282fa71f2993c0e003628d Mon Sep 17 00:00:00 2001 From: Mukesh Kacker Date: Thu, 22 Jan 2015 11:14:02 -0800 Subject: [PATCH] ipoib: rfe- enable pkey and device name decoupling The sysfs "create_child" interface creates pkey based child interface but derives the name from parent device name and pkey value. This makes administration difficult where pkey values can change but policies encoded with device names do not. We add ability to create a child interface with a user specified name and a specified pkey with a new sysfs "create_named_child" interface (and also add a corresponding "delete_named_child" interface). We also add a new module api interface to query pkey from a netdevice so any kernel users of pkey based child interfaces can query it - since with device name decoupled from pkey, it can no longer be deduced from parsing the device name by other kernel users. Orabug: 19064704 Ported from UEK2 commits a101f6037e882b1c12143416d48345fe7ea62979 and 916eb26c32082e241c22fcf8064959d697eabe2b Signed-off-by: Mukesh Kacker Reviewed-by: Yuval Shaia Reviewed-by: Chien-Hua Yen Signed-off-by: Guangyu Sun --- Documentation/infiniband/ipoib.txt | 12 ++ drivers/infiniband/ulp/ipoib/ipoib.h | 3 + drivers/infiniband/ulp/ipoib/ipoib_main.c | 191 ++++++++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 77 ++++++++- include/net/ipoib/if_ipoib.h | 14 ++ 5 files changed, 291 insertions(+), 6 deletions(-) create mode 100644 include/net/ipoib/if_ipoib.h diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt index f2cfe265e836..7433db27f9e7 100644 --- a/Documentation/infiniband/ipoib.txt +++ b/Documentation/infiniband/ipoib.txt @@ -21,6 +21,18 @@ Partitions and P_Keys echo 0x8001 > /sys/class/net/ib0/delete_child + Interfaces with a user chosen name can be created in a similar + manner with a different name and P_Key, by writing them into the + main interface's /sys/class/net//create_named_child + For example: + echo "epart2 0x8002" > /sys/class/net/ib1/create_named_child + + This will create an interfaces named epart2 with P_Key 0x8002 and + parent ib1. To remove a named subinterface, use the + "delete_named_child" file: + + echo epart2 > /sys/class/net/ib1/delete_named_child + The P_Key for any interface is given by the "pkey" file, and the main interface for a subinterface is in "parent." diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 99fe6fa77575..70a1d2a3ede3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -539,6 +539,9 @@ void ipoib_event(struct ib_event_handler *handler, int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey); int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); +int ipoib_named_vlan_add(struct net_device *pdev, unsigned short pkey, + char *child_name_buf); +int ipoib_named_vlan_delete(struct net_device *pdev, char *child_name_buf); int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, u16 pkey, int child_type); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 9f9864b42c0b..7b89a7e13258 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -34,6 +34,7 @@ #include "ipoib.h" +#include #include #include @@ -98,6 +99,13 @@ static struct ib_client ipoib_client = { .remove = ipoib_remove_one }; +/* + * PKEY_HEXSTRING_MAXWIDTH - number of hex + * digits needed to represent max width of + * pkey value. + */ +#define PKEY_HEXSTRING_MAXWIDTH 4 + int ipoib_open(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -1498,6 +1506,126 @@ int ipoib_add_umcast_attr(struct net_device *dev) return device_create_file(&dev->dev, &dev_attr_umcast); } +/* + * Check if a buffer has name of the format + * + * .<4hexcharacters> + * e.g. ib1.8004 etc. + * + * Such names are generated by create_child() by + * concatenating parent device with 16-bit pkey + * in hex, and disallowed from usage with + * create_named_child() interface. + * + */ +static bool ipoib_disallowed_named_child_namespace(const char *buf) +{ + char localbuf[IFNAMSIZ]; + char *dotp = NULL; + char *buf_before_dot = NULL; + char *buf_after_dot = NULL; + unsigned int ii; + + memcpy(localbuf, buf, IFNAMSIZ); + localbuf[IFNAMSIZ-1] = '\0'; /* paranoia! */ + + dotp = strnchr(localbuf, IFNAMSIZ, '.'); + /* no dot or dot at end! */ + if (dotp == NULL || dotp == localbuf+IFNAMSIZ-2) + return false; + + *dotp = '\0'; /* split buffer at "dot" */ + buf_before_dot = localbuf; + buf_after_dot = dotp + 1; + + /* + * Check if buf_after_dot is hexstring of width + * that could be a pkey! + */ + if (strlen(buf_after_dot) != PKEY_HEXSTRING_MAXWIDTH) + return false; + + for (ii = 0; ii < PKEY_HEXSTRING_MAXWIDTH; ii++) { + if (!isxdigit(buf_after_dot[ii])) + return false; + } + + /* + * (1) buf_after_dot check above makes it valid hexdigit .XXXX format + * + * Now verify if buf_before_dot is a valid net device name - + * (if it is not, then we are not in disallowed namespace) + */ + if (__dev_get_by_name(&init_net, buf_before_dot) == NULL) + return false; + + /* + * (2) buf_before_dot is valid net device name + * - reserved namespace is being used! + * + * Note: No check on netdev->type to be ARPHRD_INFINIBAND etc + * We implicitly treat even misleading names such as eth1.XXXX + * (ethernet device prefix) for child interface name of an + * infiniband device as intrusion of reserved namespace! + */ + return true; +} + +static int parse_named_child(struct device *dev, const char *buf, + char *child_name_buf, int *pkeyp) +{ + int ret; + struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev)); + + if (pkeyp) + *pkeyp = -1; + + /* + * First parameter is child interface name, after that + * 'pkey' is required if we were passed a pkey buffer + * (Note: From create_named_child, we are passed a pkey + * buffer to parse input, from delete_named_child we are + * not!) + * Note: IFNAMSIZ is 16, allowing for tail null + * we only scan 15 characters for name. + */ + if (pkeyp) { + ret = sscanf(buf, "%15s %i", child_name_buf, pkeyp); + if (ret != 2) + return -EINVAL; + } else { + ret = sscanf(buf, "%15s", child_name_buf); + if (ret != 1) + return -EINVAL; + } + + if (strlen(child_name_buf) <= 0 || !dev_valid_name(child_name_buf)) + return -EINVAL; + + if (pkeyp && (*pkeyp <= 0 || *pkeyp > 0xffff || *pkeyp == 0x8000)) + return -EINVAL; + + if (ipoib_disallowed_named_child_namespace(child_name_buf)) { + pr_warn("child name %s not allowed to be used " + "with create_named_child as it uses " + ".XXXX format reserved " + "for create_child/delete_child interfaces!\n", + child_name_buf); + return -EINVAL; + } + + if (pkeyp) + ipoib_dbg(priv, "parse_named_child inp %s out " + "child_name_buf %s, pkey %04x\n", + buf, child_name_buf, *pkeyp); + else + ipoib_dbg(priv, "parse_named_child inp %s out " + "child_name_buf %s\n", + buf, child_name_buf); + return 0; +} + + static ssize_t create_child(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -1543,6 +1671,44 @@ static ssize_t delete_child(struct device *dev, } static DEVICE_ATTR(delete_child, S_IWUSR, NULL, delete_child); +static ssize_t create_named_child(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int pkey; + char child_name[IFNAMSIZ]; + int ret = 0; + + child_name[0] = '\0'; + + if (parse_named_child(dev, buf, child_name, &pkey)) + return -EINVAL; + + ret = ipoib_named_vlan_add(to_net_dev(dev), pkey, child_name); + return ret ? ret : count; +} +static DEVICE_ATTR(create_named_child, S_IWUSR, NULL, create_named_child); + +static ssize_t delete_named_child(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char child_name[IFNAMSIZ]; + int ret = 0; + + child_name[0] = '\0'; + + if (parse_named_child(dev, buf, child_name, NULL)) + return -EINVAL; + + ret = ipoib_named_vlan_delete(to_net_dev(dev), child_name); + + return ret ? ret : count; + +} +static DEVICE_ATTR(delete_named_child, S_IWUSR, NULL, delete_named_child); + + int ipoib_add_pkey_attr(struct net_device *dev) { return device_create_file(&dev->dev, &dev_attr_pkey); @@ -1681,6 +1847,11 @@ static struct net_device *ipoib_add_port(const char *format, goto sysfs_failed; if (device_create_file(&priv->dev->dev, &dev_attr_delete_child)) goto sysfs_failed; + if (device_create_file(&priv->dev->dev, &dev_attr_create_named_child)) + goto sysfs_failed; + if (device_create_file(&priv->dev->dev, &dev_attr_delete_named_child)) + goto sysfs_failed; + return priv->dev; @@ -1782,6 +1953,26 @@ static void ipoib_remove_one(struct ib_device *device) kfree(dev_list); } +int +ipoib_get_netdev_pkey(struct net_device *dev, u16 *pkey) +{ + struct ipoib_dev_priv *priv; + + if (dev->type != ARPHRD_INFINIBAND) + return -EINVAL; + + /* only for ipoib net devices! */ + if (dev->netdev_ops != &ipoib_netdev_ops) + return -EINVAL; + + priv = netdev_priv(dev); + + *pkey = priv->pkey; + + return 0; +} +EXPORT_SYMBOL(ipoib_get_netdev_pkey); + static int __init ipoib_init_module(void) { int ret; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index fca1a882de27..2dddd0c8b0b8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -117,7 +117,9 @@ err: return result; } -int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) +int ipoib_vlan_add_common(struct net_device *pdev, + unsigned short pkey, + char *child_name_buf) { struct ipoib_dev_priv *ppriv, *priv; char intf_name[IFNAMSIZ]; @@ -129,8 +131,22 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ppriv = netdev_priv(pdev); - snprintf(intf_name, sizeof intf_name, "%s.%04x", - ppriv->dev->name, pkey); + if (child_name_buf == NULL) { + /* + * If child name is not provided, we generated + * one using name of parent and pkey. + */ + snprintf(intf_name, sizeof(intf_name), "%s.%04x", + ppriv->dev->name, pkey); + } else { + /* + * Note: Duplicate intf_name will be detected later in the code + * by register_netdevice() (inside __ipoib_vlan_add() call + * below) returning EEXIST! + */ + strncpy(intf_name, child_name_buf, IFNAMSIZ); + } + priv = ipoib_intf_alloc(intf_name); if (!priv) return -ENOMEM; @@ -171,10 +187,27 @@ out: return result; } -int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) +int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) +{ + return ipoib_vlan_add_common(pdev, pkey, NULL); +} + +int ipoib_named_vlan_add(struct net_device *pdev, + unsigned short pkey, + char *child_name_buf) +{ + return ipoib_vlan_add_common(pdev, pkey, child_name_buf); +} + +int ipoib_vlan_delete_common(struct net_device *pdev, + unsigned short pkey, + char *child_name_buf) { struct ipoib_dev_priv *ppriv, *priv, *tpriv; struct net_device *dev = NULL; + char gen_intf_name[IFNAMSIZ]; + + gen_intf_name[0] = '\0'; /* initialize - paranoia! */ if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -185,9 +218,30 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) return restart_syscall(); down_write(&ppriv->vlan_rwsem); + if (child_name_buf == NULL && ppriv->dev) { + /* + * If child name is not provided, we generate the + * expected one using name of parent and pkey + * and use it in addition to pkey value + * (other children with same pkey may exist that have + * created by create_named_child() - we do not allow + * delete_child() to delete them - delete_named_child() + * has to be used!) + */ + snprintf(gen_intf_name, sizeof(gen_intf_name), + "%s.%04x", ppriv->dev->name, pkey); + } list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { - if (priv->pkey == pkey && - priv->child_type == IPOIB_LEGACY_CHILD) { + if ((priv->child_type == IPOIB_LEGACY_CHILD) && + /* user named child (match by name) OR */ + ((child_name_buf && priv->dev && + !strcmp(child_name_buf, priv->dev->name)) || + /* + * OR classic (devname.hexpkey generated name) child + * (match by pkey and generated name) + */ + (!child_name_buf && priv->pkey == pkey && + priv->dev && !strcmp(gen_intf_name, priv->dev->name)))) { unregister_netdevice(priv->dev); list_del(&priv->list); dev = priv->dev; @@ -205,3 +259,14 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) return -ENODEV; } + +int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) +{ + + return ipoib_vlan_delete_common(pdev, pkey, NULL); +} + +int ipoib_named_vlan_delete(struct net_device *pdev, char *child_name_buf) +{ + return ipoib_vlan_delete_common(pdev, 0, child_name_buf); +} diff --git a/include/net/ipoib/if_ipoib.h b/include/net/ipoib/if_ipoib.h new file mode 100644 index 000000000000..9c12cafef08f --- /dev/null +++ b/include/net/ipoib/if_ipoib.h @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2014 Oracle Inc. All rights reserved. + */ + +#ifndef _NET_IPOIB_IF_H +#define _NET_IPOIB_IF_H + +#ifdef __KERNEL__ +#include +#include +extern int ipoib_get_netdev_pkey(struct net_device *dev, u16 *pkey); +#endif /* __KERNEL__ */ + +#endif /* _NET_IPOIB_IF_H */ -- 2.50.1