From dfe75ff8ca74f54b0fa5a326a1aa9afa485ed802 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 1 Feb 2017 21:01:54 +0100 Subject: netfilter: nf_ct_helper: warn when not applying default helper assignment Commit 3bb398d925 ("netfilter: nf_ct_helper: disable automatic helper assignment") is causing behavior regressions in firewalls, as traffic handled by conntrack helpers is now by default not passed through even though it was before due to missing CT targets (which were not necessary before this commit). The default had to be switched off due to security reasons [1] [2] and therefore should stay the way it is, but let's be friendly to firewall admins and issue a warning the first time we're in situation where packet would be likely passed through with the old default but we're likely going to drop it on the floor now. Rewrite the code a little bit as suggested by Linus, so that we avoid spaghettiing the code even more -- namely the whole decision making process regarding helper selection (either automatic or not) is being separated, so that the whole logic can be simplified and code (condition) duplication reduced. [1] https://cansecwest.com/csw12/conntrack-attack.pdf [2] https://home.regit.org/netfilter-en/secure-use-of-helpers/ Signed-off-by: Jiri Kosina Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 39 ++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 7341adf7059d..6dc44d9b4190 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -188,6 +188,26 @@ nf_ct_helper_ext_add(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); +static struct nf_conntrack_helper * +nf_ct_lookup_helper(struct nf_conn *ct, struct net *net) +{ + if (!net->ct.sysctl_auto_assign_helper) { + if (net->ct.auto_assign_helper_warned) + return NULL; + if (!__nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)) + return NULL; + pr_info("nf_conntrack: default automatic helper assignment " + "has been turned off for security reasons and CT-based " + " firewall rule not found. Use the iptables CT target " + "to attach helpers instead.\n"); + net->ct.auto_assign_helper_warned = 1; + return NULL; + } + + return __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); +} + + int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags) { @@ -213,21 +233,14 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, } help = nfct_help(ct); - if (net->ct.sysctl_auto_assign_helper && helper == NULL) { - helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - if (unlikely(!net->ct.auto_assign_helper_warned && helper)) { - pr_info("nf_conntrack: automatic helper " - "assignment is deprecated and it will " - "be removed soon. Use the iptables CT target " - "to attach helpers instead.\n"); - net->ct.auto_assign_helper_warned = true; - } - } if (helper == NULL) { - if (help) - RCU_INIT_POINTER(help->helper, NULL); - return 0; + helper = nf_ct_lookup_helper(ct, net); + if (helper == NULL) { + if (help) + RCU_INIT_POINTER(help->helper, NULL); + return 0; + } } if (help == NULL) { -- cgit v1.2.3 From a963d710f367f68cd13d562a07db55ccb8daade9 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Thu, 26 Jan 2017 14:49:43 -0800 Subject: netfilter: ctnetlink: Fix regression in CTA_STATUS processing The libnetfilter_conntrack userland library always sets IPS_CONFIRMED when building a CTA_STATUS attribute. If this toggles the bit from 0->1, the parser will return an error. On Linux 4.4+ this will cause any NFQA_EXP attribute in the packet to be ignored. This breaks conntrackd's userland helpers because they operate on unconfirmed connections. Instead of returning -EBUSY if the user program asks to modify an unchangeable bit, simply ignore the change. Also, fix the logic so that user programs are allowed to clear the bits that they are allowed to change. Signed-off-by: Kevin Cernekee Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 4 ++++ net/netfilter/nf_conntrack_netlink.c | 26 +++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 6d074d14ee27..6a8e33dd4ecb 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -82,6 +82,10 @@ enum ip_conntrack_status { IPS_DYING_BIT = 9, IPS_DYING = (1 << IPS_DYING_BIT), + /* Bits that cannot be altered from userland. */ + IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | + IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING), + /* Connection has fixed timeout. */ IPS_FIXED_TIMEOUT_BIT = 10, IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 27540455dc62..bf04b7e9d6f7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2269,6 +2269,30 @@ nla_put_failure: return -ENOSPC; } +static int +ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[]) +{ + unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS])); + unsigned long d = ct->status ^ status; + + if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) + /* SEEN_REPLY bit can only be set */ + return -EBUSY; + + if (d & IPS_ASSURED && !(status & IPS_ASSURED)) + /* ASSURED bit can only be set */ + return -EBUSY; + + /* This check is less strict than ctnetlink_change_status() + * because callers often flip IPS_EXPECTED bits when sending + * an NFQA_CT attribute to the kernel. So ignore the + * unchangeable bits but do not error out. + */ + ct->status = (status & ~IPS_UNCHANGEABLE_MASK) | + (ct->status & IPS_UNCHANGEABLE_MASK); + return 0; +} + static int ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) { @@ -2280,7 +2304,7 @@ ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) return err; } if (cda[CTA_STATUS]) { - err = ctnetlink_change_status(ct, cda); + err = ctnetlink_update_status(ct, cda); if (err < 0) return err; } -- cgit v1.2.3 From f95d7a46bc5722767c30ee223c8b67dd0f2e8793 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Thu, 26 Jan 2017 14:49:44 -0800 Subject: netfilter: ctnetlink: Fix regression in CTA_HELP processing Prior to Linux 4.4, it was usually harmless to send a CTA_HELP attribute containing the name of the current helper. That is no longer the case: as of Linux 4.4, if ctnetlink_change_helper() returns an error from the ct->master check, processing of the request will fail, skipping the NFQA_EXP attribute (if present). This patch changes the behavior to improve compatibility with user programs that expect the kernel interface to work the way it did prior to Linux 4.4. If a user program specifies CTA_HELP but the argument matches the current conntrack helper name, ignore it instead of generating an error. Signed-off-by: Kevin Cernekee Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index bf04b7e9d6f7..6806b5e73567 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1478,14 +1478,23 @@ static int ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *helpinfo = NULL; int err; - /* don't change helper of sibling connections */ - if (ct->master) - return -EBUSY; - err = ctnetlink_parse_help(cda[CTA_HELP], &helpname, &helpinfo); if (err < 0) return err; + /* don't change helper of sibling connections */ + if (ct->master) { + /* If we try to change the helper to the same thing twice, + * treat the second attempt as a no-op instead of returning + * an error. + */ + if (help && help->helper && + !strcmp(help->helper->name, helpname)) + return 0; + else + return -EBUSY; + } + if (!strcmp(helpname, "")) { if (help && help->helper) { /* we had a helper before ... */ -- cgit v1.2.3 From 50054a9223aaf79985c55ef7dd090ced15581567 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 16 Feb 2017 20:47:30 +0100 Subject: Fix bug: sometimes valid entries in hash:* types of sets were evicted Wrong index was used and therefore when shrinking a hash bucket at deleting an entry, valid entries could be evicted as well. Thanks to Eric Ewanco for the thorough bugreport. Fixes netfilter bugzilla #1119 Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 1b05d4a7d5a1..f236c0bc7b3f 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -897,7 +897,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, continue; data = ahash_data(n, j, dsize); memcpy(tmp->value + k * dsize, data, dsize); - set_bit(j, tmp->used); + set_bit(k, tmp->used); k++; } tmp->pos = k; -- cgit v1.2.3 From 40b446a1d8af17274746ff7079aa0a618dffbac3 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Thu, 16 Feb 2017 20:55:45 +0100 Subject: netfilter: ipset: Null pointer exception in ipset list:set If we use before/after to add an element to an empty list it will cause a kernel panic. $> cat crash.restore create a hash:ip create b hash:ip create test list:set timeout 5 size 4 add test b before a $> ipset -R < crash.restore Executing the above will crash the kernel. Signed-off-by: Vishwanath Pai Reviewed-by: Josh Hunt Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_list_set.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 51077c53d76b..178d4eba013b 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -260,11 +260,14 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, else prev = e; } + + /* If before/after is used on an empty set */ + if ((d->before > 0 && !next) || + (d->before < 0 && !prev)) + return -IPSET_ERR_REF_EXIST; + /* Re-add already existing element */ if (n) { - if ((d->before > 0 && !next) || - (d->before < 0 && !prev)) - return -IPSET_ERR_REF_EXIST; if (!flag_exist) return -IPSET_ERR_EXIST; /* Update extensions */ -- cgit v1.2.3 From ad5b55761956427f61ed9c96961bf9c5cd4f92dc Mon Sep 17 00:00:00 2001 From: Alban Browaeys Date: Mon, 6 Feb 2017 23:50:33 +0100 Subject: netfilter: xt_hashlimit: Fix integer divide round to zero. Diving the divider by the multiplier before applying to the input. When this would "divide by zero", divide the multiplier by the divider first then multiply the input by this value. Currently user2creds outputs zero when input value is bigger than the number of slices and lower than scale. This as then user input is applied an integer divide operation to a number greater than itself (scale). That rounds up to zero, then we multiply zero by the credits slice size. iptables -t filter -I INPUT --protocol tcp --match hashlimit --hashlimit 40/second --hashlimit-burst 20 --hashlimit-mode srcip --hashlimit-name syn-flood --jump RETURN thus trigger the overflow detection code: xt_hashlimit: overflow, try lower: 25000/20 (25000 as hashlimit avg and 20 the burst) Here: 134217 slices of (HZ * CREDITS_PER_JIFFY) size. 500000 is user input value 1000000 is XT_HASHLIMIT_SCALE_v2 gives: 0 as user2creds output Setting burst to "1" typically solve the issue ... but setting it to "40" does too ! This is on 32bit arch calling into revision 2 of hashlimit. Signed-off-by: Alban Browaeys Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 10063408141d..84ad5ab34558 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -463,23 +463,16 @@ static u32 xt_hashlimit_len_to_chunks(u32 len) /* Precision saver. */ static u64 user2credits(u64 user, int revision) { - if (revision == 1) { - /* If multiplying would overflow... */ - if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1)) - /* Divide first. */ - return div64_u64(user, XT_HASHLIMIT_SCALE) - * HZ * CREDITS_PER_JIFFY_v1; - - return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1, - XT_HASHLIMIT_SCALE); - } else { - if (user > 0xFFFFFFFFFFFFFFFFULL / (HZ*CREDITS_PER_JIFFY)) - return div64_u64(user, XT_HASHLIMIT_SCALE_v2) - * HZ * CREDITS_PER_JIFFY; + u64 scale = (revision == 1) ? + XT_HASHLIMIT_SCALE : XT_HASHLIMIT_SCALE_v2; + u64 cpj = (revision == 1) ? + CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY; - return div64_u64(user * HZ * CREDITS_PER_JIFFY, - XT_HASHLIMIT_SCALE_v2); - } + /* Avoid overflow: divide the constant operands first */ + if (scale >= HZ * cpj) + return div64_u64(user, div64_u64(scale, HZ * cpj)); + + return user * div64_u64(HZ * cpj, scale); } static u32 user2credits_byte(u32 user) -- cgit v1.2.3 From ba896a05ad9375912ccebdac9623aab97845600f Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Thu, 16 Feb 2017 18:20:33 +0900 Subject: netfilter: nfnetlink_queue: fix NFQA_VLAN_MAX definition Should be - 1 as in other _MAX definitions. Signed-off-by: Ken-ichirou MATSUZAWA Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_queue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index ae30841ff94e..d42f0396fe30 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -36,7 +36,7 @@ enum nfqnl_vlan_attr { NFQA_VLAN_TCI, /* __be16 skb htons(vlan_tci) */ __NFQA_VLAN_MAX, }; -#define NFQA_VLAN_MAX (__NFQA_VLAN_MAX + 1) +#define NFQA_VLAN_MAX (__NFQA_VLAN_MAX - 1) enum nfqnl_attr_type { NFQA_UNSPEC, -- cgit v1.2.3 From 4eba8b78e1390f46828e00d6fa4d651c3d219894 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 18 Feb 2017 10:35:47 +0800 Subject: netfilter: nfnetlink: remove static declaration from err_list Otherwise, different subsys will race to access the err_list, with holding the different nfnl_lock(subsys_id). But this will not happen now, since ->call_batch is only implemented by nftables, so the err_list is protected by nfnl_lock(NFNL_SUBSYS_NFTABLES). Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index a09fa9fd8f3d..6fa448478cba 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -279,7 +279,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; - static LIST_HEAD(err_list); + LIST_HEAD(err_list); u32 status; int err; -- cgit v1.2.3 From a577ca6badb5261d3fa1bcbb3332ae7874f1a3a2 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 20 Feb 2017 20:10:58 +0100 Subject: net: emac: add support for device-tree based PHY discovery and setup This patch adds glue-code that allows the EMAC driver to interface with the existing dt-supported PHYs in drivers/net/phy. Because currently, the emac driver maintains a small library of supported phys for in a private phy.c file located in the drivers directory. The support is limited to mostly single ethernet transceiver like the: CIS8201, BCM5248, ET1011C, Marvell 88E1111 and 88E1112, AR8035. However, routers like the Netgear WNDR4700 and Cisco Meraki MX60(W) have a 5-port switch (AR8327N) attached to the EMAC. The switch chip is supported by the qca8k mdio driver, which uses the generic phy library. Another reason is that PHYLIB also supports the BCM54610, which was used for the Western Digital My Book Live. This will now also make EMAC select PHYLIB. Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/Kconfig | 1 + drivers/net/ethernet/ibm/emac/core.c | 254 +++++++++++++++++++++++++++++++++- drivers/net/ethernet/ibm/emac/core.h | 4 + 3 files changed, 252 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/Kconfig b/drivers/net/ethernet/ibm/emac/Kconfig index 3f44a30e0615..90d49191beb3 100644 --- a/drivers/net/ethernet/ibm/emac/Kconfig +++ b/drivers/net/ethernet/ibm/emac/Kconfig @@ -2,6 +2,7 @@ config IBM_EMAC tristate "IBM EMAC Ethernet support" depends on PPC_DCR select CRC32 + select PHYLIB help This driver supports the IBM EMAC family of Ethernet controllers typically found on 4xx embedded PowerPC chips, but also on the diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 6ead2335a169..275c2e2349ad 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -2420,6 +2421,219 @@ static int emac_read_uint_prop(struct device_node *np, const char *name, return 0; } +static void emac_adjust_link(struct net_device *ndev) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct phy_device *phy = dev->phy_dev; + + dev->phy.autoneg = phy->autoneg; + dev->phy.speed = phy->speed; + dev->phy.duplex = phy->duplex; + dev->phy.pause = phy->pause; + dev->phy.asym_pause = phy->asym_pause; + dev->phy.advertising = phy->advertising; +} + +static int emac_mii_bus_read(struct mii_bus *bus, int addr, int regnum) +{ + int ret = emac_mdio_read(bus->priv, addr, regnum); + /* This is a workaround for powered down ports/phys. + * In the wild, this was seen on the Cisco Meraki MX60(W). + * This hardware disables ports as part of the handoff + * procedure. Accessing the ports will lead to errors + * (-ETIMEDOUT, -EREMOTEIO) that do more harm than good. + */ + return ret < 0 ? 0xffff : ret; +} + +static int emac_mii_bus_write(struct mii_bus *bus, int addr, + int regnum, u16 val) +{ + emac_mdio_write(bus->priv, addr, regnum, val); + return 0; +} + +static int emac_mii_bus_reset(struct mii_bus *bus) +{ + struct emac_instance *dev = netdev_priv(bus->priv); + + return emac_reset(dev); +} + +static int emac_mdio_setup_aneg(struct mii_phy *phy, u32 advertise) +{ + struct net_device *ndev = phy->dev; + struct emac_instance *dev = netdev_priv(ndev); + + dev->phy.autoneg = AUTONEG_ENABLE; + dev->phy.speed = SPEED_1000; + dev->phy.duplex = DUPLEX_FULL; + dev->phy.advertising = advertise; + phy->autoneg = AUTONEG_ENABLE; + phy->speed = dev->phy.speed; + phy->duplex = dev->phy.duplex; + phy->advertising = advertise; + return phy_start_aneg(dev->phy_dev); +} + +static int emac_mdio_setup_forced(struct mii_phy *phy, int speed, int fd) +{ + struct net_device *ndev = phy->dev; + struct emac_instance *dev = netdev_priv(ndev); + + dev->phy.autoneg = AUTONEG_DISABLE; + dev->phy.speed = speed; + dev->phy.duplex = fd; + phy->autoneg = AUTONEG_DISABLE; + phy->speed = speed; + phy->duplex = fd; + return phy_start_aneg(dev->phy_dev); +} + +static int emac_mdio_poll_link(struct mii_phy *phy) +{ + struct net_device *ndev = phy->dev; + struct emac_instance *dev = netdev_priv(ndev); + int res; + + res = phy_read_status(dev->phy_dev); + if (res) { + dev_err(&dev->ofdev->dev, "link update failed (%d).", res); + return ethtool_op_get_link(ndev); + } + + return dev->phy_dev->link; +} + +static int emac_mdio_read_link(struct mii_phy *phy) +{ + struct net_device *ndev = phy->dev; + struct emac_instance *dev = netdev_priv(ndev); + int res; + + res = phy_read_status(dev->phy_dev); + if (res) + return res; + + dev->phy.speed = phy->speed; + dev->phy.duplex = phy->duplex; + dev->phy.pause = phy->pause; + dev->phy.asym_pause = phy->asym_pause; + return 0; +} + +static int emac_mdio_init_phy(struct mii_phy *phy) +{ + struct net_device *ndev = phy->dev; + struct emac_instance *dev = netdev_priv(ndev); + + phy_start(dev->phy_dev); + dev->phy.autoneg = phy->autoneg; + dev->phy.speed = phy->speed; + dev->phy.duplex = phy->duplex; + dev->phy.advertising = phy->advertising; + dev->phy.pause = phy->pause; + dev->phy.asym_pause = phy->asym_pause; + + return phy_init_hw(dev->phy_dev); +} + +static const struct mii_phy_ops emac_dt_mdio_phy_ops = { + .init = emac_mdio_init_phy, + .setup_aneg = emac_mdio_setup_aneg, + .setup_forced = emac_mdio_setup_forced, + .poll_link = emac_mdio_poll_link, + .read_link = emac_mdio_read_link, +}; + +static int emac_dt_mdio_probe(struct emac_instance *dev) +{ + struct device_node *mii_np; + int res; + + mii_np = of_get_child_by_name(dev->ofdev->dev.of_node, "mdio"); + if (!mii_np) { + dev_err(&dev->ofdev->dev, "no mdio definition found."); + return -ENODEV; + } + + if (!of_device_is_available(mii_np)) { + res = -ENODEV; + goto put_node; + } + + dev->mii_bus = devm_mdiobus_alloc(&dev->ofdev->dev); + if (!dev->mii_bus) { + res = -ENOMEM; + goto put_node; + } + + dev->mii_bus->priv = dev->ndev; + dev->mii_bus->parent = dev->ndev->dev.parent; + dev->mii_bus->name = "emac_mdio"; + dev->mii_bus->read = &emac_mii_bus_read; + dev->mii_bus->write = &emac_mii_bus_write; + dev->mii_bus->reset = &emac_mii_bus_reset; + snprintf(dev->mii_bus->id, MII_BUS_ID_SIZE, "%s", dev->ofdev->name); + res = of_mdiobus_register(dev->mii_bus, mii_np); + if (res) { + dev_err(&dev->ofdev->dev, "cannot register MDIO bus %s (%d)", + dev->mii_bus->name, res); + } + + put_node: + of_node_put(mii_np); + return res; +} + +static int emac_dt_phy_connect(struct emac_instance *dev, + struct device_node *phy_handle) +{ + int res; + + dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def), + GFP_KERNEL); + if (!dev->phy.def) + return -ENOMEM; + + dev->phy_dev = of_phy_connect(dev->ndev, phy_handle, &emac_adjust_link, + 0, dev->phy_mode); + if (!dev->phy_dev) { + dev_err(&dev->ofdev->dev, "failed to connect to PHY.\n"); + return -ENODEV; + } + + dev->phy.def->phy_id = dev->phy_dev->drv->phy_id; + dev->phy.def->phy_id_mask = dev->phy_dev->drv->phy_id_mask; + dev->phy.def->name = dev->phy_dev->drv->name; + dev->phy.def->ops = &emac_dt_mdio_phy_ops; + dev->phy.features = dev->phy_dev->supported; + dev->phy.address = dev->phy_dev->mdio.addr; + dev->phy.mode = dev->phy_dev->interface; + return 0; +} + +static int emac_dt_phy_probe(struct emac_instance *dev) +{ + struct device_node *np = dev->ofdev->dev.of_node; + struct device_node *phy_handle; + int res = 0; + + phy_handle = of_parse_phandle(np, "phy-handle", 0); + + if (phy_handle) { + res = emac_dt_mdio_probe(dev); + if (!res) { + res = emac_dt_phy_connect(dev, phy_handle); + if (res) + mdiobus_unregister(dev->mii_bus); + } + } + + of_node_put(phy_handle); + return res; +} + static int emac_init_phy(struct emac_instance *dev) { struct device_node *np = dev->ofdev->dev.of_node; @@ -2430,15 +2644,12 @@ static int emac_init_phy(struct emac_instance *dev) dev->phy.dev = ndev; dev->phy.mode = dev->phy_mode; - /* PHY-less configuration. - * XXX I probably should move these settings to the dev tree - */ - if (dev->phy_address == 0xffffffff && dev->phy_map == 0xffffffff) { + /* PHY-less configuration. */ + if ((dev->phy_address == 0xffffffff && dev->phy_map == 0xffffffff) || + of_phy_is_fixed_link(np)) { emac_reset(dev); - /* PHY-less configuration. - * XXX I probably should move these settings to the dev tree - */ + /* PHY-less configuration. */ dev->phy.address = -1; dev->phy.features = SUPPORTED_MII; if (emac_phy_supports_gige(dev->phy_mode)) @@ -2447,6 +2658,16 @@ static int emac_init_phy(struct emac_instance *dev) dev->phy.features |= SUPPORTED_100baseT_Full; dev->phy.pause = 1; + if (of_phy_is_fixed_link(np)) { + int res = emac_dt_mdio_probe(dev); + + if (!res) { + res = of_phy_register_fixed_link(np); + if (res) + mdiobus_unregister(dev->mii_bus); + } + return res; + } return 0; } @@ -2490,6 +2711,18 @@ static int emac_init_phy(struct emac_instance *dev) emac_configure(dev); + if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) { + int res = emac_dt_phy_probe(dev); + + mutex_unlock(&emac_phy_map_lock); + if (!res) + goto init_phy; + + dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n", + res); + return res; + } + if (dev->phy_address != 0xffffffff) phy_map = ~(1 << dev->phy_address); @@ -2517,6 +2750,7 @@ static int emac_init_phy(struct emac_instance *dev) return -ENXIO; } + init_phy: /* Init PHY */ if (dev->phy.def->ops->init) dev->phy.def->ops->init(&dev->phy); @@ -2988,6 +3222,12 @@ static int emac_remove(struct platform_device *ofdev) if (emac_has_feature(dev, EMAC_FTR_HAS_ZMII)) zmii_detach(dev->zmii_dev, dev->zmii_port); + if (dev->phy_dev) + phy_disconnect(dev->phy_dev); + + if (dev->mii_bus) + mdiobus_unregister(dev->mii_bus); + busy_phy_map &= ~(1 << dev->phy.address); DBG(dev, "busy_phy_map now %#x" NL, busy_phy_map); diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 93ae11494810..0710a6685489 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -199,6 +199,10 @@ struct emac_instance { struct emac_instance *mdio_instance; struct mutex mdio_lock; + /* Device-tree based phy configuration */ + struct mii_bus *mii_bus; + struct phy_device *phy_dev; + /* ZMII infos if any */ u32 zmii_ph; u32 zmii_port; -- cgit v1.2.3 From 0ccea2210c7ed486c688cd05fd1b674d95ef9894 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Wed, 22 Feb 2017 00:11:17 +0530 Subject: qlogic: qlcnic_sysfs: constify bin_attribute structures Declare bin_attribute structures as const as they are only passed as an arguments to the functions device_remove_bin_file and device_create_bin_file. These function arguments are of type const, so bin_attribute structures having this property can be made const too. Done using Coccinelle: @r1 disable optional_qualifier @ identifier i; position p; @@ static struct bin_attribute i@p = {...}; @ok1@ identifier r1.i; position p,p1; @@ ( device_remove_bin_file(...,&i@p) | device_create_bin_file(..., &i@p1) ) @bad@ position p!={r1.p,ok1.p,ok1.p1}; identifier r1.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r1.i; @@ +const struct bin_attribute i; Signed-off-by: Bhumika Goyal Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c index ccbb04503b27..73027a6c06c7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c @@ -1192,56 +1192,56 @@ static struct device_attribute dev_attr_beacon = { .store = qlcnic_store_beacon, }; -static struct bin_attribute bin_attr_crb = { +static const struct bin_attribute bin_attr_crb = { .attr = {.name = "crb", .mode = (S_IRUGO | S_IWUSR)}, .size = 0, .read = qlcnic_sysfs_read_crb, .write = qlcnic_sysfs_write_crb, }; -static struct bin_attribute bin_attr_mem = { +static const struct bin_attribute bin_attr_mem = { .attr = {.name = "mem", .mode = (S_IRUGO | S_IWUSR)}, .size = 0, .read = qlcnic_sysfs_read_mem, .write = qlcnic_sysfs_write_mem, }; -static struct bin_attribute bin_attr_npar_config = { +static const struct bin_attribute bin_attr_npar_config = { .attr = {.name = "npar_config", .mode = (S_IRUGO | S_IWUSR)}, .size = 0, .read = qlcnic_sysfs_read_npar_config, .write = qlcnic_sysfs_write_npar_config, }; -static struct bin_attribute bin_attr_pci_config = { +static const struct bin_attribute bin_attr_pci_config = { .attr = {.name = "pci_config", .mode = (S_IRUGO | S_IWUSR)}, .size = 0, .read = qlcnic_sysfs_read_pci_config, .write = NULL, }; -static struct bin_attribute bin_attr_port_stats = { +static const struct bin_attribute bin_attr_port_stats = { .attr = {.name = "port_stats", .mode = (S_IRUGO | S_IWUSR)}, .size = 0, .read = qlcnic_sysfs_get_port_stats, .write = qlcnic_sysfs_clear_port_stats, }; -static struct bin_attribute bin_attr_esw_stats = { +static const struct bin_attribute bin_attr_esw_stats = { .attr = {.name = "esw_stats", .mode = (S_IRUGO | S_IWUSR)}, .size = 0, .read = qlcnic_sysfs_get_esw_stats, .write = qlcnic_sysfs_clear_esw_stats, }; -static struct bin_attribute bin_attr_esw_config = { +static const struct bin_attribute bin_attr_esw_config = { .attr = {.name = "esw_config", .mode = (S_IRUGO | S_IWUSR)}, .size = 0, .read = qlcnic_sysfs_read_esw_config, .write = qlcnic_sysfs_write_esw_config, }; -static struct bin_attribute bin_attr_pm_config = { +static const struct bin_attribute bin_attr_pm_config = { .attr = {.name = "pm_config", .mode = (S_IRUGO | S_IWUSR)}, .size = 0, .read = qlcnic_sysfs_read_pm_config, -- cgit v1.2.3 From ff2924583b7211b062ee5433924256f99011b0d9 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Wed, 22 Feb 2017 00:17:48 +0530 Subject: qlogic: netxen: constify bin_attribute structures Declare bin_attribute structures as const as they are only passed as an arguments to the functions device_remove_bin_file and device_create_bin_file. These function arguments are of type const, so bin_attribute structures having this property can be made const too. Done using Coccinelle: @r1 disable optional_qualifier @ identifier i; position p; @@ static struct bin_attribute i@p = {...}; @ok1@ identifier r1.i; position p,p1; @@ ( device_remove_bin_file(...,&i@p) | device_create_bin_file(..., &i@p1) ) @bad@ position p!={r1.p,ok1.p,ok1.p1}; identifier r1.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r1.i; @@ +const struct bin_attribute i; Signed-off-by: Bhumika Goyal Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 3b5d7cfa2321..827de838389f 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -3005,14 +3005,14 @@ static ssize_t netxen_sysfs_write_mem(struct file *filp, struct kobject *kobj, } -static struct bin_attribute bin_attr_crb = { +static const struct bin_attribute bin_attr_crb = { .attr = {.name = "crb", .mode = (S_IRUGO | S_IWUSR)}, .size = 0, .read = netxen_sysfs_read_crb, .write = netxen_sysfs_write_crb, }; -static struct bin_attribute bin_attr_mem = { +static const struct bin_attribute bin_attr_mem = { .attr = {.name = "mem", .mode = (S_IRUGO | S_IWUSR)}, .size = 0, .read = netxen_sysfs_read_mem, @@ -3141,7 +3141,7 @@ out: } -static struct bin_attribute bin_attr_dimm = { +static const struct bin_attribute bin_attr_dimm = { .attr = { .name = "dimm", .mode = (S_IRUGO | S_IWUSR) }, .size = sizeof(struct netxen_dimm_cfg), .read = netxen_sysfs_read_dimm, -- cgit v1.2.3 From 12d656af4e3d2781b9b9f52538593e1717e7c979 Mon Sep 17 00:00:00 2001 From: Ridge Kennedy Date: Wed, 22 Feb 2017 14:59:49 +1300 Subject: l2tp: Avoid schedule while atomic in exit_net While destroying a network namespace that contains a L2TP tunnel a "BUG: scheduling while atomic" can be observed. Enabling lockdep shows that this is happening because l2tp_exit_net() is calling l2tp_tunnel_closeall() (via l2tp_tunnel_delete()) from within an RCU critical section. l2tp_exit_net() takes rcu_read_lock_bh() << list_for_each_entry_rcu() >> l2tp_tunnel_delete() l2tp_tunnel_closeall() __l2tp_session_unhash() synchronize_rcu() << Illegal inside RCU critical section >> BUG: sleeping function called from invalid context in_atomic(): 1, irqs_disabled(): 0, pid: 86, name: kworker/u16:2 INFO: lockdep is turned off. CPU: 2 PID: 86 Comm: kworker/u16:2 Tainted: G W O 4.4.6-at1 #2 Hardware name: Xen HVM domU, BIOS 4.6.1-xs125300 05/09/2016 Workqueue: netns cleanup_net 0000000000000000 ffff880202417b90 ffffffff812b0013 ffff880202410ac0 ffffffff81870de8 ffff880202417bb8 ffffffff8107aee8 ffffffff81870de8 0000000000000c51 0000000000000000 ffff880202417be0 ffffffff8107b024 Call Trace: [] dump_stack+0x85/0xc2 [] ___might_sleep+0x148/0x240 [] __might_sleep+0x44/0x80 [] synchronize_sched+0x2d/0xe0 [] ? trace_hardirqs_on+0xd/0x10 [] ? __local_bh_enable_ip+0x6b/0xc0 [] ? _raw_spin_unlock_bh+0x30/0x40 [] __l2tp_session_unhash+0x172/0x220 [] ? __l2tp_session_unhash+0x87/0x220 [] l2tp_tunnel_closeall+0x9b/0x140 [] l2tp_tunnel_delete+0x14/0x60 [] l2tp_exit_net+0x110/0x270 [] ? l2tp_exit_net+0x9c/0x270 [] ops_exit_list.isra.6+0x33/0x60 [] cleanup_net+0x1b6/0x280 ... This bug can easily be reproduced with a few steps: $ sudo unshare -n bash # Create a shell in a new namespace # ip link set lo up # ip addr add 127.0.0.1 dev lo # ip l2tp add tunnel remote 127.0.0.1 local 127.0.0.1 tunnel_id 1 \ peer_tunnel_id 1 udp_sport 50000 udp_dport 50000 # ip l2tp add session name foo tunnel_id 1 session_id 1 \ peer_session_id 1 # ip link set foo up # exit # Exit the shell, in turn exiting the namespace $ dmesg ... [942121.089216] BUG: scheduling while atomic: kworker/u16:3/13872/0x00000200 ... To fix this, move the call to l2tp_tunnel_closeall() out of the RCU critical section, and instead call it from l2tp_tunnel_del_work(), which is running from the l2tp_wq workqueue. Fixes: 2b551c6e7d5b ("l2tp: close sessions before initiating tunnel delete") Signed-off-by: Ridge Kennedy Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 85948c69b236..b58000efee73 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1317,6 +1317,9 @@ static void l2tp_tunnel_del_work(struct work_struct *work) struct sock *sk = NULL; tunnel = container_of(work, struct l2tp_tunnel, del_work); + + l2tp_tunnel_closeall(tunnel); + sk = l2tp_tunnel_sock_lookup(tunnel); if (!sk) goto out; @@ -1639,7 +1642,6 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { l2tp_tunnel_inc_refcount(tunnel); - l2tp_tunnel_closeall(tunnel); if (false == queue_work(l2tp_wq, &tunnel->del_work)) { l2tp_tunnel_dec_refcount(tunnel); return 1; -- cgit v1.2.3 From 2618be7dccf8739b89e1906b64bd8d551af351e6 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 20 Feb 2017 14:58:41 +0300 Subject: uapi: fix linux/if.h userspace compilation errors Include (guarded by ifndef __KERNEL__) to fix the following linux/if.h userspace compilation errors: /usr/include/linux/if.h:234:19: error: field 'ifru_addr' has incomplete type struct sockaddr ifru_addr; /usr/include/linux/if.h:235:19: error: field 'ifru_dstaddr' has incomplete type struct sockaddr ifru_dstaddr; /usr/include/linux/if.h:236:19: error: field 'ifru_broadaddr' has incomplete type struct sockaddr ifru_broadaddr; /usr/include/linux/if.h:237:19: error: field 'ifru_netmask' has incomplete type struct sockaddr ifru_netmask; /usr/include/linux/if.h:238:20: error: field 'ifru_hwaddr' has incomplete type struct sockaddr ifru_hwaddr; This also fixes userspace compilation of the following uapi headers: linux/atmbr2684.h linux/gsmmux.h linux/if_arp.h linux/if_bonding.h linux/if_frad.h linux/if_pppox.h linux/if_tunnel.h linux/netdevice.h linux/route.h linux/wireless.h As no uapi header provides a definition of struct sockaddr, inclusion of seems to be the most conservative and the only safe fix available. All current users of are very likely to be including already because the latter is the sole provider of struct sockaddr definition in libc, so adding a uapi header with a definition of struct sockaddr would create a potential conflict with . Replacing struct sockaddr in the definition of struct ifreq with a different type would create a potential incompatibility with current users of struct ifreq who might rely on ifru_addr et al members being of type struct sockaddr. Signed-off-by: Dmitry V. Levin Signed-off-by: David S. Miller --- include/uapi/linux/if.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index 1158a043342a..259617a551f2 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -24,6 +24,10 @@ #include /* for "struct sockaddr" et al */ #include /* for "__user" et al */ +#ifndef __KERNEL__ +#include /* for struct sockaddr. */ +#endif + #if __UAPI_DEF_IF_IFNAMSIZ #define IFNAMSIZ 16 #endif /* __UAPI_DEF_IF_IFNAMSIZ */ -- cgit v1.2.3 From ec7cb62d18d854ea09df8b7194e7e710985f8b9a Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 22 Feb 2017 12:35:27 +0300 Subject: net/dccp: fix use after free in tw_timer_handler() DCCP doesn't purge timewait sockets on network namespace shutdown. So, after net namespace destroyed we could still have an active timer which will trigger use after free in tw_timer_handler(): BUG: KASAN: use-after-free in tw_timer_handler+0x4a/0xa0 at addr ffff88010e0d1e10 Read of size 8 by task swapper/1/0 Call Trace: __asan_load8+0x54/0x90 tw_timer_handler+0x4a/0xa0 call_timer_fn+0x127/0x480 expire_timers+0x1db/0x2e0 run_timer_softirq+0x12f/0x2a0 __do_softirq+0x105/0x5b4 irq_exit+0xdd/0xf0 smp_apic_timer_interrupt+0x57/0x70 apic_timer_interrupt+0x90/0xa0 Object at ffff88010e0d1bc0, in cache net_namespace size: 6848 Allocated: save_stack_trace+0x1b/0x20 kasan_kmalloc+0xee/0x180 kasan_slab_alloc+0x12/0x20 kmem_cache_alloc+0x134/0x310 copy_net_ns+0x8d/0x280 create_new_namespaces+0x23f/0x340 unshare_nsproxy_namespaces+0x75/0xf0 SyS_unshare+0x299/0x4f0 entry_SYSCALL_64_fastpath+0x18/0xad Freed: save_stack_trace+0x1b/0x20 kasan_slab_free+0xae/0x180 kmem_cache_free+0xb4/0x350 net_drop_ns+0x3f/0x50 cleanup_net+0x3df/0x450 process_one_work+0x419/0xbb0 worker_thread+0x92/0x850 kthread+0x192/0x1e0 ret_from_fork+0x2e/0x40 Add .exit_batch hook to dccp_v4_ops()/dccp_v6_ops() which will purge timewait sockets on net namespace destruction and prevent above issue. Fixes: f2bf415cfed7 ("mib: add net to NET_ADD_STATS_BH") Reported-by: Dmitry Vyukov Signed-off-by: Andrey Ryabinin Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 6 ++++++ net/dccp/ipv6.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b043ec833785..409d0cfd3447 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1017,9 +1017,15 @@ static void __net_exit dccp_v4_exit_net(struct net *net) inet_ctl_sock_destroy(net->dccp.v4_ctl_sk); } +static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&dccp_hashinfo, AF_INET); +} + static struct pernet_operations dccp_v4_ops = { .init = dccp_v4_init_net, .exit = dccp_v4_exit_net, + .exit_batch = dccp_v4_exit_batch, }; static int __init dccp_v4_init(void) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index cef60a4a2803..233b57367758 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1075,9 +1075,15 @@ static void __net_exit dccp_v6_exit_net(struct net *net) inet_ctl_sock_destroy(net->dccp.v6_ctl_sk); } +static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&dccp_hashinfo, AF_INET6); +} + static struct pernet_operations dccp_v6_ops = { .init = dccp_v6_init_net, .exit = dccp_v6_exit_net, + .exit_batch = dccp_v6_exit_batch, }; static int __init dccp_v6_init(void) -- cgit v1.2.3 From 00355fa5bb89840c48eb11eb6d84c6c3b128a839 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Wed, 22 Feb 2017 13:23:55 +0300 Subject: tcp: setup timestamp offset when write_seq already set Found that when randomized tcp offsets are enabled (by default) TCP client can still start new connections without them. Later, if server does active close and re-uses sockets in TIME-WAIT state, new SYN from client can be rejected on PAWS check inside tcp_timewait_state_process(), because either tw_ts_recent or rcv_tsval doesn't really have an offset set. Here is how to reproduce it with LTP netstress tool: netstress -R 1 & netstress -H 127.0.0.1 -lr 1000000 -a1 [...] < S seq 1956977072 win 43690 TS val 295618 ecr 459956970 > . ack 1956911535 win 342 TS val 459967184 ecr 1547117608 < R seq 1956911535 win 0 length 0 +1. < S seq 1956977072 win 43690 TS val 296640 ecr 459956970 > S. seq 657450664 ack 1956977073 win 43690 TS val 459968205 ecr 296640 Fixes: 95a22caee396 ("tcp: randomize tcp timestamp offsets for each connection") Signed-off-by: Alexey Kodanev Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 16 ++++++++++------ net/ipv6/tcp_ipv6.c | 16 ++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8c124d4ef4b7..9a89b8deafae 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -145,6 +145,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct flowi4 *fl4; struct rtable *rt; int err; + u32 seq; struct ip_options_rcu *inet_opt; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; @@ -234,12 +235,15 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_setup_caps(sk, &rt->dst); rt = NULL; - if (!tp->write_seq && likely(!tp->repair)) - tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port, - &tp->tsoffset); + if (likely(!tp->repair)) { + seq = secure_tcp_sequence_number(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port, + &tp->tsoffset); + if (!tp->write_seq) + tp->write_seq = seq; + } inet->inet_id = tp->write_seq ^ jiffies; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 21c719965b6b..60a5295a7de6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -122,6 +122,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct flowi6 fl6; struct dst_entry *dst; int addr_type; + u32 seq; int err; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; @@ -285,12 +286,15 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk_set_txhash(sk); - if (!tp->write_seq && likely(!tp->repair)) - tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport, - &tp->tsoffset); + if (likely(!tp->repair)) { + seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport, + &tp->tsoffset); + if (!tp->write_seq) + tp->write_seq = seq; + } if (tcp_fastopen_defer_connect(sk, &err)) return err; -- cgit v1.2.3 From eee2faabc63d863a129000b698a2bca54dff643d Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Wed, 22 Feb 2017 13:23:56 +0300 Subject: tcp: account for ts offset only if tsecr not zero We can get SYN with zero tsecr, don't apply offset in this case. Fixes: ee684b6f2830 ("tcp: send packets with a socket timestamp") Signed-off-by: Alexey Kodanev Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dff7d2aaf861..7e16243cdb58 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -101,7 +101,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { - tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; + if (tmp_opt.rcv_tsecr) + tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; tmp_opt.ts_recent = tcptw->tw_ts_recent; tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; paws_reject = tcp_paws_reject(&tmp_opt, th->rst); -- cgit v1.2.3 From 18bcf742fbb0ddaf68d58ebf50b248748c983cea Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Wed, 22 Feb 2017 17:20:11 +0200 Subject: net/mlx5e: s390 system compilation fix Add necessary headers include for s390 arch compilation. Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Fixes: d605d6686dc7 ("net/mlx5e: Add support for ethtool self..") Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index b039b87742a6..9fad22768aab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include #include #include #include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 65442c36a6e1..31e3cb7ee5fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include #include #include #include -- cgit v1.2.3 From 6f08a22c5fb2b9aefb8ecd8496758e7a677c1fde Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 22 Feb 2017 17:20:12 +0200 Subject: net/mlx5e: Register/unregister vport representors on interface attach/detach Currently vport representors are added only on driver load and removed on driver unload. Apparently we forgot to handle them when we added the seamless reset flow feature. This caused to leave the representors netdevs alive and active with open HW resources on pci shutdown and on error reset flows. To overcome this we move their handling to interface attach/detach, so they would be cleaned up on shutdown and recreated on reset flows. Fixes: 26e59d8077a3 ("net/mlx5e: Implement mlx5e interface attach/detach callbacks") Signed-off-by: Saeed Mahameed Reviewed-by: Hadar Hen Zion Reviewed-by: Roi Dayan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3cce6281e075..c24366868b39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3970,6 +3970,19 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) } } +static void mlx5e_unregister_vport_rep(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return; + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); +} + void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4016,6 +4029,7 @@ static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv) return err; } + mlx5e_register_vport_rep(mdev); return 0; } @@ -4027,6 +4041,7 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) if (!netif_device_present(netdev)) return; + mlx5e_unregister_vport_rep(mdev); mlx5e_detach_netdev(mdev, netdev); mlx5e_destroy_mdev_resources(mdev); } @@ -4045,8 +4060,6 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) if (err) return NULL; - mlx5e_register_vport_rep(mdev); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) ppriv = &esw->offloads.vport_reps[0]; @@ -4098,13 +4111,7 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) { - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); struct mlx5e_priv *priv = vpriv; - int vport; - - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport); unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); -- cgit v1.2.3 From 4078e637c12f1e0a74293f1ec9563f42bff14a03 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 22 Feb 2017 17:20:13 +0200 Subject: net/mlx5e: Do not reduce LRO WQE size when not using build_skb When rq_type is Striding RQ, no room of SKB_RESERVE is needed as SKB allocation is not done via build_skb. Fixes: e4b85508072b ("net/mlx5e: Slightly reduce hardware LRO size") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c24366868b39..0d58729b42ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -82,6 +82,7 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) { priv->params.rq_wq_type = rq_type; + priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: priv->params.log_rq_size = is_kdump_kernel() ? @@ -98,6 +99,10 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) priv->params.log_rq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + + /* Extra room needed for build_skb */ + priv->params.lro_wqe_sz -= MLX5_RX_HEADROOM + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, BIT(priv->params.log_rq_size)); @@ -3547,12 +3552,6 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - - /* Extra room needed for build_skb */ - MLX5_RX_HEADROOM - - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - /* Initialize pflags */ MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); -- cgit v1.2.3 From b0d4660b4cc52e6477ca3a43435351d565dfcedc Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 22 Feb 2017 17:20:14 +0200 Subject: net/mlx5e: Fix broken CQE compression initialization Some of RQ type parameters are derived from CQE compression state flag, CQE compression flag was initialized only after RQ type parameters setup. This leads to load RQ with stride size smaller than what we want for when CQE compression is on. This bug introduces no functional damage, it only makes CQE compression occur less often, since in ConnectX4-LX CQE compression is performed only on packets smaller than stride size. Fix this by marking default status of CQE compression in PFLAG prior to calling mlx5e_set_rq_priv_params(), as it inits some fields based on it. Tested: load driver on systems where rx CQE compress will be on (MH) pktgen with 64 < pkt size < 256 and netperf TCP_STREAM (IPv4/IPv6) verify `ethtool -S ethxx | grep compress` are advancing more often (rapidly) Fixes: 2fc4bfb7250d ("net/mlx5e: Dynamic RQ type infrastructure") Signed-off-by: Tariq Toukan Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0d58729b42ae..dc621bc4e173 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3526,6 +3526,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, cqe_compress_heuristic(link_speed, pci_bw); } + MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, + priv->params.rx_cqe_compress_def); + mlx5e_set_rq_priv_params(priv); if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) priv->params.lro_en = true; @@ -3555,7 +3558,6 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, /* Initialize pflags */ MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, priv->params.rx_cqe_compress_def); mutex_init(&priv->state_lock); -- cgit v1.2.3 From 6dc4b54e77282caf17f0ff72aa32dd296037fbc0 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 22 Feb 2017 17:20:15 +0200 Subject: net/mlx5e: Update MPWQE stride size when modifying CQE compress state When the admin enables/disables cqe compression, updating mpwqe stride size is required: CQE compress ON ==> stride size = 256B CQE compress OFF ==> stride size = 64B This is already done on driver load via mlx5e_set_rq_type_params, all we need is just to call it on arbitrary admin changes of cqe compression state via priv flags or when changing timestamping state (as it is mutually exclusive with cqe compression). This bug introduces no functional damage, it only makes cqe compression occur less often, since in ConnectX4-LX CQE compression is performed only on packets smaller than stride size. Tested: ethtool --set-priv-flags ethxx rx_cqe_compress on pktgen with 64 < pkt size < 256 and netperf TCP_STREAM (IPv4/IPv6) verify `ethtool -S ethxx | grep compress` are advancing more often (rapidly) Fixes: 7219ab34f184 ("net/mlx5e: CQE compression") Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan Cc: kernel-team@fb.com Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 95ca03c0d9f5..f6a6ded204f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -816,6 +816,7 @@ int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index cc80522b5854..a004a5a1a4c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1487,6 +1487,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, mlx5e_modify_rx_cqe_compression_locked(priv, enable); priv->params.rx_cqe_compress_def = enable; + mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index dc621bc4e173..8ef64c4db2c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -79,7 +79,7 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, reg_umr_sq); } -static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) +void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) { priv->params.rq_wq_type = rq_type; priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 9fad22768aab..d5ce20db3f0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -172,6 +172,7 @@ void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val) mlx5e_close_locked(priv->netdev); MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, val); + mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); if (was_opened) mlx5e_open_locked(priv->netdev); -- cgit v1.2.3 From 36154be40a28e4afaa0416da2681d80b7e2ca319 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 22 Feb 2017 17:20:16 +0200 Subject: net/mlx5e: Fix wrong CQE decompression In cqe compression with striding RQ, the decompression of the CQE field wqe_counter was done with a wrong wraparound value. This caused handling cqes with a wrong pointer to wqe (rx descriptor) and creating SKBs with wrong data, pointing to wrong (and already consumed) strides/pages. The meaning of the CQE field wqe_counter in striding RQ holds the stride index instead of the WQE index. Hence, when decompressing a CQE, wqe_counter should have wrapped-around the number of strides in a single multi-packet WQE. We dropped this wrap-around mask at all in CQE decompression of striding RQ. It is not needed as in such cases the CQE compression session would break because of different value of wqe_id field, starting a new compression session. Tested: ethtool -K ethxx lro off/on ethtool --set-priv-flags ethxx rx_cqe_compress on super_netperf 16 {ipv4,ipv6} -t TCP_STREAM -m 50 -D verified no csum errors and no page refcount issues. Fixes: 7219ab34f184 ("net/mlx5e: CQE compression") Signed-off-by: Tariq Toukan Reported-by: Tom Herbert Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d5ce20db3f0b..3d371688fbbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -94,19 +94,18 @@ static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, struct mlx5e_cq *cq, u32 cqcc) { - u16 wqe_cnt_step; - cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt; cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum; cq->title.op_own &= 0xf0; cq->title.op_own |= 0x01 & (cqcc >> cq->wq.log_sz); cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter); - wqe_cnt_step = - rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - mpwrq_get_cqe_consumed_strides(&cq->title) : 1; - cq->decmprs_wqe_counter = - (cq->decmprs_wqe_counter + wqe_cnt_step) & rq->wq.sz_m1; + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + cq->decmprs_wqe_counter += + mpwrq_get_cqe_consumed_strides(&cq->title); + else + cq->decmprs_wqe_counter = + (cq->decmprs_wqe_counter + 1) & rq->wq.sz_m1; } static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, -- cgit v1.2.3 From 557d7acd754543ca6f7166b9abde0a1af01ed848 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 23 Feb 2017 01:38:03 +0300 Subject: uapi: fix linux/ip6_tunnel.h userspace compilation errors Include and to fix the following linux/ip6_tunnel.h userspace compilation errors: /usr/include/linux/ip6_tunnel.h:23:12: error: 'IFNAMSIZ' undeclared here (not in a function) char name[IFNAMSIZ]; /* name of tunnel device */ /usr/include/linux/ip6_tunnel.h:30:18: error: field 'laddr' has incomplete type struct in6_addr laddr; /* local tunnel end-point address */ Signed-off-by: Dmitry V. Levin Signed-off-by: David S. Miller --- include/uapi/linux/ip6_tunnel.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/ip6_tunnel.h b/include/uapi/linux/ip6_tunnel.h index 48af63c9a48d..425926c467d7 100644 --- a/include/uapi/linux/ip6_tunnel.h +++ b/include/uapi/linux/ip6_tunnel.h @@ -2,6 +2,8 @@ #define _IP6_TUNNEL_H #include +#include /* For IFNAMSIZ. */ +#include /* For struct in6_addr. */ #define IPV6_TLV_TNL_ENCAP_LIMIT 4 #define IPV6_DEFAULT_TNL_ENCAP_LIMIT 4 -- cgit v1.2.3 From 40df93be6a7084e09688e1ddc45615d13df133fc Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 23 Feb 2017 01:38:26 +0300 Subject: uapi: fix linux/llc.h userspace compilation error Include to fix the following linux/llc.h userspace compilation error: /usr/include/linux/llc.h:26:27: error: 'IFHWADDRLEN' undeclared here (not in a function) unsigned char sllc_mac[IFHWADDRLEN]; Signed-off-by: Dmitry V. Levin Signed-off-by: David S. Miller --- include/uapi/linux/llc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/llc.h b/include/uapi/linux/llc.h index 9c987a402473..a6c17f66ee94 100644 --- a/include/uapi/linux/llc.h +++ b/include/uapi/linux/llc.h @@ -14,6 +14,7 @@ #define _UAPI__LINUX_LLC_H #include +#include /* For IFHWADDRLEN. */ #define __LLC_SOCK_SIZE__ 16 /* sizeof(sockaddr_llc), word align. */ struct sockaddr_llc { -- cgit v1.2.3 From bc1750f366902449f36f15f4a692a495fe6bcdfe Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Feb 2017 00:20:53 +0000 Subject: bpf: fix spelling mistake: "proccessed" -> "processed" trivial fix to spelling mistake in verbose log message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d2bded2b250c..3fc6e39b223e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2776,7 +2776,7 @@ static int do_check(struct bpf_verifier_env *env) class = BPF_CLASS(insn->code); if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { - verbose("BPF program is too large. Proccessed %d insn\n", + verbose("BPF program is too large. Processed %d insn\n", insn_processed); return -E2BIG; } -- cgit v1.2.3 From ca92aea9781691bae3bafde95048cc3840a70ad8 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 23 Feb 2017 03:22:49 -0500 Subject: forcedeth: Remove return from a void function In a void function, it is not necessary to append a return statement in it. Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 92367a06491a..978d32944c80 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -3272,8 +3272,6 @@ static void nv_force_linkspeed(struct net_device *dev, int speed, int duplex) pci_push(base); writel(np->linkspeed, base + NvRegLinkSpeed); pci_push(base); - - return; } /** -- cgit v1.2.3 From 50ab3af16c48bc14a6787ab2f506c403d9d1bfad Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 23 Feb 2017 10:57:45 +0100 Subject: lib: Remove string from parman config selection As reported by Geert, remove the string so the user does not see this config option. The option is explicitly selected only as a dependency of in-kernel users. Reported-by: Geert Uytterhoeven Fixes: 44091d29f207 ("lib: Introduce priority array area manager") Signed-off-by: Jiri Pirko Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- lib/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig b/lib/Kconfig index 5d644f180fe5..f3552604e47a 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -551,6 +551,6 @@ config SBITMAP bool config PARMAN - tristate "parman" + tristate endmenu -- cgit v1.2.3 From ea3ebc73b46fbdb049dafd47543bb22efaa09c8e Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 23 Feb 2017 14:30:34 +0300 Subject: uapi: fix linux/seg6.h and linux/seg6_iptunnel.h userspace compilation errors Include in uapi/linux/seg6.h to fix the following linux/seg6.h userspace compilation error: /usr/include/linux/seg6.h:31:18: error: array type has incomplete element type 'struct in6_addr' struct in6_addr segments[0]; Include in uapi/linux/seg6_iptunnel.h to fix the following linux/seg6_iptunnel.h userspace compilation error: /usr/include/linux/seg6_iptunnel.h:26:21: error: array type has incomplete element type 'struct ipv6_sr_hdr' struct ipv6_sr_hdr srh[0]; Fixes: a50a05f497a2 ("ipv6: sr: add missing Kbuild export for header files") Signed-off-by: Dmitry V. Levin Signed-off-by: David S. Miller --- include/uapi/linux/seg6.h | 1 + include/uapi/linux/seg6_iptunnel.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h index 61df8d392f41..7278511d339e 100644 --- a/include/uapi/linux/seg6.h +++ b/include/uapi/linux/seg6.h @@ -15,6 +15,7 @@ #define _UAPI_LINUX_SEG6_H #include +#include /* For struct in6_addr. */ /* * SRH diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index 7a7183d4062a..b6e5a0a1afd7 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -14,6 +14,8 @@ #ifndef _UAPI_LINUX_SEG6_IPTUNNEL_H #define _UAPI_LINUX_SEG6_IPTUNNEL_H +#include /* For struct ipv6_sr_hdr. */ + enum { SEG6_IPTUNNEL_UNSPEC, SEG6_IPTUNNEL_SRH, -- cgit v1.2.3 From c12f4d761dd2313ae4f457041df3ec0c603aa76a Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Thu, 23 Feb 2017 14:35:23 +0300 Subject: uapi: fix linux/rds.h userspace compilation errors Consistently use types from linux/types.h to fix the following linux/rds.h userspace compilation errors: /usr/include/linux/rds.h:198:2: error: unknown type name 'u8' u8 rx_traces; /usr/include/linux/rds.h:199:2: error: unknown type name 'u8' u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; /usr/include/linux/rds.h:203:2: error: unknown type name 'u8' u8 rx_traces; /usr/include/linux/rds.h:204:2: error: unknown type name 'u8' u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; /usr/include/linux/rds.h:205:2: error: unknown type name 'u64' u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; Fixes: 3289025aedc0 ("RDS: add receive message trace used by application") Signed-off-by: Dmitry V. Levin Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- include/uapi/linux/rds.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index 47c03ca5c404..198892b95f09 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -195,14 +195,14 @@ enum rds_message_rxpath_latency { }; struct rds_rx_trace_so { - u8 rx_traces; - u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; + __u8 rx_traces; + __u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; }; struct rds_cmsg_rx_trace { - u8 rx_traces; - u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; - u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; + __u8 rx_traces; + __u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX]; + __u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; }; /* -- cgit v1.2.3 From 423b3aecf29085a52530d4f9167c56a84b081042 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 23 Feb 2017 12:02:41 +0200 Subject: net/mlx4: Change ENOTSUPP to EOPNOTSUPP As ENOTSUPP is specific to NFS, change the return error value to EOPNOTSUPP in various places in the mlx4 driver. Signed-off-by: Or Gerlitz Suggested-by: Yotam Gigi Reviewed-by: Matan Barak Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c | 2 +- drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +- drivers/net/ethernet/mellanox/mlx4/intf.c | 2 +- drivers/net/ethernet/mellanox/mlx4/main.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/mr.c | 2 +- drivers/net/ethernet/mellanox/mlx4/qp.c | 2 +- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index b04760a5034b..1dae8e40fb25 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -319,7 +319,7 @@ static int mlx4_en_ets_validate(struct mlx4_en_priv *priv, struct ieee_ets *ets) default: en_err(priv, "TC[%d]: Not supported TSA: %d\n", i, ets->tc_tsa[i]); - return -ENOTSUPP; + return -EOPNOTSUPP; } } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 3fe885ce1902..37e84a59e751 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -2436,7 +2436,7 @@ int mlx4_config_dev_retrieval(struct mlx4_dev *dev, #define CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET 4 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CONFIG_DEV)) - return -ENOTSUPP; + return -EOPNOTSUPP; err = mlx4_CONFIG_DEV_get(dev, &config_dev); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 8258d08acd8c..e00f627331cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -136,7 +136,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable) LIST_HEAD(bond_list); if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) - return -ENOTSUPP; + return -EOPNOTSUPP; ret = mlx4_disable_rx_port_check(dev, enable); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 15ef787e71ba..683234221741 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1447,7 +1447,7 @@ int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p) int err; if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&priv->bond_mutex); @@ -1884,7 +1884,7 @@ int mlx4_get_internal_clock_params(struct mlx4_dev *dev, struct mlx4_priv *priv = mlx4_priv(dev); if (mlx4_is_slave(dev)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (!params) return -EINVAL; @@ -2384,7 +2384,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) /* Query CONFIG_DEV parameters */ err = mlx4_config_dev_retrieval(dev, ¶ms); - if (err && err != -ENOTSUPP) { + if (err && err != -EOPNOTSUPP) { mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n"); } else if (!err) { dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1; diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 395b5463cfd9..db65f72879e9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -823,7 +823,7 @@ int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, !(dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)) || (type == MLX4_MW_TYPE_2 && !(dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN))) - return -ENOTSUPP; + return -EOPNOTSUPP; index = mlx4_mpt_reserve(dev); if (index == -1) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index d1cd9c32a9ae..2d6abd4662b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -447,7 +447,7 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { mlx4_warn(dev, "Trying to set src check LB, but it isn't supported\n"); - err = -ENOTSUPP; + err = -EOPNOTSUPP; goto out; } pri_addr_path_mask |= diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 6fe9f76ae656..d8d5d161b8c7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4297,7 +4297,7 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { mlx4_warn(dev, "Src check LB for slave %d isn't supported\n", slave); - return -ENOTSUPP; + return -EOPNOTSUPP; } /* Just change the smac for the QP */ -- cgit v1.2.3 From 745d8ae4622c6808b22e33a944c7decb30074be4 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 23 Feb 2017 12:02:42 +0200 Subject: net/mlx4: Spoofcheck and zero MAC can't coexist Spoofcheck can't be enabled if VF MAC is zero. Vice versa, can't zero MAC if spoofcheck is on. Fixes: 8f7ba3ca12f6 ('net/mlx4: Add set VF mac address support') Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 22 ++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 6 +----- include/linux/mlx4/cmd.h | 2 +- include/linux/mlx4/driver.h | 10 ++++++++++ 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index a49072b4fa52..e8c105164931 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -43,6 +43,7 @@ #include #include #include +#include #include @@ -2955,7 +2956,7 @@ static bool mlx4_valid_vf_state_change(struct mlx4_dev *dev, int port, return false; } -int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) +int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; @@ -2964,13 +2965,22 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) if (!mlx4_is_master(dev)) return -EPROTONOSUPPORT; + if (is_multicast_ether_addr(mac)) + return -EINVAL; + slave = mlx4_get_slave_indx(dev, vf); if (slave < 0) return -EINVAL; port = mlx4_slaves_closest_port(dev, slave, port); s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; - s_info->mac = mac; + + if (s_info->spoofchk && is_zero_ether_addr(mac)) { + mlx4_info(dev, "MAC invalidation is not allowed when spoofchk is on\n"); + return -EPERM; + } + + s_info->mac = mlx4_mac_to_u64(mac); mlx4_info(dev, "default mac on vf %d port %d to %llX will take effect only after vf restart\n", vf, port, s_info->mac); return 0; @@ -3143,6 +3153,7 @@ int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; int slave; + u8 mac[ETH_ALEN]; if ((!mlx4_is_master(dev)) || !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FSM)) @@ -3154,6 +3165,13 @@ int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) port = mlx4_slaves_closest_port(dev, slave, port); s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + + mlx4_u64_to_mac(mac, s_info->mac); + if (setting && !is_valid_ether_addr(mac)) { + mlx4_info(dev, "Illegal MAC with spoofchk\n"); + return -EPERM; + } + s_info->spoofchk = setting; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index afe4444e5434..61420473fe5f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2485,12 +2485,8 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac) { struct mlx4_en_priv *en_priv = netdev_priv(dev); struct mlx4_en_dev *mdev = en_priv->mdev; - u64 mac_u64 = mlx4_mac_to_u64(mac); - if (is_multicast_ether_addr(mac)) - return -EINVAL; - - return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64); + return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac); } static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 1f3568694a57..7b74afcbbab2 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -308,7 +308,7 @@ int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, struct ifla_vf_stats *vf_stats); u32 mlx4_comm_get_version(void); -int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); +int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos, __be16 proto); int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index bd0e7075ea6d..e965e5090d96 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -104,4 +104,14 @@ static inline u64 mlx4_mac_to_u64(u8 *addr) return mac; } +static inline void mlx4_u64_to_mac(u8 *addr, u64 mac) +{ + int i; + + for (i = ETH_ALEN; i > 0; i--) { + addr[i - 1] = mac && 0xFF; + mac >>= 8; + } +} + #endif /* MLX4_DRIVER_H */ -- cgit v1.2.3 From 95f1ba9a24af9769f6e20dfe9a77c863f253f311 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 23 Feb 2017 12:02:43 +0200 Subject: net/mlx4_core: Fix VF overwrite of module param which disables DMFS on new probed PFs In the VF driver, module parameter mlx4_log_num_mgm_entry_size was mistakenly overwritten -- and in a manner which overrode the device-managed flow steering option encoded in the parameter. log_num_mgm_entry_size is a global module parameter which affects all ConnectX-3 PFs installed on that host. If a VF changes log_num_mgm_entry_size, this will affect all PFs which are probed subsequent to the change (by disabling DMFS for those PFs). Fixes: 3c439b5586e9 ("mlx4_core: Allow choosing flow steering mode") Signed-off-by: Majd Dibbiny Reviewed-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 683234221741..005e1049c977 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -841,8 +841,6 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -EINVAL; } - mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; - dev->caps.hca_core_clock = hca_param.hca_core_clock; memset(&dev_cap, 0, sizeof(dev_cap)); -- cgit v1.2.3 From 6ed63d845e7866ff1a0eac9f0fa554fdf2c64e1d Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 23 Feb 2017 12:02:44 +0200 Subject: net/mlx4_core: Use cq quota in SRIOV when creating completion EQs When creating EQs to handle CQ completion events for the PF or for VFs, we create enough EQE entries to handle completions for the max number of CQs that can use that EQ. When SRIOV is activated, the max number of CQs a VF (or the PF) can obtain is its CQ quota (determined by the Hypervisor resource tracker). Therefore, when creating an EQ, the number of EQE entries that the VF should request for that EQ is the CQ quota value (and not the total number of CQs available in the FW). Under SRIOV, the PF, also must use its CQ quota, because the resource tracker also controls how many CQs the PF can obtain. Using the FW total CQs instead of the CQ quota when creating EQs resulted wasting MTT entries, due to allocating more EQEs than were needed. Fixes: 5a0d0a6161ae ("mlx4: Structures and init/teardown for VF resource quotas") Signed-off-by: Jack Morgenstein Reported-by: Dexuan Cui Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/eq.c | 5 ++--- drivers/net/ethernet/mellanox/mlx4/main.c | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 39232b6a974f..07406cf2eacd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1249,9 +1249,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) mlx4_warn(dev, "Failed adding irq rmap\n"); } #endif - err = mlx4_create_eq(dev, dev->caps.num_cqs - - dev->caps.reserved_cqs + - MLX4_NUM_SPARE_EQE, + err = mlx4_create_eq(dev, dev->quotas.cq + + MLX4_NUM_SPARE_EQE, (dev->flags & MLX4_FLAG_MSI_X) ? i + 1 - !!(i > MLX4_EQ_ASYNC) : 0, eq); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 005e1049c977..21377c315083 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3501,6 +3501,8 @@ slave_start: goto err_disable_msix; } + mlx4_init_quotas(dev); + err = mlx4_setup_hca(dev); if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) && !mlx4_is_mfunc(dev)) { @@ -3513,7 +3515,6 @@ slave_start: if (err) goto err_steer; - mlx4_init_quotas(dev); /* When PF resources are ready arm its comm channel to enable * getting commands */ -- cgit v1.2.3 From 7f0137e2ef9f32143df623001a96f7aab61a9595 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Feb 2017 12:02:45 +0200 Subject: net/mlx4_en: Use __skb_fill_page_desc() Or we might miss the fact that a page was allocated from memory reserves. Fixes: dceeab0e5258 ("mlx4: support __GFP_MEMALLOC for rx") Signed-off-by: Eric Dumazet Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index d85e6446f9d9..867292880c07 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -604,10 +604,10 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size, DMA_FROM_DEVICE); - /* Save page reference in skb */ - __skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page); - skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size); - skb_frags_rx[nr].page_offset = frags[nr].page_offset; + __skb_fill_page_desc(skb, nr, frags[nr].page, + frags[nr].page_offset, + frag_info->frag_size); + skb->truesize += frag_info->frag_stride; frags[nr].page = NULL; } -- cgit v1.2.3 From 9c4713701c01e4cef6e2315c2818abc919ffb0de Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 23 Feb 2017 10:40:34 -0800 Subject: bpf: Fix bpf_xdp_event_output Fix a typo. xdp->data instead of xdp should be copied to the perf-event's dst_buff. Fixes: 4de16969523c ("bpf: enable event output helper also for xdp types") Reported-by: Huapeng Zhou Tested-by: Feixiong Zhang Acked-by: Alexei Starovoitov Cc: Daniel Borkmann Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index e466e0040137..ebaeaf2e46e8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2584,8 +2584,8 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data))) return -EFAULT; - return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size, - bpf_xdp_copy); + return bpf_event_output(map, flags, meta, meta_size, xdp->data, + xdp_size, bpf_xdp_copy); } static const struct bpf_func_proto bpf_xdp_event_output_proto = { -- cgit v1.2.3