diff options
Diffstat (limited to 'net')
215 files changed, 6276 insertions, 2912 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c index 7d3a0af954e8..6356623fc238 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -141,15 +141,6 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(fddi_type_trans); -int fddi_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} -EXPORT_SYMBOL(fddi_change_mtu); - static const struct header_ops fddi_header_ops = { .create = fddi_header, }; @@ -161,6 +152,8 @@ static void fddi_setup(struct net_device *dev) dev->type = ARPHRD_FDDI; dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */ + dev->min_mtu = FDDI_K_SNAP_HLEN; + dev->max_mtu = FDDI_K_SNAP_DLEN; dev->addr_len = FDDI_K_ALEN; dev->tx_queue_len = 100; /* Long queues on FDDI */ dev->flags = IFF_BROADCAST | IFF_MULTICAST; diff --git a/net/802/hippi.c b/net/802/hippi.c index ade1a52cdcff..5e4427beab2b 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -116,18 +116,6 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(hippi_type_trans); -int hippi_change_mtu(struct net_device *dev, int new_mtu) -{ - /* - * HIPPI's got these nice large MTUs. - */ - if ((new_mtu < 68) || (new_mtu > 65280)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} -EXPORT_SYMBOL(hippi_change_mtu); - /* * For HIPPI we will actually use the lower 4 bytes of the hardware * address as the I-FIELD rather than the actual hardware address. @@ -174,6 +162,8 @@ static void hippi_setup(struct net_device *dev) dev->type = ARPHRD_HIPPI; dev->hard_header_len = HIPPI_HLEN; dev->mtu = 65280; + dev->min_mtu = 68; + dev->max_mtu = 65280; dev->addr_len = HIPPI_ALEN; dev->tx_queue_len = 25 /* 5 */; memset(dev->broadcast, 0xFF, HIPPI_ALEN); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8de138d3306b..9accde339601 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -515,8 +515,8 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) return -EFAULT; /* Null terminate this sucker, just in case. */ - args.device1[23] = 0; - args.u.device2[23] = 0; + args.device1[sizeof(args.device1) - 1] = 0; + args.u.device2[sizeof(args.u.device2) - 1] = 0; rtnl_lock(); @@ -571,8 +571,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; - if ((args.u.name_type >= 0) && - (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) { + if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) { struct vlan_net *vn; vn = net_generic(net, vlan_net_id); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index fbfacd51aa34..10da6c588bf8 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -826,5 +826,8 @@ void vlan_setup(struct net_device *dev) dev->destructor = vlan_dev_free; dev->ethtool_ops = &vlan_ethtool_ops; + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; + eth_zero_addr(dev->broadcast); } diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 1852e383afd6..553ed4ecb6a0 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -680,7 +680,7 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) goto error; /* Create the Protection Domain */ - rdma->pd = ib_alloc_pd(rdma->cm_id->device); + rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0); if (IS_ERR(rdma->pd)) goto error; diff --git a/net/atm/br2684.c b/net/atm/br2684.c index aa0047c5c467..fca84e111c89 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -620,14 +620,12 @@ error: static const struct net_device_ops br2684_netdev_ops = { .ndo_start_xmit = br2684_start_xmit, .ndo_set_mac_address = br2684_mac_addr, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; static const struct net_device_ops br2684_netdev_ops_routed = { .ndo_start_xmit = br2684_start_xmit, .ndo_set_mac_address = br2684_mac_addr, - .ndo_change_mtu = eth_change_mtu }; static void br2684_setup(struct net_device *netdev) @@ -651,7 +649,9 @@ static void br2684_setup_routed(struct net_device *netdev) netdev->hard_header_len = sizeof(llc_oui_ipv4); /* worst case */ netdev->netdev_ops = &br2684_netdev_ops_routed; netdev->addr_len = 0; - netdev->mtu = 1500; + netdev->mtu = ETH_DATA_LEN; + netdev->min_mtu = 0; + netdev->max_mtu = ETH_MAX_MTU; netdev->type = ARPHRD_PPP; netdev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; netdev->tx_queue_len = 100; diff --git a/net/atm/lec.c b/net/atm/lec.c index 5d2693826afb..779b3fa6052d 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -544,15 +544,6 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, return 0; } -/* shamelessly stolen from drivers/net/net_init.c */ -static int lec_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > 18190)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static void lec_set_multicast_list(struct net_device *dev) { /* @@ -565,7 +556,6 @@ static const struct net_device_ops lec_netdev_ops = { .ndo_open = lec_open, .ndo_stop = lec_close, .ndo_start_xmit = lec_start_xmit, - .ndo_change_mtu = lec_change_mtu, .ndo_tx_timeout = lec_tx_timeout, .ndo_set_rx_mode = lec_set_multicast_list, }; @@ -742,6 +732,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) if (!dev_lec[i]) return -ENOMEM; dev_lec[i]->netdev_ops = &lec_netdev_ops; + dev_lec[i]->max_mtu = 18190; snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); if (register_netdev(dev_lec[i])) { free_netdev(dev_lec[i]); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index c68ff3dcb926..e49121ee55f6 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -20,8 +20,6 @@ #include "main.h" -#include <linux/kconfig.h> - struct net_device; #define BATADV_DEBUGFS_SUBDIR "batman_adv" diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 64cb6acbe0a6..005012ba9b48 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -48,14 +48,7 @@ #include "tp_meter.h" #include "translation-table.h" -struct genl_family batadv_netlink_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = BATADV_NL_NAME, - .version = 1, - .maxattr = BATADV_ATTR_MAX, - .netnsok = true, -}; +struct genl_family batadv_netlink_family; /* multicast groups */ enum batadv_netlink_multicast_groups { @@ -610,6 +603,19 @@ static struct genl_ops batadv_netlink_ops[] = { }; +struct genl_family batadv_netlink_family __ro_after_init = { + .hdrsize = 0, + .name = BATADV_NL_NAME, + .version = 1, + .maxattr = BATADV_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = batadv_netlink_ops, + .n_ops = ARRAY_SIZE(batadv_netlink_ops), + .mcgrps = batadv_netlink_mcgrps, + .n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps), +}; + /** * batadv_netlink_register - register batadv genl netlink family */ @@ -617,9 +623,7 @@ void __init batadv_netlink_register(void) { int ret; - ret = genl_register_family_with_ops_groups(&batadv_netlink_family, - batadv_netlink_ops, - batadv_netlink_mcgrps); + ret = genl_register_family(&batadv_netlink_family); if (ret) pr_warn("unable to register netlink family"); } diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index f4fcb4a9d5c1..2b875edf77e1 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -211,7 +211,6 @@ static const struct net_device_ops bnep_netdev_ops = { .ndo_set_rx_mode = bnep_net_set_mc_list, .ndo_set_mac_address = bnep_net_set_mac_addr, .ndo_tx_timeout = bnep_net_timeout, - .ndo_change_mtu = eth_change_mtu, }; @@ -222,6 +221,8 @@ void bnep_net_setup(struct net_device *dev) dev->addr_len = ETH_ALEN; ether_setup(dev); + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &bnep_netdev_ops; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index c8135680c43e..e2288421fe6b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -21,8 +21,6 @@ SOFTWARE IS DISCLAIMED. */ -#include <asm/unaligned.h> - #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/mgmt.h> @@ -973,33 +971,58 @@ void __hci_req_enable_advertising(struct hci_request *req) static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { - size_t name_len; + size_t complete_len; + size_t short_len; int max_len; max_len = HCI_MAX_AD_LENGTH - ad_len - 2; - name_len = strlen(hdev->dev_name); - if (name_len > 0 && max_len > 0) { - - if (name_len > max_len) { - name_len = max_len; - ptr[1] = EIR_NAME_SHORT; - } else - ptr[1] = EIR_NAME_COMPLETE; - - ptr[0] = name_len + 1; + complete_len = strlen(hdev->dev_name); + short_len = strlen(hdev->short_name); + + /* no space left for name */ + if (max_len < 1) + return ad_len; + + /* no name set */ + if (!complete_len) + return ad_len; + + /* complete name fits and is eq to max short name len or smaller */ + if (complete_len <= max_len && + complete_len <= HCI_MAX_SHORT_NAME_LENGTH) { + return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, + hdev->dev_name, complete_len); + } - memcpy(ptr + 2, hdev->dev_name, name_len); + /* short name set and fits */ + if (short_len && short_len <= max_len) { + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, + hdev->short_name, short_len); + } - ad_len += (name_len + 2); - ptr += (name_len + 2); + /* no short name set so shorten complete name */ + if (!short_len) { + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, + hdev->dev_name, max_len); } return ad_len; } +static u8 append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len) +{ + return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance); +} + static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) { - return append_local_name(hdev, ptr, 0); + u8 scan_rsp_len = 0; + + if (hdev->appearance) { + scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); + } + + return append_local_name(hdev, ptr, scan_rsp_len); } static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, @@ -1016,18 +1039,13 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, instance_flags = adv_instance->flags; if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) { - ptr[0] = 3; - ptr[1] = EIR_APPEARANCE; - put_unaligned_le16(hdev->appearance, ptr + 2); - scan_rsp_len += 4; - ptr += 4; + scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len); } - memcpy(ptr, adv_instance->scan_rsp_data, + memcpy(&ptr[scan_rsp_len], adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); scan_rsp_len += adv_instance->scan_rsp_len; - ptr += adv_instance->scan_rsp_len; if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME) scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index ac1e11006f38..6b06629245a8 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -20,6 +20,8 @@ SOFTWARE IS DISCLAIMED. */ +#include <asm/unaligned.h> + #define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) #define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) @@ -103,3 +105,24 @@ static inline void hci_update_background_scan(struct hci_dev *hdev) void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); + +static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, + u8 *data, u8 data_len) +{ + eir[eir_len++] = sizeof(type) + data_len; + eir[eir_len++] = type; + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; + + return eir_len; +} + +static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) +{ + eir[eir_len++] = sizeof(type) + sizeof(data); + eir[eir_len++] = type; + put_unaligned_le16(data, &eir[eir_len]); + eir_len += sizeof(data); + + return eir_len; +} diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 19b8a5e9420d..736038085feb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -867,27 +867,6 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, sizeof(rp)); } -static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, - u8 data_len) -{ - eir[eir_len++] = sizeof(type) + data_len; - eir[eir_len++] = type; - memcpy(&eir[eir_len], data, data_len); - eir_len += data_len; - - return eir_len; -} - -static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) -{ - eir[eir_len++] = sizeof(type) + sizeof(data); - eir[eir_len++] = type; - put_unaligned_le16(data, &eir[eir_len]); - eir_len += sizeof(data); - - return eir_len; -} - static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) { u16 eir_len = 0; diff --git a/net/bridge/br.c b/net/bridge/br.c index 3addc05b9a16..889e5640455f 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -227,9 +227,11 @@ static int __init br_init(void) br_fdb_test_addr_hook = br_fdb_test_addr; #endif - pr_info("bridge: automatic filtering via arp/ip/ip6tables has been " - "deprecated. Update your scripts to load br_netfilter if you " +#if IS_MODULE(CONFIG_BRIDGE_NETFILTER) + pr_info("bridge: filtering via arp/ip/ip6tables is no longer available " + "by default. Update your scripts to load br_netfilter if you " "need this.\n"); +#endif return 0; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 89a687f3c0a3..c08e02b67818 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -185,7 +185,7 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, static int br_change_mtu(struct net_device *dev, int new_mtu) { struct net_bridge *br = netdev_priv(dev); - if (new_mtu < 68 || new_mtu > br_min_mtu(br)) + if (new_mtu > br_min_mtu(br)) return -EINVAL; dev->mtu = new_mtu; @@ -410,6 +410,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_hello_time = br->hello_time = 2 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ; br->ageing_time = BR_DEFAULT_AGEING_TIME; + dev->max_mtu = ETH_MAX_MTU; br_netfilter_rtable_init(br); br_stp_timer_init(br); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 6b43c8c88f19..e4a4176171c9 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -535,9 +535,8 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, */ if (fdb->is_local) return 0; - br_warn(br, "adding interface %s with same address " - "as a received packet\n", - source ? source->dev->name : br->dev->name); + br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n", + source ? source->dev->name : br->dev->name, addr, vid); fdb_delete(br, fdb); } @@ -583,9 +582,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { if (net_ratelimit()) - br_warn(br, "received packet on %s with " - "own address as source address\n", - source->dev->name); + br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n", + source->dev->name, addr, vid); } else { /* fastpath: update of existing entry */ if (unlikely(source != fdb->dst)) { diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index e657258e1f2c..8bd569695e76 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -217,6 +217,7 @@ static const struct brport_attribute *brport_attrs[] = { #endif &brport_attr_proxyarp, &brport_attr_proxyarp_wifi, + &brport_attr_multicast_flood, NULL }; diff --git a/net/ceph/Makefile b/net/ceph/Makefile index 84cbed630c4b..6a5180903e7b 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_CEPH_LIB) += libceph.o libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ mon_client.o \ + cls_lock_client.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ debugfs.o \ auth.o auth_none.o \ diff --git a/net/ceph/auth.c b/net/ceph/auth.c index 2bc5965fdd1e..c822b3ae1bd3 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -82,7 +82,10 @@ void ceph_auth_reset(struct ceph_auth_client *ac) mutex_unlock(&ac->mutex); } -int ceph_entity_name_encode(const char *name, void **p, void *end) +/* + * EntityName, not to be confused with entity_name_t + */ +int ceph_auth_entity_name_encode(const char *name, void **p, void *end) { int len = strlen(name); @@ -111,7 +114,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len) monhdr->session_mon = cpu_to_le16(-1); monhdr->session_mon_tid = 0; - ceph_encode_32(&p, 0); /* no protocol, yet */ + ceph_encode_32(&p, CEPH_AUTH_UNKNOWN); /* no protocol, yet */ lenp = p; p += sizeof(u32); @@ -124,7 +127,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len) for (i = 0; i < num; i++) ceph_encode_32(&p, supported_protocols[i]); - ret = ceph_entity_name_encode(ac->name, &p, end); + ret = ceph_auth_entity_name_encode(ac->name, &p, end); if (ret < 0) goto out; ceph_decode_need(&p, end, sizeof(u64), bad); @@ -259,9 +262,7 @@ int ceph_build_auth(struct ceph_auth_client *ac, int ret = 0; mutex_lock(&ac->mutex); - if (!ac->protocol) - ret = ceph_auth_build_hello(ac, msg_buf, msg_len); - else if (ac->ops->should_authenticate(ac)) + if (ac->ops->should_authenticate(ac)) ret = ceph_build_auth_request(ac, msg_buf, msg_len); mutex_unlock(&ac->mutex); return ret; diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c index 5f836f02ae36..df45e467c81f 100644 --- a/net/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -46,7 +46,7 @@ static int ceph_auth_none_build_authorizer(struct ceph_auth_client *ac, int ret; ceph_encode_8_safe(&p, end, 1, e_range); - ret = ceph_entity_name_encode(ac->name, &p, end); + ret = ceph_auth_entity_name_encode(ac->name, &p, end); if (ret < 0) return ret; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index bddfcf6f09c2..464e88599b9d 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -566,11 +566,17 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) } EXPORT_SYMBOL(ceph_print_client_options); -u64 ceph_client_id(struct ceph_client *client) +struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client) +{ + return &client->msgr.inst.addr; +} +EXPORT_SYMBOL(ceph_client_addr); + +u64 ceph_client_gid(struct ceph_client *client) { return client->monc.auth->global_id; } -EXPORT_SYMBOL(ceph_client_id); +EXPORT_SYMBOL(ceph_client_gid); /* * create a fresh client instance @@ -685,7 +691,8 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) return client->auth_err; } - pr_info("client%llu fsid %pU\n", ceph_client_id(client), &client->fsid); + pr_info("client%llu fsid %pU\n", ceph_client_gid(client), + &client->fsid); ceph_debugfs_client_init(client); return 0; diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 3773a4fa11e3..19b7d8aa915c 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c @@ -15,6 +15,7 @@ const char *ceph_entity_type_name(int type) default: return "unknown"; } } +EXPORT_SYMBOL(ceph_entity_type_name); const char *ceph_osd_op_name(int op) { diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c new file mode 100644 index 000000000000..50f040fdb2a9 --- /dev/null +++ b/net/ceph/cls_lock_client.c @@ -0,0 +1,325 @@ +#include <linux/ceph/ceph_debug.h> + +#include <linux/types.h> +#include <linux/slab.h> + +#include <linux/ceph/cls_lock_client.h> +#include <linux/ceph/decode.h> + +/** + * ceph_cls_lock - grab rados lock for object + * @oid, @oloc: object to lock + * @lock_name: the name of the lock + * @type: lock type (CEPH_CLS_LOCK_EXCLUSIVE or CEPH_CLS_LOCK_SHARED) + * @cookie: user-defined identifier for this instance of the lock + * @tag: user-defined tag + * @desc: user-defined lock description + * @flags: lock flags + * + * All operations on the same lock should use the same tag. + */ +int ceph_cls_lock(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + char *lock_name, u8 type, char *cookie, + char *tag, char *desc, u8 flags) +{ + int lock_op_buf_size; + int name_len = strlen(lock_name); + int cookie_len = strlen(cookie); + int tag_len = strlen(tag); + int desc_len = strlen(desc); + void *p, *end; + struct page *lock_op_page; + struct timespec mtime; + int ret; + + lock_op_buf_size = name_len + sizeof(__le32) + + cookie_len + sizeof(__le32) + + tag_len + sizeof(__le32) + + desc_len + sizeof(__le32) + + sizeof(struct ceph_timespec) + + /* flag and type */ + sizeof(u8) + sizeof(u8) + + CEPH_ENCODING_START_BLK_LEN; + if (lock_op_buf_size > PAGE_SIZE) + return -E2BIG; + + lock_op_page = alloc_page(GFP_NOIO); + if (!lock_op_page) + return -ENOMEM; + + p = page_address(lock_op_page); + end = p + lock_op_buf_size; + + /* encode cls_lock_lock_op struct */ + ceph_start_encoding(&p, 1, 1, + lock_op_buf_size - CEPH_ENCODING_START_BLK_LEN); + ceph_encode_string(&p, end, lock_name, name_len); + ceph_encode_8(&p, type); + ceph_encode_string(&p, end, cookie, cookie_len); + ceph_encode_string(&p, end, tag, tag_len); + ceph_encode_string(&p, end, desc, desc_len); + /* only support infinite duration */ + memset(&mtime, 0, sizeof(mtime)); + ceph_encode_timespec(p, &mtime); + p += sizeof(struct ceph_timespec); + ceph_encode_8(&p, flags); + + dout("%s lock_name %s type %d cookie %s tag %s desc %s flags 0x%x\n", + __func__, lock_name, type, cookie, tag, desc, flags); + ret = ceph_osdc_call(osdc, oid, oloc, "lock", "lock", + CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, + lock_op_page, lock_op_buf_size, NULL, NULL); + + dout("%s: status %d\n", __func__, ret); + __free_page(lock_op_page); + return ret; +} +EXPORT_SYMBOL(ceph_cls_lock); + +/** + * ceph_cls_unlock - release rados lock for object + * @oid, @oloc: object to lock + * @lock_name: the name of the lock + * @cookie: user-defined identifier for this instance of the lock + */ +int ceph_cls_unlock(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + char *lock_name, char *cookie) +{ + int unlock_op_buf_size; + int name_len = strlen(lock_name); + int cookie_len = strlen(cookie); + void *p, *end; + struct page *unlock_op_page; + int ret; + + unlock_op_buf_size = name_len + sizeof(__le32) + + cookie_len + sizeof(__le32) + + CEPH_ENCODING_START_BLK_LEN; + if (unlock_op_buf_size > PAGE_SIZE) + return -E2BIG; + + unlock_op_page = alloc_page(GFP_NOIO); + if (!unlock_op_page) + return -ENOMEM; + + p = page_address(unlock_op_page); + end = p + unlock_op_buf_size; + + /* encode cls_lock_unlock_op struct */ + ceph_start_encoding(&p, 1, 1, + unlock_op_buf_size - CEPH_ENCODING_START_BLK_LEN); + ceph_encode_string(&p, end, lock_name, name_len); + ceph_encode_string(&p, end, cookie, cookie_len); + + dout("%s lock_name %s cookie %s\n", __func__, lock_name, cookie); + ret = ceph_osdc_call(osdc, oid, oloc, "lock", "unlock", + CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, + unlock_op_page, unlock_op_buf_size, NULL, NULL); + + dout("%s: status %d\n", __func__, ret); + __free_page(unlock_op_page); + return ret; +} +EXPORT_SYMBOL(ceph_cls_unlock); + +/** + * ceph_cls_break_lock - release rados lock for object for specified client + * @oid, @oloc: object to lock + * @lock_name: the name of the lock + * @cookie: user-defined identifier for this instance of the lock + * @locker: current lock owner + */ +int ceph_cls_break_lock(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + char *lock_name, char *cookie, + struct ceph_entity_name *locker) +{ + int break_op_buf_size; + int name_len = strlen(lock_name); + int cookie_len = strlen(cookie); + struct page *break_op_page; + void *p, *end; + int ret; + + break_op_buf_size = name_len + sizeof(__le32) + + cookie_len + sizeof(__le32) + + sizeof(u8) + sizeof(__le64) + + CEPH_ENCODING_START_BLK_LEN; + if (break_op_buf_size > PAGE_SIZE) + return -E2BIG; + + break_op_page = alloc_page(GFP_NOIO); + if (!break_op_page) + return -ENOMEM; + + p = page_address(break_op_page); + end = p + break_op_buf_size; + + /* encode cls_lock_break_op struct */ + ceph_start_encoding(&p, 1, 1, + break_op_buf_size - CEPH_ENCODING_START_BLK_LEN); + ceph_encode_string(&p, end, lock_name, name_len); + ceph_encode_copy(&p, locker, sizeof(*locker)); + ceph_encode_string(&p, end, cookie, cookie_len); + + dout("%s lock_name %s cookie %s locker %s%llu\n", __func__, lock_name, + cookie, ENTITY_NAME(*locker)); + ret = ceph_osdc_call(osdc, oid, oloc, "lock", "break_lock", + CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, + break_op_page, break_op_buf_size, NULL, NULL); + + dout("%s: status %d\n", __func__, ret); + __free_page(break_op_page); + return ret; +} +EXPORT_SYMBOL(ceph_cls_break_lock); + +void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers) +{ + int i; + + for (i = 0; i < num_lockers; i++) + kfree(lockers[i].id.cookie); + kfree(lockers); +} +EXPORT_SYMBOL(ceph_free_lockers); + +static int decode_locker(void **p, void *end, struct ceph_locker *locker) +{ + u8 struct_v; + u32 len; + char *s; + int ret; + + ret = ceph_start_decoding(p, end, 1, "locker_id_t", &struct_v, &len); + if (ret) + return ret; + + ceph_decode_copy(p, &locker->id.name, sizeof(locker->id.name)); + s = ceph_extract_encoded_string(p, end, NULL, GFP_NOIO); + if (IS_ERR(s)) + return PTR_ERR(s); + + locker->id.cookie = s; + + ret = ceph_start_decoding(p, end, 1, "locker_info_t", &struct_v, &len); + if (ret) + return ret; + + *p += sizeof(struct ceph_timespec); /* skip expiration */ + ceph_decode_copy(p, &locker->info.addr, sizeof(locker->info.addr)); + ceph_decode_addr(&locker->info.addr); + len = ceph_decode_32(p); + *p += len; /* skip description */ + + dout("%s %s%llu cookie %s addr %s\n", __func__, + ENTITY_NAME(locker->id.name), locker->id.cookie, + ceph_pr_addr(&locker->info.addr.in_addr)); + return 0; +} + +static int decode_lockers(void **p, void *end, u8 *type, char **tag, + struct ceph_locker **lockers, u32 *num_lockers) +{ + u8 struct_v; + u32 struct_len; + char *s; + int i; + int ret; + + ret = ceph_start_decoding(p, end, 1, "cls_lock_get_info_reply", + &struct_v, &struct_len); + if (ret) + return ret; + + *num_lockers = ceph_decode_32(p); + *lockers = kcalloc(*num_lockers, sizeof(**lockers), GFP_NOIO); + if (!*lockers) + return -ENOMEM; + + for (i = 0; i < *num_lockers; i++) { + ret = decode_locker(p, end, *lockers + i); + if (ret) + goto err_free_lockers; + } + + *type = ceph_decode_8(p); + s = ceph_extract_encoded_string(p, end, NULL, GFP_NOIO); + if (IS_ERR(s)) { + ret = PTR_ERR(s); + goto err_free_lockers; + } + + *tag = s; + return 0; + +err_free_lockers: + ceph_free_lockers(*lockers, *num_lockers); + return ret; +} + +/* + * On success, the caller is responsible for: + * + * kfree(tag); + * ceph_free_lockers(lockers, num_lockers); + */ +int ceph_cls_lock_info(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + char *lock_name, u8 *type, char **tag, + struct ceph_locker **lockers, u32 *num_lockers) +{ + int get_info_op_buf_size; + int name_len = strlen(lock_name); + struct page *get_info_op_page, *reply_page; + size_t reply_len; + void *p, *end; + int ret; + + get_info_op_buf_size = name_len + sizeof(__le32) + + CEPH_ENCODING_START_BLK_LEN; + if (get_info_op_buf_size > PAGE_SIZE) + return -E2BIG; + + get_info_op_page = alloc_page(GFP_NOIO); + if (!get_info_op_page) + return -ENOMEM; + + reply_page = alloc_page(GFP_NOIO); + if (!reply_page) { + __free_page(get_info_op_page); + return -ENOMEM; + } + + p = page_address(get_info_op_page); + end = p + get_info_op_buf_size; + + /* encode cls_lock_get_info_op struct */ + ceph_start_encoding(&p, 1, 1, + get_info_op_buf_size - CEPH_ENCODING_START_BLK_LEN); + ceph_encode_string(&p, end, lock_name, name_len); + + dout("%s lock_name %s\n", __func__, lock_name); + ret = ceph_osdc_call(osdc, oid, oloc, "lock", "get_info", + CEPH_OSD_FLAG_READ, get_info_op_page, + get_info_op_buf_size, reply_page, &reply_len); + + dout("%s: status %d\n", __func__, ret); + if (ret >= 0) { + p = page_address(reply_page); + end = p + reply_len; + + ret = decode_lockers(&p, end, type, tag, lockers, num_lockers); + } + + __free_page(get_info_op_page); + __free_page(reply_page); + return ret; +} +EXPORT_SYMBOL(ceph_cls_lock_info); diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 5fcfb98f309e..a421e905331a 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -245,7 +245,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket, /* compute 2^44*log2(input+1) */ static __u64 crush_ln(unsigned int xin) { - unsigned int x = xin, x1; + unsigned int x = xin; int iexpon, index1, index2; __u64 RH, LH, LL, xl64, result; @@ -253,9 +253,15 @@ static __u64 crush_ln(unsigned int xin) /* normalize input */ iexpon = 15; - while (!(x & 0x18000)) { - x <<= 1; - iexpon--; + + /* + * figure out number of bits we need to shift and + * do it in one step instead of iteratively + */ + if (!(x & 0x18000)) { + int bits = __builtin_clz(x & 0x1FFFF) - 16; + x <<= bits; + iexpon = 15 - bits; } index1 = (x >> 8) << 1; @@ -267,12 +273,11 @@ static __u64 crush_ln(unsigned int xin) /* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */ xl64 = (__s64)x * RH; xl64 >>= 48; - x1 = xl64; result = iexpon; result <<= (12 + 32); - index2 = x1 & 0xff; + index2 = xl64 & 0xff; /* LL ~ 2^48*log2(1.0+index2/2^15) */ LL = __LL_tbl[index2]; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index ef34a02719d7..a8effc8b7280 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -835,6 +835,83 @@ int ceph_monc_get_version_async(struct ceph_mon_client *monc, const char *what, } EXPORT_SYMBOL(ceph_monc_get_version_async); +static void handle_command_ack(struct ceph_mon_client *monc, + struct ceph_msg *msg) +{ + struct ceph_mon_generic_request *req; + void *p = msg->front.iov_base; + void *const end = p + msg->front_alloc_len; + u64 tid = le64_to_cpu(msg->hdr.tid); + + dout("%s msg %p tid %llu\n", __func__, msg, tid); + + ceph_decode_need(&p, end, sizeof(struct ceph_mon_request_header) + + sizeof(u32), bad); + p += sizeof(struct ceph_mon_request_header); + + mutex_lock(&monc->mutex); + req = lookup_generic_request(&monc->generic_request_tree, tid); + if (!req) { + mutex_unlock(&monc->mutex); + return; + } + + req->result = ceph_decode_32(&p); + __finish_generic_request(req); + mutex_unlock(&monc->mutex); + + complete_generic_request(req); + return; + +bad: + pr_err("corrupt mon_command ack, tid %llu\n", tid); + ceph_msg_dump(msg); +} + +int ceph_monc_blacklist_add(struct ceph_mon_client *monc, + struct ceph_entity_addr *client_addr) +{ + struct ceph_mon_generic_request *req; + struct ceph_mon_command *h; + int ret = -ENOMEM; + int len; + + req = alloc_generic_request(monc, GFP_NOIO); + if (!req) + goto out; + + req->request = ceph_msg_new(CEPH_MSG_MON_COMMAND, 256, GFP_NOIO, true); + if (!req->request) + goto out; + + req->reply = ceph_msg_new(CEPH_MSG_MON_COMMAND_ACK, 512, GFP_NOIO, + true); + if (!req->reply) + goto out; + + mutex_lock(&monc->mutex); + register_generic_request(req); + h = req->request->front.iov_base; + h->monhdr.have_version = 0; + h->monhdr.session_mon = cpu_to_le16(-1); + h->monhdr.session_mon_tid = 0; + h->fsid = monc->monmap->fsid; + h->num_strs = cpu_to_le32(1); + len = sprintf(h->str, "{ \"prefix\": \"osd blacklist\", \ + \"blacklistop\": \"add\", \ + \"addr\": \"%pISpc/%u\" }", + &client_addr->in_addr, le32_to_cpu(client_addr->nonce)); + h->str_len = cpu_to_le32(len); + send_generic_request(monc, req); + mutex_unlock(&monc->mutex); + + ret = wait_generic_request(req); +out: + put_generic_request(req); + return ret; +} +EXPORT_SYMBOL(ceph_monc_blacklist_add); + /* * Resend pending generic requests. */ @@ -1139,6 +1216,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) handle_get_version_reply(monc, msg); break; + case CEPH_MSG_MON_COMMAND_ACK: + handle_command_ack(monc, msg); + break; + case CEPH_MSG_MON_MAP: ceph_monc_handle_map(monc, msg); break; @@ -1178,6 +1259,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, m = ceph_msg_get(monc->m_subscribe_ack); break; case CEPH_MSG_STATFS_REPLY: + case CEPH_MSG_MON_COMMAND_ACK: return get_generic_reply(con, hdr, skip); case CEPH_MSG_AUTH_REPLY: m = ceph_msg_get(monc->m_auth_reply); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index a97e7b506612..d9bf7a1d0a58 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -338,6 +338,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, ceph_osd_data_release(&op->notify.request_data); ceph_osd_data_release(&op->notify.response_data); break; + case CEPH_OSD_OP_LIST_WATCHERS: + ceph_osd_data_release(&op->list_watchers.response_data); + break; default: break; } @@ -863,6 +866,8 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, case CEPH_OSD_OP_NOTIFY: dst->notify.cookie = cpu_to_le64(src->notify.cookie); break; + case CEPH_OSD_OP_LIST_WATCHERS: + break; case CEPH_OSD_OP_SETALLOCHINT: dst->alloc_hint.expected_object_size = cpu_to_le64(src->alloc_hint.expected_object_size); @@ -1445,6 +1450,10 @@ static void setup_request_data(struct ceph_osd_request *req, ceph_osdc_msg_data_add(req->r_reply, &op->extent.osd_data); break; + case CEPH_OSD_OP_LIST_WATCHERS: + ceph_osdc_msg_data_add(req->r_reply, + &op->list_watchers.response_data); + break; /* both */ case CEPH_OSD_OP_CALL: @@ -3891,12 +3900,121 @@ int ceph_osdc_watch_check(struct ceph_osd_client *osdc, return ret; } +static int decode_watcher(void **p, void *end, struct ceph_watch_item *item) +{ + u8 struct_v; + u32 struct_len; + int ret; + + ret = ceph_start_decoding(p, end, 2, "watch_item_t", + &struct_v, &struct_len); + if (ret) + return ret; + + ceph_decode_copy(p, &item->name, sizeof(item->name)); + item->cookie = ceph_decode_64(p); + *p += 4; /* skip timeout_seconds */ + if (struct_v >= 2) { + ceph_decode_copy(p, &item->addr, sizeof(item->addr)); + ceph_decode_addr(&item->addr); + } + + dout("%s %s%llu cookie %llu addr %s\n", __func__, + ENTITY_NAME(item->name), item->cookie, + ceph_pr_addr(&item->addr.in_addr)); + return 0; +} + +static int decode_watchers(void **p, void *end, + struct ceph_watch_item **watchers, + u32 *num_watchers) +{ + u8 struct_v; + u32 struct_len; + int i; + int ret; + + ret = ceph_start_decoding(p, end, 1, "obj_list_watch_response_t", + &struct_v, &struct_len); + if (ret) + return ret; + + *num_watchers = ceph_decode_32(p); + *watchers = kcalloc(*num_watchers, sizeof(**watchers), GFP_NOIO); + if (!*watchers) + return -ENOMEM; + + for (i = 0; i < *num_watchers; i++) { + ret = decode_watcher(p, end, *watchers + i); + if (ret) { + kfree(*watchers); + return ret; + } + } + + return 0; +} + +/* + * On success, the caller is responsible for: + * + * kfree(watchers); + */ +int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + struct ceph_watch_item **watchers, + u32 *num_watchers) +{ + struct ceph_osd_request *req; + struct page **pages; + int ret; + + req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); + if (!req) + return -ENOMEM; + + ceph_oid_copy(&req->r_base_oid, oid); + ceph_oloc_copy(&req->r_base_oloc, oloc); + req->r_flags = CEPH_OSD_FLAG_READ; + + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + if (ret) + goto out_put_req; + + pages = ceph_alloc_page_vector(1, GFP_NOIO); + if (IS_ERR(pages)) { + ret = PTR_ERR(pages); + goto out_put_req; + } + + osd_req_op_init(req, 0, CEPH_OSD_OP_LIST_WATCHERS, 0); + ceph_osd_data_pages_init(osd_req_op_data(req, 0, list_watchers, + response_data), + pages, PAGE_SIZE, 0, false, true); + + ceph_osdc_start_request(osdc, req, false); + ret = ceph_osdc_wait_request(osdc, req); + if (ret >= 0) { + void *p = page_address(pages[0]); + void *const end = p + req->r_ops[0].outdata_len; + + ret = decode_watchers(&p, end, watchers, num_watchers); + } + +out_put_req: + ceph_osdc_put_request(req); + return ret; +} +EXPORT_SYMBOL(ceph_osdc_list_watchers); + /* * Call all pending notify callbacks - for use after a watch is * unregistered, to make sure no more callbacks for it will be invoked */ void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) { + dout("%s osdc %p\n", __func__, osdc); flush_workqueue(osdc->notify_wq); } EXPORT_SYMBOL(ceph_osdc_flush_notifies); @@ -3910,6 +4028,57 @@ void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc) EXPORT_SYMBOL(ceph_osdc_maybe_request_map); /* + * Execute an OSD class method on an object. + * + * @flags: CEPH_OSD_FLAG_* + * @resp_len: out param for reply length + */ +int ceph_osdc_call(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + const char *class, const char *method, + unsigned int flags, + struct page *req_page, size_t req_len, + struct page *resp_page, size_t *resp_len) +{ + struct ceph_osd_request *req; + int ret; + + req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); + if (!req) + return -ENOMEM; + + ceph_oid_copy(&req->r_base_oid, oid); + ceph_oloc_copy(&req->r_base_oloc, oloc); + req->r_flags = flags; + + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + if (ret) + goto out_put_req; + + osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method); + if (req_page) + osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len, + 0, false, false); + if (resp_page) + osd_req_op_cls_response_data_pages(req, 0, &resp_page, + PAGE_SIZE, 0, false, false); + + ceph_osdc_start_request(osdc, req, false); + ret = ceph_osdc_wait_request(osdc, req); + if (ret >= 0) { + ret = req->r_ops[0].rval; + if (resp_page) + *resp_len = req->r_ops[0].outdata_len; + } + +out_put_req: + ceph_osdc_put_request(req); + return ret; +} +EXPORT_SYMBOL(ceph_osdc_call); + +/* * init, shutdown */ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) diff --git a/net/core/datagram.c b/net/core/datagram.c index b7de71f8d5d3..bfb973aebb5b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -323,6 +323,27 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) } EXPORT_SYMBOL(__skb_free_datagram_locked); +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, + unsigned int flags) +{ + int err = 0; + + if (flags & MSG_PEEK) { + err = -ENOENT; + spin_lock_bh(&sk->sk_receive_queue.lock); + if (skb == skb_peek(&sk->sk_receive_queue)) { + __skb_unlink(skb, &sk->sk_receive_queue); + atomic_dec(&skb->users); + err = 0; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + } + + atomic_inc(&sk->sk_drops); + return err; +} +EXPORT_SYMBOL(__sk_queue_drop_skb); + /** * skb_kill_datagram - Free a datagram skbuff forcibly * @sk: socket @@ -346,23 +367,10 @@ EXPORT_SYMBOL(__skb_free_datagram_locked); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) { - int err = 0; - - if (flags & MSG_PEEK) { - err = -ENOENT; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - atomic_dec(&skb->users); - err = 0; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - } + int err = __sk_queue_drop_skb(sk, skb, flags); kfree_skb(skb); - atomic_inc(&sk->sk_drops); sk_mem_reclaim_partial(sk); - return err; } EXPORT_SYMBOL(skb_kill_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index c0c291f721d6..f55fb4536016 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -139,7 +139,6 @@ #include <linux/errqueue.h> #include <linux/hrtimer.h> #include <linux/netfilter_ingress.h> -#include <linux/sctp.h> #include <linux/crash_dump.h> #include "net-sysfs.h" @@ -2492,141 +2491,6 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -/* skb_csum_offload_check - Driver helper function to determine if a device - * with limited checksum offload capabilities is able to offload the checksum - * for a given packet. - * - * Arguments: - * skb - sk_buff for the packet in question - * spec - contains the description of what device can offload - * csum_encapped - returns true if the checksum being offloaded is - * encpasulated. That is it is checksum for the transport header - * in the inner headers. - * checksum_help - when set indicates that helper function should - * call skb_checksum_help if offload checks fail - * - * Returns: - * true: Packet has passed the checksum checks and should be offloadable to - * the device (a driver may still need to check for additional - * restrictions of its device) - * false: Checksum is not offloadable. If checksum_help was set then - * skb_checksum_help was called to resolve checksum for non-GSO - * packets and when IP protocol is not SCTP - */ -bool __skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help) -{ - struct iphdr *iph; - struct ipv6hdr *ipv6; - void *nhdr; - int protocol; - u8 ip_proto; - - if (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) { - if (!spec->vlan_okay) - goto need_help; - } - - /* We check whether the checksum refers to a transport layer checksum in - * the outermost header or an encapsulated transport layer checksum that - * corresponds to the inner headers of the skb. If the checksum is for - * something else in the packet we need help. - */ - if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) { - /* Non-encapsulated checksum */ - protocol = eproto_to_ipproto(vlan_get_protocol(skb)); - nhdr = skb_network_header(skb); - *csum_encapped = false; - if (spec->no_not_encapped) - goto need_help; - } else if (skb->encapsulation && spec->encap_okay && - skb_checksum_start_offset(skb) == - skb_inner_transport_offset(skb)) { - /* Encapsulated checksum */ - *csum_encapped = true; - switch (skb->inner_protocol_type) { - case ENCAP_TYPE_ETHER: - protocol = eproto_to_ipproto(skb->inner_protocol); - break; - case ENCAP_TYPE_IPPROTO: - protocol = skb->inner_protocol; - break; - } - nhdr = skb_inner_network_header(skb); - } else { - goto need_help; - } - - switch (protocol) { - case IPPROTO_IP: - if (!spec->ipv4_okay) - goto need_help; - iph = nhdr; - ip_proto = iph->protocol; - if (iph->ihl != 5 && !spec->ip_options_okay) - goto need_help; - break; - case IPPROTO_IPV6: - if (!spec->ipv6_okay) - goto need_help; - if (spec->no_encapped_ipv6 && *csum_encapped) - goto need_help; - ipv6 = nhdr; - nhdr += sizeof(*ipv6); - ip_proto = ipv6->nexthdr; - break; - default: - goto need_help; - } - -ip_proto_again: - switch (ip_proto) { - case IPPROTO_TCP: - if (!spec->tcp_okay || - skb->csum_offset != offsetof(struct tcphdr, check)) - goto need_help; - break; - case IPPROTO_UDP: - if (!spec->udp_okay || - skb->csum_offset != offsetof(struct udphdr, check)) - goto need_help; - break; - case IPPROTO_SCTP: - if (!spec->sctp_okay || - skb->csum_offset != offsetof(struct sctphdr, checksum)) - goto cant_help; - break; - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_DEST: { - u8 *opthdr = nhdr; - - if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay) - goto need_help; - - ip_proto = opthdr[0]; - nhdr += (opthdr[1] + 1) << 3; - - goto ip_proto_again; - } - default: - goto need_help; - } - - /* Passed the tests for offloading checksum */ - return true; - -need_help: - if (csum_help && !skb_shinfo(skb)->gso_size) - skb_checksum_help(skb); -cant_help: - return false; -} -EXPORT_SYMBOL(__skb_csum_offload_chk); - __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; @@ -5273,6 +5137,13 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, return NULL; } +static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) +{ + struct net_device *dev = data; + + return upper_dev == dev; +} + /** * netdev_has_upper_dev - Check if device is linked to an upper device * @dev: device @@ -5287,11 +5158,30 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper); + return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + upper_dev); } EXPORT_SYMBOL(netdev_has_upper_dev); /** + * netdev_has_upper_dev_all - Check if device is linked to an upper device + * @dev: device + * @upper_dev: upper device to check + * + * Find out if a device is linked to specified upper device and return true + * in case it is. Note that this checks the entire upper device chain. + * The caller must hold rcu lock. + */ + +bool netdev_has_upper_dev_all_rcu(struct net_device *dev, + struct net_device *upper_dev) +{ + return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + upper_dev); +} +EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); + +/** * netdev_has_any_upper_dev - Check if device is linked to some device * @dev: device * @@ -5302,7 +5192,7 @@ static bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); - return !list_empty(&dev->all_adj_list.upper); + return !list_empty(&dev->adj_list.upper); } /** @@ -5329,6 +5219,20 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get); +/** + * netdev_has_any_lower_dev - Check if device is linked to some device + * @dev: device + * + * Find out if a device is linked to a lower device and return true in case + * it is. The caller must hold the RTNL lock. + */ +static bool netdev_has_any_lower_dev(struct net_device *dev) +{ + ASSERT_RTNL(); + + return !list_empty(&dev->adj_list.lower); +} + void *netdev_adjacent_get_private(struct list_head *adj_list) { struct netdev_adjacent *adj; @@ -5365,16 +5269,8 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); -/** - * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next device from the dev's upper list, starting from iter - * position. The caller must hold RCU read lock. - */ -struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, - struct list_head **iter) +static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *upper; @@ -5382,14 +5278,41 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); - if (&upper->list == &dev->all_adj_list.upper) + if (&upper->list == &dev->adj_list.upper) return NULL; *iter = &upper->list; return upper->dev; } -EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); + +int netdev_walk_all_upper_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *udev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.upper, + udev = netdev_next_upper_dev_rcu(dev, &iter); + udev; + udev = netdev_next_upper_dev_rcu(dev, &iter)) { + /* first is the upper device itself */ + ret = fn(udev, data); + if (ret) + return ret; + + /* then look at all of its upper devices */ + ret = netdev_walk_all_upper_dev_rcu(udev, fn, data); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); /** * netdev_lower_get_next_private - Get the next ->private from the @@ -5472,51 +5395,90 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) } EXPORT_SYMBOL(netdev_lower_get_next); -/** - * netdev_all_lower_get_next - Get the next device from all lower neighbour list - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent from the dev's all lower neighbour - * list, starting from iter position. The caller must hold RTNL lock or - * its own locking that guarantees that the neighbour all lower - * list will remain unchanged. - */ -struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter) +static struct net_device *netdev_next_lower_dev(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *lower; lower = list_entry(*iter, struct netdev_adjacent, list); - if (&lower->list == &dev->all_adj_list.lower) + if (&lower->list == &dev->adj_list.lower) return NULL; *iter = lower->list.next; return lower->dev; } -EXPORT_SYMBOL(netdev_all_lower_get_next); -/** - * netdev_all_lower_get_next_rcu - Get the next device from all - * lower neighbour list, RCU variant - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent from the dev's all lower neighbour - * list, starting from iter position. The caller must hold RCU read lock. - */ -struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, - struct list_head **iter) +int netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.lower, + ldev = netdev_next_lower_dev(dev, &iter); + ldev; + ldev = netdev_next_lower_dev(dev, &iter)) { + /* first is the lower device itself */ + ret = fn(ldev, data); + if (ret) + return ret; + + /* then look at all of its lower devices */ + ret = netdev_walk_all_lower_dev(ldev, fn, data); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); + +static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *lower; - lower = list_first_or_null_rcu(&dev->all_adj_list.lower, - struct netdev_adjacent, list); + lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + if (&lower->list == &dev->adj_list.lower) + return NULL; + + *iter = &lower->list; + + return lower->dev; +} - return lower ? lower->dev : NULL; +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.lower, + ldev = netdev_next_lower_dev_rcu(dev, &iter); + ldev; + ldev = netdev_next_lower_dev_rcu(dev, &iter)) { + /* first is the lower device itself */ + ret = fn(ldev, data); + if (ret) + return ret; + + /* then look at all of its lower devices */ + ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data); + if (ret) + return ret; + } + + return 0; } -EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); +EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu); /** * netdev_lower_get_first_private_rcu - Get the first ->private from the @@ -5598,7 +5560,10 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { - adj->ref_nr++; + adj->ref_nr += 1; + pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n", + dev->name, adj_dev->name, adj->ref_nr); + return 0; } @@ -5612,8 +5577,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->private = private; dev_hold(adj_dev); - pr_debug("dev_hold for %s, because of link added from %s to %s\n", - adj_dev->name, dev->name, adj_dev->name); + pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n", + dev->name, adj_dev->name, adj->ref_nr, adj_dev->name); if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) { ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); @@ -5647,22 +5612,28 @@ free_adj: static void __netdev_adjacent_dev_remove(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list) { struct netdev_adjacent *adj; + pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n", + dev->name, adj_dev->name, ref_nr); + adj = __netdev_find_adj(adj_dev, dev_list); if (!adj) { - pr_err("tried to remove device %s from %s\n", + pr_err("Adjacency does not exist for device %s from %s\n", dev->name, adj_dev->name); - BUG(); + WARN_ON(1); + return; } - if (adj->ref_nr > 1) { - pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, - adj->ref_nr-1); - adj->ref_nr--; + if (adj->ref_nr > ref_nr) { + pr_debug("adjacency: %s to %s ref_nr - %d = %d\n", + dev->name, adj_dev->name, ref_nr, + adj->ref_nr - ref_nr); + adj->ref_nr -= ref_nr; return; } @@ -5673,7 +5644,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); - pr_debug("dev_put for %s, because link removed from %s to %s\n", + pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); dev_put(adj_dev); kfree_rcu(adj, rcu); @@ -5687,73 +5658,45 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev, { int ret; - ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, - master); + ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, + private, master); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, - false); + ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, + private, false); if (ret) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); + __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list); return ret; } return 0; } -static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) -{ - return __netdev_adjacent_dev_link_lists(dev, upper_dev, - &dev->all_adj_list.upper, - &upper_dev->all_adj_list.lower, - NULL, false); -} - static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); - __netdev_adjacent_dev_remove(upper_dev, dev, down_list); -} - -static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) -{ - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, - &dev->all_adj_list.upper, - &upper_dev->all_adj_list.lower); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); + __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, void *private, bool master) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev); - - if (ret) - return ret; - - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, - &dev->adj_list.upper, - &upper_dev->adj_list.lower, - private, master); - if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev); - return ret; - } - - return 0; + return __netdev_adjacent_dev_link_lists(dev, upper_dev, + &dev->adj_list.upper, + &upper_dev->adj_list.lower, + private, master); } static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { - __netdev_adjacent_dev_unlink(dev, upper_dev); - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); } @@ -5763,7 +5706,6 @@ static int __netdev_upper_dev_link(struct net_device *dev, void *upper_priv, void *upper_info) { struct netdev_notifier_changeupper_info changeupper_info; - struct netdev_adjacent *i, *j, *to_i, *to_j; int ret = 0; ASSERT_RTNL(); @@ -5772,10 +5714,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper)) + if (netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; - if (__netdev_find_adj(upper_dev, &dev->adj_list.upper)) + if (netdev_has_upper_dev(dev, upper_dev)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) @@ -5797,80 +5739,15 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - /* Now that we linked these devs, make all the upper_dev's - * all_adj_list.upper visible to every dev's all_adj_list.lower an - * versa, and don't forget the devices itself. All of these - * links are non-neighbours. - */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { - pr_debug("Interlinking %s with %s, non-neighbour\n", - i->dev->name, j->dev->name); - ret = __netdev_adjacent_dev_link(i->dev, j->dev); - if (ret) - goto rollback_mesh; - } - } - - /* add dev to every upper_dev's upper device */ - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { - pr_debug("linking %s's upper device %s with %s\n", - upper_dev->name, i->dev->name, dev->name); - ret = __netdev_adjacent_dev_link(dev, i->dev); - if (ret) - goto rollback_upper_mesh; - } - - /* add upper_dev to every dev's lower device */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - pr_debug("linking %s's lower device %s with %s\n", dev->name, - i->dev->name, upper_dev->name); - ret = __netdev_adjacent_dev_link(i->dev, upper_dev); - if (ret) - goto rollback_lower_mesh; - } - ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) - goto rollback_lower_mesh; + goto rollback; return 0; -rollback_lower_mesh: - to_i = i; - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - if (i == to_i) - break; - __netdev_adjacent_dev_unlink(i->dev, upper_dev); - } - - i = NULL; - -rollback_upper_mesh: - to_i = i; - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { - if (i == to_i) - break; - __netdev_adjacent_dev_unlink(dev, i->dev); - } - - i = j = NULL; - -rollback_mesh: - to_i = i; - to_j = j; - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { - if (i == to_i && j == to_j) - break; - __netdev_adjacent_dev_unlink(i->dev, j->dev); - } - if (i == to_i) - break; - } - +rollback: __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); return ret; @@ -5927,7 +5804,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { struct netdev_notifier_changeupper_info changeupper_info; - struct netdev_adjacent *i, *j; ASSERT_RTNL(); changeupper_info.upper_dev = upper_dev; @@ -5939,23 +5815,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); - /* Here is the tricky part. We must remove all dev's lower - * devices from all upper_dev's upper devices and vice - * versa, to maintain the graph relationship. - */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(i->dev, j->dev); - - /* remove also the devices itself from lower/upper device - * list - */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) - __netdev_adjacent_dev_unlink(i->dev, upper_dev); - - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(dev, i->dev); - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); } @@ -6493,9 +6352,18 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) if (new_mtu == dev->mtu) return 0; - /* MTU must be positive. */ - if (new_mtu < 0) + /* MTU must be positive, and in range */ + if (new_mtu < 0 || new_mtu < dev->min_mtu) { + net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n", + dev->name, new_mtu, dev->min_mtu); return -EINVAL; + } + + if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { + net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n", + dev->name, new_mtu, dev->max_mtu); + return -EINVAL; + } if (!netif_device_present(dev)) return -ENODEV; @@ -6770,6 +6638,7 @@ static void rollback_registered_many(struct list_head *head) /* Notifier chain MUST detach us all upper devices. */ WARN_ON(netdev_has_any_upper_dev(dev)); + WARN_ON(netdev_has_any_lower_dev(dev)); /* Remove entries from kobject tree */ netdev_unregister_kobject(dev); @@ -7648,8 +7517,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->link_watch_list); INIT_LIST_HEAD(&dev->adj_list.upper); INIT_LIST_HEAD(&dev->adj_list.lower); - INIT_LIST_HEAD(&dev->all_adj_list.upper); - INIT_LIST_HEAD(&dev->all_adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); #ifdef CONFIG_NET_SCHED diff --git a/net/core/devlink.c b/net/core/devlink.c index 1b5063088f1a..c14f8b661db9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -341,15 +341,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, mutex_unlock(&devlink_mutex); } -static struct genl_family devlink_nl_family = { - .id = GENL_ID_GENERATE, - .name = DEVLINK_GENL_NAME, - .version = DEVLINK_GENL_VERSION, - .maxattr = DEVLINK_ATTR_MAX, - .netnsok = true, - .pre_doit = devlink_nl_pre_doit, - .post_doit = devlink_nl_post_doit, -}; +static struct genl_family devlink_nl_family; enum devlink_multicast_groups { DEVLINK_MCGRP_CONFIG, @@ -608,6 +600,8 @@ static int devlink_port_type_set(struct devlink *devlink, if (devlink->ops && devlink->ops->port_type_set) { if (port_type == DEVLINK_PORT_TYPE_NOTSET) return -EINVAL; + if (port_type == devlink_port->type) + return 0; err = devlink->ops->port_type_set(devlink_port, port_type); if (err) return err; @@ -1618,6 +1612,20 @@ static const struct genl_ops devlink_nl_ops[] = { }, }; +static struct genl_family devlink_nl_family __ro_after_init = { + .name = DEVLINK_GENL_NAME, + .version = DEVLINK_GENL_VERSION, + .maxattr = DEVLINK_ATTR_MAX, + .netnsok = true, + .pre_doit = devlink_nl_pre_doit, + .post_doit = devlink_nl_post_doit, + .module = THIS_MODULE, + .ops = devlink_nl_ops, + .n_ops = ARRAY_SIZE(devlink_nl_ops), + .mcgrps = devlink_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), +}; + /** * devlink_alloc - Allocate new devlink instance resources * @@ -1840,9 +1848,7 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister); static int __init devlink_module_init(void) { - return genl_register_family_with_ops_groups(&devlink_nl_family, - devlink_nl_ops, - devlink_nl_mcgrps); + return genl_register_family(&devlink_nl_family); } static void __exit devlink_module_exit(void) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 72cfb0c61125..8e0c0635ee97 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -59,12 +59,7 @@ struct dm_hw_stat_delta { unsigned long last_drop_val; }; -static struct genl_family net_drop_monitor_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = "NET_DM", - .version = 2, -}; +static struct genl_family net_drop_monitor_family; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); @@ -351,6 +346,17 @@ static const struct genl_ops dropmon_ops[] = { }, }; +static struct genl_family net_drop_monitor_family __ro_after_init = { + .hdrsize = 0, + .name = "NET_DM", + .version = 2, + .module = THIS_MODULE, + .ops = dropmon_ops, + .n_ops = ARRAY_SIZE(dropmon_ops), + .mcgrps = dropmon_mcgrps, + .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps), +}; + static struct notifier_block dropmon_net_notifier = { .notifier_call = dropmon_net_event }; @@ -367,8 +373,7 @@ static int __init init_net_drop_monitor(void) return -ENOSPC; } - rc = genl_register_family_with_ops_groups(&net_drop_monitor_family, - dropmon_ops, dropmon_mcgrps); + rc = genl_register_family(&net_drop_monitor_family); if (rc) { pr_err("Could not create drop monitor netlink family\n"); return rc; diff --git a/net/core/filter.c b/net/core/filter.c index 00351cdf7d0c..cd9e2ba66b0e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2492,6 +2492,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_raw_smp_processor_id_proto; + case BPF_FUNC_get_numa_node_id: + return &bpf_get_numa_node_id_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index e5f84c26ba1a..88fd64250b02 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -130,6 +130,19 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, } EXPORT_SYMBOL(lwtunnel_build_state); +void lwtstate_free(struct lwtunnel_state *lws) +{ + const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; + + if (ops->destroy_state) { + ops->destroy_state(lws); + kfree_rcu(lws, rcu); + } else { + kfree(lws); + } +} +EXPORT_SYMBOL(lwtstate_free); + int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { const struct lwtunnel_encap_ops *ops; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 6e4f34721080..d4fe28606ff5 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -950,10 +950,13 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) } while (--i >= new_num) { + struct kobject *kobj = &dev->_rx[i].kobj; + + if (!list_empty(&dev_net(dev)->exit_list)) + kobj->uevent_suppress = 1; if (dev->sysfs_rx_queue_group) - sysfs_remove_group(&dev->_rx[i].kobj, - dev->sysfs_rx_queue_group); - kobject_put(&dev->_rx[i].kobj); + sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); + kobject_put(kobj); } return error; @@ -1340,6 +1343,8 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) while (--i >= new_num) { struct netdev_queue *queue = dev->_tx + i; + if (!list_empty(&dev_net(dev)->exit_list)) + queue->kobj.uevent_suppress = 1; #ifdef CONFIG_BQL sysfs_remove_group(&queue->kobj, &dql_group); #endif @@ -1525,6 +1530,9 @@ void netdev_unregister_kobject(struct net_device *ndev) { struct device *dev = &(ndev->dev); + if (!list_empty(&dev_net(ndev)->exit_list)) + dev_set_uevent_suppress(dev, 1); + kobject_get(&dev->kobj); remove_queue_kobjects(ndev); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 42bdda0e616b..b9243b14af17 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -309,6 +309,16 @@ out_undo: #ifdef CONFIG_NET_NS +static struct ucounts *inc_net_namespaces(struct user_namespace *ns) +{ + return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES); +} + +static void dec_net_namespaces(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_NET_NAMESPACES); +} + static struct kmem_cache *net_cachep; static struct workqueue_struct *netns_wq; @@ -350,19 +360,34 @@ void net_drop_ns(void *p) struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns, struct net *old_net) { + struct ucounts *ucounts; struct net *net; int rv; if (!(flags & CLONE_NEWNET)) return get_net(old_net); + ucounts = inc_net_namespaces(user_ns); + if (!ucounts) + return ERR_PTR(-ENOSPC); + net = net_alloc(); - if (!net) + if (!net) { + dec_net_namespaces(ucounts); return ERR_PTR(-ENOMEM); + } get_user_ns(user_ns); - mutex_lock(&net_mutex); + rv = mutex_lock_killable(&net_mutex); + if (rv < 0) { + net_free(net); + dec_net_namespaces(ucounts); + put_user_ns(user_ns); + return ERR_PTR(rv); + } + + net->ucounts = ucounts; rv = setup_net(net, user_ns); if (rv == 0) { rtnl_lock(); @@ -371,6 +396,7 @@ struct net *copy_net_ns(unsigned long flags, } mutex_unlock(&net_mutex); if (rv < 0) { + dec_net_namespaces(ucounts); put_user_ns(user_ns); net_drop_ns(net); return ERR_PTR(rv); @@ -443,6 +469,7 @@ static void cleanup_net(struct work_struct *work) /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); + dec_net_namespaces(net->ucounts); put_user_ns(net->user_ns); net_drop_ns(net); } @@ -1004,11 +1031,17 @@ static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns) return 0; } +static struct user_namespace *netns_owner(struct ns_common *ns) +{ + return to_net_ns(ns)->user_ns; +} + const struct proc_ns_operations netns_operations = { .name = "net", .type = CLONE_NEWNET, .get = netns_get, .put = netns_put, .install = netns_install, + .owner = netns_owner, }; #endif diff --git a/net/core/pktgen.c b/net/core/pktgen.c index bbd118b19aef..5219a9e2127a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2286,7 +2286,7 @@ out: static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) { - pkt_dev->pkt_overhead = LL_RESERVED_SPACE(pkt_dev->odev); + pkt_dev->pkt_overhead = 0; pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32); pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev); pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); @@ -2777,13 +2777,13 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, } static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, - struct pktgen_dev *pkt_dev, - unsigned int extralen) + struct pktgen_dev *pkt_dev) { + unsigned int extralen = LL_RESERVED_SPACE(dev); struct sk_buff *skb = NULL; - unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen + - pkt_dev->pkt_overhead; + unsigned int size; + size = pkt_dev->cur_pkt_size + 64 + extralen + pkt_dev->pkt_overhead; if (pkt_dev->flags & F_NODE) { int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id(); @@ -2796,8 +2796,9 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); } + /* the caller pre-fetches from skb->data and reserves for the mac hdr */ if (likely(skb)) - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, extralen - 16); return skb; } @@ -2830,16 +2831,14 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - datalen = (odev->hard_header_len + 16) & ~0xf; - - skb = pktgen_alloc_skb(odev, pkt_dev, datalen); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } prefetchw(skb->data); - skb_reserve(skb, datalen); + skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ eth = (__u8 *) skb_push(skb, 14); @@ -2959,7 +2958,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - skb = pktgen_alloc_skb(odev, pkt_dev, 16); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3ac8946bf244..fb7348f13501 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1144,6 +1144,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi)) return 0; + memset(&vf_vlan_info, 0, sizeof(vf_vlan_info)); + vf_mac.vf = vf_vlan.vf = vf_vlan_info.vf = @@ -1753,6 +1755,9 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) len++; } + if (len == 0) + return -EINVAL; + err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d36c7548952f..1e3e0087245b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1962,37 +1962,13 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, return false; } -ssize_t skb_socket_splice(struct sock *sk, - struct pipe_inode_info *pipe, - struct splice_pipe_desc *spd) -{ - int ret; - - /* Drop the socket lock, otherwise we have reverse - * locking dependencies between sk_lock and i_mutex - * here as compared to sendfile(). We enter here - * with the socket lock held, and splice_to_pipe() will - * grab the pipe inode lock. For sendfile() emulation, - * we call into ->sendpage() with the i_mutex lock held - * and networking will grab the socket lock. - */ - release_sock(sk); - ret = splice_to_pipe(pipe, spd); - lock_sock(sk); - - return ret; -} - /* * Map data from the skb to a pipe. Should handle both the linear part, * the fragments, and the frag list. */ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, - unsigned int flags, - ssize_t (*splice_cb)(struct sock *, - struct pipe_inode_info *, - struct splice_pipe_desc *)) + unsigned int flags) { struct partial_page partial[MAX_SKB_FRAGS]; struct page *pages[MAX_SKB_FRAGS]; @@ -2009,7 +1985,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk); if (spd.nr_pages) - ret = splice_cb(sk, pipe, &spd); + ret = splice_to_pipe(pipe, &spd); return ret; } @@ -4528,13 +4504,18 @@ EXPORT_SYMBOL(skb_ensure_writable); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) { struct vlan_hdr *vhdr; - unsigned int offset = skb->data - skb_mac_header(skb); + int offset = skb->data - skb_mac_header(skb); int err; - __skb_push(skb, offset); + if (WARN_ONCE(offset, + "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n", + offset)) { + return -EINVAL; + } + err = skb_ensure_writable(skb, VLAN_ETH_HLEN); if (unlikely(err)) - goto pull; + return err; skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); @@ -4551,13 +4532,14 @@ int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_len(skb); -pull: - __skb_pull(skb, offset); return err; } EXPORT_SYMBOL(__skb_vlan_pop); +/* Pop a vlan tag either from hwaccel or from payload. + * Expects skb->data at mac header. + */ int skb_vlan_pop(struct sk_buff *skb) { u16 vlan_tci; @@ -4588,29 +4570,30 @@ int skb_vlan_pop(struct sk_buff *skb) } EXPORT_SYMBOL(skb_vlan_pop); +/* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present). + * Expects skb->data at mac header. + */ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { if (skb_vlan_tag_present(skb)) { - unsigned int offset = skb->data - skb_mac_header(skb); + int offset = skb->data - skb_mac_header(skb); int err; - /* __vlan_insert_tag expect skb->data pointing to mac header. - * So change skb->data before calling it and change back to - * original position later - */ - __skb_push(skb, offset); + if (WARN_ONCE(offset, + "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n", + offset)) { + return -EINVAL; + } + err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); - if (err) { - __skb_pull(skb, offset); + if (err) return err; - } skb->protocol = skb->vlan_proto; skb->mac_len += VLAN_HLEN; skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); - __skb_pull(skb, offset); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; diff --git a/net/core/sock.c b/net/core/sock.c index 038e660ef844..d8e4532e89e7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1363,6 +1363,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) slab = prot->slab; cgroup_sk_free(&sk->sk_cgrp_data); + mem_cgroup_sk_free(sk); security_sk_free(sk); if (slab != NULL) kmem_cache_free(slab, sk); @@ -1399,6 +1400,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); + mem_cgroup_sk_alloc(sk); cgroup_sk_alloc(&sk->sk_cgrp_data); sock_update_classid(&sk->sk_cgrp_data); sock_update_netprioidx(&sk->sk_cgrp_data); @@ -1545,6 +1547,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); + mem_cgroup_sk_alloc(newsk); cgroup_sk_alloc(&newsk->sk_cgrp_data); /* @@ -1569,9 +1572,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sk_set_socket(newsk, NULL); newsk->sk_wq = NULL; - if (mem_cgroup_sockets_enabled && sk->sk_memcg) - sock_update_memcg(newsk); - if (newsk->sk_prot->sockets_allocated) sk_sockets_allocated_inc(newsk); @@ -2091,24 +2091,18 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) EXPORT_SYMBOL(sk_wait_data); /** - * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated + * __sk_mem_raise_allocated - increase memory_allocated * @sk: socket * @size: memory size to allocate + * @amt: pages to allocate * @kind: allocation type * - * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means - * rmem allocation. This function assumes that protocols which have - * memory_pressure use sk_wmem_queued as write buffer accounting. + * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc */ -int __sk_mem_schedule(struct sock *sk, int size, int kind) +int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { struct proto *prot = sk->sk_prot; - int amt = sk_mem_pages(size); - long allocated; - - sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; - - allocated = sk_memory_allocated_add(sk, amt); + long allocated = sk_memory_allocated_add(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg && !mem_cgroup_charge_skmem(sk->sk_memcg, amt)) @@ -2169,9 +2163,6 @@ suppress_allocation: trace_sock_exceed_buf_limit(sk, prot, allocated); - /* Alas. Undo changes. */ - sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; - sk_memory_allocated_sub(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg) @@ -2179,18 +2170,40 @@ suppress_allocation: return 0; } +EXPORT_SYMBOL(__sk_mem_raise_allocated); + +/** + * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated + * @sk: socket + * @size: memory size to allocate + * @kind: allocation type + * + * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means + * rmem allocation. This function assumes that protocols which have + * memory_pressure use sk_wmem_queued as write buffer accounting. + */ +int __sk_mem_schedule(struct sock *sk, int size, int kind) +{ + int ret, amt = sk_mem_pages(size); + + sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT; + ret = __sk_mem_raise_allocated(sk, size, amt, kind); + if (!ret) + sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT; + return ret; +} EXPORT_SYMBOL(__sk_mem_schedule); /** - * __sk_mem_reclaim - reclaim memory_allocated + * __sk_mem_reduce_allocated - reclaim memory_allocated * @sk: socket - * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) + * @amount: number of quanta + * + * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc */ -void __sk_mem_reclaim(struct sock *sk, int amount) +void __sk_mem_reduce_allocated(struct sock *sk, int amount) { - amount >>= SK_MEM_QUANTUM_SHIFT; sk_memory_allocated_sub(sk, amount); - sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; if (mem_cgroup_sockets_enabled && sk->sk_memcg) mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); @@ -2199,6 +2212,19 @@ void __sk_mem_reclaim(struct sock *sk, int amount) (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); } +EXPORT_SYMBOL(__sk_mem_reduce_allocated); + +/** + * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated + * @sk: socket + * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) + */ +void __sk_mem_reclaim(struct sock *sk, int amount) +{ + amount >>= SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + __sk_mem_reduce_allocated(sk, amount); +} EXPORT_SYMBOL(__sk_mem_reclaim); int sk_set_peek_off(struct sock *sk, int val) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6b1282c006b1..d0c7bce88743 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -641,7 +641,8 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) /* ethtool operations *******************************************************/ static int -dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +dsa_slave_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct dsa_slave_priv *p = netdev_priv(dev); int err; @@ -650,19 +651,20 @@ dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) if (p->phy != NULL) { err = phy_read_status(p->phy); if (err == 0) - err = phy_ethtool_gset(p->phy, cmd); + err = phy_ethtool_ksettings_get(p->phy, cmd); } return err; } static int -dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +dsa_slave_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct dsa_slave_priv *p = netdev_priv(dev); if (p->phy != NULL) - return phy_ethtool_sset(p->phy, cmd); + return phy_ethtool_ksettings_set(p->phy, cmd); return -EOPNOTSUPP; } @@ -990,8 +992,6 @@ void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops) } static const struct ethtool_ops dsa_slave_ethtool_ops = { - .get_settings = dsa_slave_get_settings, - .set_settings = dsa_slave_set_settings, .get_drvinfo = dsa_slave_get_drvinfo, .get_regs_len = dsa_slave_get_regs_len, .get_regs = dsa_slave_get_regs, @@ -1007,6 +1007,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_wol = dsa_slave_get_wol, .set_eee = dsa_slave_set_eee, .get_eee = dsa_slave_get_eee, + .get_link_ksettings = dsa_slave_get_link_ksettings, + .set_link_ksettings = dsa_slave_set_link_ksettings, }; static const struct net_device_ops dsa_slave_netdev_ops = { @@ -1245,6 +1247,8 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; + slave_dev->min_mtu = 0; + slave_dev->max_mtu = ETH_MAX_MTU; SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 66dff5e3d772..f983c102ebe3 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -322,8 +322,7 @@ EXPORT_SYMBOL(eth_mac_addr); */ int eth_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) - return -EINVAL; + netdev_warn(dev, "%s is deprecated\n", __func__); dev->mtu = new_mtu; return 0; } @@ -357,6 +356,8 @@ void ether_setup(struct net_device *dev) dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = ETH_DATA_LEN; dev->addr_len = ETH_ALEN; dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 16737cd8dae8..fc65b145f6e7 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -398,6 +398,7 @@ void hsr_dev_setup(struct net_device *dev) random_ether_addr(dev->dev_addr); ether_setup(dev); + dev->min_mtu = 0; dev->header_ops = &hsr_header_ops; dev->netdev_ops = &hsr_device_ops; SET_NETDEV_DEVTYPE(dev, &hsr_type); diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index d4d1617f43a8..1ab30e7d3f99 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -131,13 +131,7 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { [HSR_A_IF2_SEQ] = { .type = NLA_U16 }, }; -static struct genl_family hsr_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = "HSR", - .version = 1, - .maxattr = HSR_A_MAX, -}; +static struct genl_family hsr_genl_family; static const struct genl_multicast_group hsr_mcgrps[] = { { .name = "hsr-network", }, @@ -467,6 +461,18 @@ static const struct genl_ops hsr_ops[] = { }, }; +static struct genl_family hsr_genl_family __ro_after_init = { + .hdrsize = 0, + .name = "HSR", + .version = 1, + .maxattr = HSR_A_MAX, + .module = THIS_MODULE, + .ops = hsr_ops, + .n_ops = ARRAY_SIZE(hsr_ops), + .mcgrps = hsr_mcgrps, + .n_mcgrps = ARRAY_SIZE(hsr_mcgrps), +}; + int __init hsr_netlink_init(void) { int rc; @@ -475,8 +481,7 @@ int __init hsr_netlink_init(void) if (rc) goto fail_rtnl_link_register; - rc = genl_register_family_with_ops_groups(&hsr_genl_family, hsr_ops, - hsr_mcgrps); + rc = genl_register_family(&hsr_genl_family); if (rc) goto fail_genl_register_family; diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index c8133c07ceee..6bde9e5a5503 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -28,14 +28,6 @@ static unsigned int ieee802154_seq_num; static DEFINE_SPINLOCK(ieee802154_seq_lock); -struct genl_family nl802154_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = IEEE802154_NL_NAME, - .version = 1, - .maxattr = IEEE802154_ATTR_MAX, -}; - /* Requests to userspace */ struct sk_buff *ieee802154_nl_create(int flags, u8 req) { @@ -139,11 +131,21 @@ static const struct genl_multicast_group ieee802154_mcgrps[] = { [IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, }, }; +struct genl_family nl802154_family __ro_after_init = { + .hdrsize = 0, + .name = IEEE802154_NL_NAME, + .version = 1, + .maxattr = IEEE802154_ATTR_MAX, + .module = THIS_MODULE, + .ops = ieee8021154_ops, + .n_ops = ARRAY_SIZE(ieee8021154_ops), + .mcgrps = ieee802154_mcgrps, + .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps), +}; + int __init ieee802154_nl_init(void) { - return genl_register_family_with_ops_groups(&nl802154_family, - ieee8021154_ops, - ieee802154_mcgrps); + return genl_register_family(&nl802154_family); } void ieee802154_nl_exit(void) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index d90a4ed5b8a0..fc60cd061f39 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -26,23 +26,8 @@ #include "rdev-ops.h" #include "core.h" -static int nl802154_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - -static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - /* the netlink family */ -static struct genl_family nl802154_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ - .name = NL802154_GENL_NAME, /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ - .maxattr = NL802154_ATTR_MAX, - .netnsok = true, - .pre_doit = nl802154_pre_doit, - .post_doit = nl802154_post_doit, -}; +static struct genl_family nl802154_fam; /* multicast groups */ enum nl802154_multicast_groups { @@ -263,13 +248,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, - nl802154_fam.attrbuf, nl802154_fam.maxattr, + genl_family_attrbuf(&nl802154_fam), + nl802154_fam.maxattr, nl802154_policy); if (err) goto out_unlock; *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk), - nl802154_fam.attrbuf); + genl_family_attrbuf(&nl802154_fam)); if (IS_ERR(*wpan_dev)) { err = PTR_ERR(*wpan_dev); goto out_unlock; @@ -575,7 +561,7 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl802154_dump_wpan_phy_state *state) { - struct nlattr **tb = nl802154_fam.attrbuf; + struct nlattr **tb = genl_family_attrbuf(&nl802154_fam); int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, tb, nl802154_fam.maxattr, nl802154_policy); @@ -2476,11 +2462,25 @@ static const struct genl_ops nl802154_ops[] = { #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; +static struct genl_family nl802154_fam __ro_after_init = { + .name = NL802154_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ + .maxattr = NL802154_ATTR_MAX, + .netnsok = true, + .pre_doit = nl802154_pre_doit, + .post_doit = nl802154_post_doit, + .module = THIS_MODULE, + .ops = nl802154_ops, + .n_ops = ARRAY_SIZE(nl802154_ops), + .mcgrps = nl802154_mcgrps, + .n_mcgrps = ARRAY_SIZE(nl802154_mcgrps), +}; + /* initialisation/exit functions */ -int nl802154_init(void) +int __init nl802154_init(void) { - return genl_register_family_with_ops_groups(&nl802154_fam, nl802154_ops, - nl802154_mcgrps); + return genl_register_family(&nl802154_fam); } void nl802154_exit(void) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 300b06888fdf..28e051a8e847 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -430,6 +430,14 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_RAW_DIAG + tristate "RAW: socket monitoring interface" + depends on INET_DIAG && (IPV6 || IPV6=n) + default n + ---help--- + Support for RAW socket monitoring interface used by the ss tool. + If unsure, say Y. + config INET_DIAG_DESTROY bool "INET: allow privileged process to administratively close sockets" depends on INET_DIAG diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index bc6a6c8b9bcd..48af58a5686e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o +obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index cf50f7e2b012..6cb57bb8692d 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -622,14 +622,7 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg) return err; } -static struct genl_family fou_nl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = FOU_GENL_NAME, - .version = FOU_GENL_VERSION, - .maxattr = FOU_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family fou_nl_family; static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_PORT] = { .type = NLA_U16, }, @@ -831,6 +824,17 @@ static const struct genl_ops fou_nl_ops[] = { }, }; +static struct genl_family fou_nl_family __ro_after_init = { + .hdrsize = 0, + .name = FOU_GENL_NAME, + .version = FOU_GENL_VERSION, + .maxattr = FOU_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = fou_nl_ops, + .n_ops = ARRAY_SIZE(fou_nl_ops), +}; + size_t fou_encap_hlen(struct ip_tunnel_encap *e) { return sizeof(struct udphdr); @@ -1086,8 +1090,7 @@ static int __init fou_init(void) if (ret) goto exit; - ret = genl_register_family_with_ops(&fou_nl_family, - fou_nl_ops); + ret = genl_register_family(&fou_nl_family); if (ret < 0) goto unregister; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e4d16fc5bbb3..3b34024202d8 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -200,6 +200,15 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) goto errout; + /* + * RAW sockets might have user-defined protocols assigned, + * so report the one supplied on socket creation. + */ + if (sk->sk_type == SOCK_RAW) { + if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol)) + goto errout; + } + if (!icsk) { handler->idiag_get_info(sk, r, NULL); goto out; @@ -863,7 +872,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, s_num = num = cb->args[2]; if (cb->args[0] == 0) { - if (!(idiag_states & TCPF_LISTEN)) + if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport) goto skip_listen_ht; for (i = s_i; i < INET_LHTABLE_SIZE; i++) { @@ -872,7 +881,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, num = 0; ilb = &hashinfo->listening_hash[i]; - spin_lock_bh(&ilb->lock); + spin_lock(&ilb->lock); sk_for_each(sk, &ilb->head) { struct inet_sock *inet = inet_sk(sk); @@ -892,26 +901,18 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, r->id.idiag_sport) goto next_listen; - if (r->id.idiag_dport || - cb->args[3] > 0) - goto next_listen; - if (inet_csk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) { - spin_unlock_bh(&ilb->lock); + spin_unlock(&ilb->lock); goto done; } next_listen: - cb->args[3] = 0; - cb->args[4] = 0; ++num; } - spin_unlock_bh(&ilb->lock); + spin_unlock(&ilb->lock); s_num = 0; - cb->args[3] = 0; - cb->args[4] = 0; } skip_listen_ht: cb->args[0] = 1; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 5719d6ba0824..12a92e3349ed 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -358,6 +358,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, { struct ip_tunnel *nt; struct net_device *dev; + int t_hlen; BUG_ON(!itn->fb_tunnel_dev); dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); @@ -367,6 +368,9 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, dev->mtu = ip_tunnel_bind_dev(dev); nt = netdev_priv(dev); + t_hlen = nt->hlen + sizeof(struct iphdr); + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; ip_tunnel_add(itn, nt); return nt; } @@ -929,7 +933,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) int t_hlen = tunnel->hlen + sizeof(struct iphdr); int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; - if (new_mtu < 68) + if (new_mtu < ETH_MIN_MTU) return -EINVAL; if (new_mtu > max_mtu) { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a87bcd2d4a94..5f006e13de56 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2123,7 +2123,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait) + struct rtmsg *rtm, int nowait, u32 portid) { struct mfc_cache *cache; struct mr_table *mrt; @@ -2168,6 +2168,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_push(skb2, sizeof(struct iphdr)); skb_reset_network_header(skb2); iph = ip_hdr(skb2); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 66ddcb60519a..7cf7d6e380c2 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -258,7 +258,7 @@ int ping_init_sock(struct sock *sk) struct net *net = sock_net(sk); kgid_t group = current_egid(); struct group_info *group_info; - int i, j, count; + int i; kgid_t low, high; int ret = 0; @@ -270,16 +270,11 @@ int ping_init_sock(struct sock *sk) return 0; group_info = get_current_groups(); - count = group_info->ngroups; - for (i = 0; i < group_info->nblocks; i++) { - int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); - for (j = 0; j < cp_count; j++) { - kgid_t gid = group_info->blocks[i][j]; - if (gid_lte(low, gid) && gid_lte(gid, high)) - goto out_release_group; - } + for (i = 0; i < group_info->ngroups; i++) { + kgid_t gid = group_info->gid[i]; - count -= cp_count; + if (gid_lte(low, gid) && gid_lte(gid, high)) + goto out_release_group; } ret = -EACCES; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 1ed015e4bc79..7143ca1a6af9 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,6 +46,8 @@ #include <net/sock.h> #include <net/raw.h> +#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX) + /* * Report socket allocation statistics [mea@utu.fi] */ @@ -356,22 +358,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_puts(seq, " OutMsgs OutErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } @@ -379,14 +381,16 @@ static void icmp_put(struct seq_file *seq) /* * Called from the PROCfs module. This outputs /proc/net/snmp. */ -static int snmp_seq_show(struct seq_file *seq, void *v) +static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) { - int i; struct net *net = seq->private; + u64 buff64[IPSTATS_MIB_MAX]; + int i; - seq_puts(seq, "Ip: Forwarding DefaultTTL"); + memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + seq_puts(seq, "Ip: Forwarding DefaultTTL"); + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", @@ -394,57 +398,77 @@ static int snmp_seq_show(struct seq_file *seq, void *v) net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, - snmp4_ipstats_list[i].entry, - offsetof(struct ipstats_mib, syncp))); + snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, + net->mib.ip_statistics, + offsetof(struct ipstats_mib, syncp)); + for (i = 0; snmp4_ipstats_list[i].name; i++) + seq_printf(seq, " %llu", buff64[i]); - icmp_put(seq); /* RFC 2011 compatibility */ - icmpmsg_put(seq); + return 0; +} + +static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) +{ + unsigned long buff[TCPUDP_MIB_MAX]; + struct net *net = seq->private; + int i; + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) + for (i = 0; snmp4_tcp_list[i].name; i++) seq_printf(seq, " %s", snmp4_tcp_list[i].name); seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { + snmp_get_cpu_field_batch(buff, snmp4_tcp_list, + net->mib.tcp_statistics); + for (i = 0; snmp4_tcp_list[i].name; i++) { /* MaxConn field is signed, RFC 2012 */ if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) - seq_printf(seq, " %ld", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %ld", buff[i]); else - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); + seq_printf(seq, " %lu", buff[i]); } + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); + + snmp_get_cpu_field_batch(buff, snmp4_udp_list, + net->mib.udp_statistics); seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udp_statistics, - snmp4_udp_list[i].entry)); + for (i = 0; snmp4_udp_list[i].name; i++) + seq_printf(seq, " %lu", buff[i]); + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) + snmp_get_cpu_field_batch(buff, snmp4_udp_list, + net->mib.udplite_statistics); + for (i = 0; snmp4_udp_list[i].name; i++) seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdpLite:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udplite_statistics, - snmp4_udp_list[i].entry)); + for (i = 0; snmp4_udp_list[i].name; i++) + seq_printf(seq, " %lu", buff[i]); seq_putc(seq, '\n'); return 0; } +static int snmp_seq_show(struct seq_file *seq, void *v) +{ + snmp_seq_show_ipstats(seq, v); + + icmp_put(seq); /* RFC 2011 compatibility */ + icmpmsg_put(seq); + + snmp_seq_show_tcp_udp(seq, v); + + return 0; +} + static int snmp_seq_open(struct inode *inode, struct file *file) { return single_open_net(inode, file, snmp_seq_show); @@ -469,21 +493,21 @@ static int netstat_seq_show(struct seq_file *seq, void *v) struct net *net = seq->private; seq_puts(seq, "TcpExt:"); - for (i = 0; snmp4_net_list[i].name != NULL; i++) + for (i = 0; snmp4_net_list[i].name; i++) seq_printf(seq, " %s", snmp4_net_list[i].name); seq_puts(seq, "\nTcpExt:"); - for (i = 0; snmp4_net_list[i].name != NULL; i++) + for (i = 0; snmp4_net_list[i].name; i++) seq_printf(seq, " %lu", snmp_fold_field(net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipextstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipextstats_list[i].name); seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipextstats_list[i].name; i++) seq_printf(seq, " %llu", snmp_fold_field64(net->mib.ip_statistics, snmp4_ipextstats_list[i].entry, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 90a85c955872..03618ed03532 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -89,9 +89,10 @@ struct raw_frag_vec { int hlen; }; -static struct raw_hashinfo raw_v4_hashinfo = { +struct raw_hashinfo raw_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), }; +EXPORT_SYMBOL_GPL(raw_v4_hashinfo); int raw_hash_sk(struct sock *sk) { @@ -120,7 +121,7 @@ void raw_unhash_sk(struct sock *sk) } EXPORT_SYMBOL_GPL(raw_unhash_sk); -static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, +struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif) { sk_for_each_from(sk) { @@ -136,6 +137,7 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, found: return sk; } +EXPORT_SYMBOL_GPL(__raw_v4_lookup); /* * 0 - deliver @@ -912,6 +914,20 @@ static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg } #endif +int raw_abort(struct sock *sk, int err) +{ + lock_sock(sk); + + sk->sk_err = err; + sk->sk_error_report(sk); + udp_disconnect(sk, 0); + + release_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(raw_abort); + struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, @@ -937,6 +953,7 @@ struct proto raw_prot = { .compat_getsockopt = compat_raw_getsockopt, .compat_ioctl = compat_raw_ioctl, #endif + .diag_destroy = raw_abort, }; #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c new file mode 100644 index 000000000000..ef3bea061b75 --- /dev/null +++ b/net/ipv4/raw_diag.c @@ -0,0 +1,261 @@ +#include <linux/module.h> + +#include <linux/inet_diag.h> +#include <linux/sock_diag.h> + +#include <net/raw.h> +#include <net/rawv6.h> + +#ifdef pr_fmt +# undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +static struct raw_hashinfo * +raw_get_hashinfo(const struct inet_diag_req_v2 *r) +{ + if (r->sdiag_family == AF_INET) { + return &raw_v4_hashinfo; +#if IS_ENABLED(CONFIG_IPV6) + } else if (r->sdiag_family == AF_INET6) { + return &raw_v6_hashinfo; +#endif + } else { + pr_warn_once("Unexpected inet family %d\n", + r->sdiag_family); + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); + } +} + +/* + * Due to requirement of not breaking user API we can't simply + * rename @pad field in inet_diag_req_v2 structure, instead + * use helper to figure it out. + */ + +static struct sock *raw_lookup(struct net *net, struct sock *from, + const struct inet_diag_req_v2 *req) +{ + struct inet_diag_req_raw *r = (void *)req; + struct sock *sk = NULL; + + if (r->sdiag_family == AF_INET) + sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol, + r->id.idiag_dst[0], + r->id.idiag_src[0], + r->id.idiag_if); +#if IS_ENABLED(CONFIG_IPV6) + else + sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol, + (const struct in6_addr *)r->id.idiag_src, + (const struct in6_addr *)r->id.idiag_dst, + r->id.idiag_if); +#endif + return sk; +} + +static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r) +{ + struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); + struct sock *sk = NULL, *s; + int slot; + + if (IS_ERR(hashinfo)) + return ERR_CAST(hashinfo); + + read_lock(&hashinfo->lock); + for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) { + sk_for_each(s, &hashinfo->ht[slot]) { + sk = raw_lookup(net, s, r); + if (sk) { + /* + * Grab it and keep until we fill + * diag meaage to be reported, so + * caller should call sock_put then. + * We can do that because we're keeping + * hashinfo->lock here. + */ + sock_hold(sk); + break; + } + } + } + read_unlock(&hashinfo->lock); + + return sk ? sk : ERR_PTR(-ENOENT); +} + +static int raw_diag_dump_one(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *r) +{ + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; + + sk = raw_sock_get(net, r); + if (IS_ERR(sk)) + return PTR_ERR(sk); + + rep = nlmsg_new(sizeof(struct inet_diag_msg) + + sizeof(struct inet_diag_meminfo) + 64, + GFP_KERNEL); + if (!rep) { + sock_put(sk); + return -ENOMEM; + } + + err = inet_sk_diag_fill(sk, NULL, rep, r, + sk_user_ns(NETLINK_CB(in_skb).sk), + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0, nlh, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); + sock_put(sk); + + if (err < 0) { + kfree_skb(rep); + return err; + } + + err = netlink_unicast(net->diag_nlsk, rep, + NETLINK_CB(in_skb).portid, + MSG_DONTWAIT); + if (err > 0) + err = 0; + return err; +} + +static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + struct nlattr *bc, bool net_admin) +{ + if (!inet_diag_bc_sk(bc, sk)) + return 0; + + return inet_sk_diag_fill(sk, NULL, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->nlh, net_admin); +} + +static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); + struct net *net = sock_net(skb->sk); + int num, s_num, slot, s_slot; + struct sock *sk = NULL; + + if (IS_ERR(hashinfo)) + return; + + s_slot = cb->args[0]; + num = s_num = cb->args[1]; + + read_lock(&hashinfo->lock); + for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) { + num = 0; + + sk_for_each(sk, &hashinfo->ht[slot]) { + struct inet_sock *inet = inet_sk(sk); + + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) + goto next; + if (sk->sk_family != r->sdiag_family) + goto next; + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next; + if (r->id.idiag_dport != inet->inet_dport && + r->id.idiag_dport) + goto next; + if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) + goto out_unlock; +next: + num++; + } + } + +out_unlock: + read_unlock(&hashinfo->lock); + + cb->args[0] = slot; + cb->args[1] = num; +} + +static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *info) +{ + r->idiag_rqueue = sk_rmem_alloc_get(sk); + r->idiag_wqueue = sk_wmem_alloc_get(sk); +} + +#ifdef CONFIG_INET_DIAG_DESTROY +static int raw_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *r) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk; + + sk = raw_sock_get(net, r); + if (IS_ERR(sk)) + return PTR_ERR(sk); + return sock_diag_destroy(sk, ECONNABORTED); +} +#endif + +static const struct inet_diag_handler raw_diag_handler = { + .dump = raw_diag_dump, + .dump_one = raw_diag_dump_one, + .idiag_get_info = raw_diag_get_info, + .idiag_type = IPPROTO_RAW, + .idiag_info_size = 0, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = raw_diag_destroy, +#endif +}; + +static void __always_unused __check_inet_diag_req_raw(void) +{ + /* + * Make sure the two structures are identical, + * except the @pad field. + */ +#define __offset_mismatch(m1, m2) \ + (offsetof(struct inet_diag_req_v2, m1) != \ + offsetof(struct inet_diag_req_raw, m2)) + + BUILD_BUG_ON(sizeof(struct inet_diag_req_v2) != + sizeof(struct inet_diag_req_raw)); + BUILD_BUG_ON(__offset_mismatch(sdiag_family, sdiag_family)); + BUILD_BUG_ON(__offset_mismatch(sdiag_protocol, sdiag_protocol)); + BUILD_BUG_ON(__offset_mismatch(idiag_ext, idiag_ext)); + BUILD_BUG_ON(__offset_mismatch(pad, sdiag_raw_protocol)); + BUILD_BUG_ON(__offset_mismatch(idiag_states, idiag_states)); + BUILD_BUG_ON(__offset_mismatch(id, id)); +#undef __offset_mismatch +} + +static int __init raw_diag_init(void) +{ + return inet_diag_register(&raw_diag_handler); +} + +static void __exit raw_diag_exit(void) +{ + inet_diag_unregister(&raw_diag_handler); +} + +module_init(raw_diag_init); +module_exit(raw_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 654a9af20136..62d4d90c1389 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2265,7 +2265,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, if (err) { res.fi = NULL; res.table = NULL; - if (fl4->flowi4_oif) { + if (fl4->flowi4_oif && + !netif_index_is_l3_master(net, fl4->flowi4_oif)) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2500,7 +2501,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, fl4->saddr, fl4->daddr, - r, nowait); + r, nowait, portid); + if (err <= 0) { if (!nowait) { if (err == 0) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f253e5019d22..3251fe71f39f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -424,8 +424,6 @@ void tcp_init_sock(struct sock *sk) sk->sk_rcvbuf = sysctl_tcp_rmem[1]; local_bh_disable(); - if (mem_cgroup_sockets_enabled) - sock_update_memcg(sk); sk_sockets_allocated_inc(sk); local_bh_enable(); } @@ -691,8 +689,7 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, int ret; ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe, - min(rd_desc->count, len), tss->flags, - skb_socket_splice); + min(rd_desc->count, len), tss->flags); if (ret > 0) rd_desc->count -= ret; return ret; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8c6ad2d319d6..a27b9c0e27c0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2362,10 +2362,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, - &np->daddr, ntohs(inet->inet_dport), + &sk->sk_v6_daddr, ntohs(inet->inet_dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7ac37c314312..83b3d0b8c481 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1871,9 +1871,6 @@ void tcp_v4_destroy_sock(struct sock *sk) local_bh_disable(); sk_sockets_allocated_dec(sk); local_bh_enable(); - - if (mem_cgroup_sockets_enabled && sk->sk_memcg) - sock_release_memcg(sk); } EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -1896,7 +1893,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (!sk) { get_head: ilb = &tcp_hashinfo.listening_hash[st->bucket]; - spin_lock_bh(&ilb->lock); + spin_lock(&ilb->lock); sk = sk_head(&ilb->head); st->offset = 0; goto get_sk; @@ -1914,7 +1911,7 @@ get_sk: return sk; icsk = inet_csk(sk); } - spin_unlock_bh(&ilb->lock); + spin_unlock(&ilb->lock); st->offset = 0; if (++st->bucket < INET_LHTABLE_SIZE) goto get_head; @@ -2122,7 +2119,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); + spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock); break; case TCP_SEQ_STATE_ESTABLISHED: if (v) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index bf1f3b2b29d1..d46f4d5b1c62 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -742,14 +742,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, rcu_read_unlock(); } -static struct genl_family tcp_metrics_nl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = TCP_METRICS_GENL_NAME, - .version = TCP_METRICS_GENL_VERSION, - .maxattr = TCP_METRICS_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family tcp_metrics_nl_family; static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, @@ -1116,6 +1109,17 @@ static const struct genl_ops tcp_metrics_nl_ops[] = { }, }; +static struct genl_family tcp_metrics_nl_family __ro_after_init = { + .hdrsize = 0, + .name = TCP_METRICS_GENL_NAME, + .version = TCP_METRICS_GENL_VERSION, + .maxattr = TCP_METRICS_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = tcp_metrics_nl_ops, + .n_ops = ARRAY_SIZE(tcp_metrics_nl_ops), +}; + static unsigned int tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { @@ -1179,8 +1183,7 @@ void __init tcp_metrics_init(void) if (ret < 0) panic("Could not allocate the tcp_metrics hash table\n"); - ret = genl_register_family_with_ops(&tcp_metrics_nl_family, - tcp_metrics_nl_ops); + ret = genl_register_family(&tcp_metrics_nl_family); if (ret < 0) panic("Could not register tcp_metrics generic netlink\n"); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7c777089a4d6..896e9dfbdb5c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1992,12 +1992,14 @@ static int tcp_mtu_probe(struct sock *sk) len = 0; tcp_for_write_queue_from_safe(skb, next, sk) { copy = min_t(int, skb->len, probe_size - len); - if (nskb->ip_summed) + if (nskb->ip_summed) { skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); - else - nskb->csum = skb_copy_and_csum_bits(skb, 0, - skb_put(nskb, copy), - copy, nskb->csum); + } else { + __wsum csum = skb_copy_and_csum_bits(skb, 0, + skb_put(nskb, copy), + copy, 0); + nskb->csum = csum_block_add(nskb->csum, csum, len); + } if (skb->len <= copy) { /* We've eaten all the data from this skb. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7d96dc2d3d08..c8332715ee2d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1172,6 +1172,112 @@ out: return ret; } +static void udp_rmem_release(struct sock *sk, int size, int partial) +{ + int amt; + + atomic_sub(size, &sk->sk_rmem_alloc); + + spin_lock_bh(&sk->sk_receive_queue.lock); + sk->sk_forward_alloc += size; + amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); + sk->sk_forward_alloc -= amt; + spin_unlock_bh(&sk->sk_receive_queue.lock); + + if (amt) + __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); +} + +static void udp_rmem_free(struct sk_buff *skb) +{ + udp_rmem_release(skb->sk, skb->truesize, 1); +} + +int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff_head *list = &sk->sk_receive_queue; + int rmem, delta, amt, err = -ENOMEM; + int size = skb->truesize; + + /* try to avoid the costly atomic add/sub pair when the receive + * queue is full; always allow at least a packet + */ + rmem = atomic_read(&sk->sk_rmem_alloc); + if (rmem && (rmem + size > sk->sk_rcvbuf)) + goto drop; + + /* we drop only if the receive buf is full and the receive + * queue contains some other skb + */ + rmem = atomic_add_return(size, &sk->sk_rmem_alloc); + if ((rmem > sk->sk_rcvbuf) && (rmem > size)) + goto uncharge_drop; + + spin_lock(&list->lock); + if (size >= sk->sk_forward_alloc) { + amt = sk_mem_pages(size); + delta = amt << SK_MEM_QUANTUM_SHIFT; + if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) { + err = -ENOBUFS; + spin_unlock(&list->lock); + goto uncharge_drop; + } + + sk->sk_forward_alloc += delta; + } + + sk->sk_forward_alloc -= size; + + /* the skb owner in now the udp socket */ + skb->sk = sk; + skb->destructor = udp_rmem_free; + skb->dev = NULL; + sock_skb_set_dropcount(sk, skb); + + __skb_queue_tail(list, skb); + spin_unlock(&list->lock); + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk); + + return 0; + +uncharge_drop: + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + +drop: + atomic_inc(&sk->sk_drops); + return err; +} +EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); + +static void udp_destruct_sock(struct sock *sk) +{ + /* reclaim completely the forward allocated memory */ + __skb_queue_purge(&sk->sk_receive_queue); + udp_rmem_release(sk, 0, 0); + inet_sock_destruct(sk); +} + +int udp_init_sock(struct sock *sk) +{ + sk->sk_destruct = udp_destruct_sock; + return 0; +} +EXPORT_SYMBOL_GPL(udp_init_sock); + +void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) +{ + if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) { + bool slow = lock_sock_fast(sk); + + sk_peek_offset_bwd(sk, len); + unlock_sock_fast(sk, slow); + } + consume_skb(skb); +} +EXPORT_SYMBOL_GPL(skb_consume_udp); + /** * first_packet_length - return length of first packet in receive queue * @sk: socket @@ -1201,13 +1307,7 @@ static int first_packet_length(struct sock *sk) res = skb ? skb->len : -1; spin_unlock_bh(&rcvq->lock); - if (!skb_queue_empty(&list_kill)) { - bool slow = lock_sock_fast(sk); - - __skb_queue_purge(&list_kill); - sk_mem_reclaim_partial(sk); - unlock_sock_fast(sk, slow); - } + __skb_queue_purge(&list_kill); return res; } @@ -1256,7 +1356,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int err; int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; - bool slow; if (flags & MSG_ERRQUEUE) return ip_recv_error(sk, msg, len, addr_len); @@ -1297,13 +1396,12 @@ try_again: } if (unlikely(err)) { - trace_kfree_skb(skb, udp_recvmsg); if (!peeked) { atomic_inc(&sk->sk_drops); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - skb_free_datagram_locked(sk, skb); + kfree_skb(skb); return err; } @@ -1328,16 +1426,15 @@ try_again: if (flags & MSG_TRUNC) err = ulen; - __skb_free_datagram_locked(sk, skb, peeking ? -err : err); + skb_consume_udp(sk, skb, peeking ? -err : err); return err; csum_copy_err: - slow = lock_sock_fast(sk); - if (!skb_kill_datagram(sk, skb, flags)) { + if (!__sk_queue_drop_skb(sk, skb, flags)) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - unlock_sock_fast(sk, slow); + kfree_skb(skb); /* starting over for a new packet, but check if we need to yield */ cond_resched(); @@ -1456,7 +1553,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_incoming_cpu_update(sk); } - rc = __sock_queue_rcv_skb(sk, skb); + rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -1471,7 +1568,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } return 0; - } static struct static_key udp_encap_needed __read_mostly; @@ -1493,7 +1589,6 @@ EXPORT_SYMBOL(udp_encap_enable); int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); - int rc; int is_udplite = IS_UDPLITE(sk); /* @@ -1580,25 +1675,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto drop; udp_csum_pull_header(skb); - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { - __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, - is_udplite); - goto drop; - } - - rc = 0; ipv4_pktinfo_prepare(sk, skb); - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - rc = __udp_queue_rcv_skb(sk, skb); - else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { - bh_unlock_sock(sk); - goto drop; - } - bh_unlock_sock(sk); - - return rc; + return __udp_queue_rcv_skb(sk, skb); csum_error: __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); @@ -2208,13 +2287,13 @@ struct proto udp_prot = { .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, + .init = udp_init_sock, .destroy = udp_destroy_sock, .setsockopt = udp_setsockopt, .getsockopt = udp_getsockopt, .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = __udp_queue_rcv_skb, .release_cb = ip4_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2f1f5d439788..d8983e15f859 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -112,6 +112,27 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } +static inline s32 rfc3315_s14_backoff_init(s32 irt) +{ + /* multiply 'initial retransmission time' by 0.9 .. 1.1 */ + u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt; + do_div(tmp, 1000000); + return (s32)tmp; +} + +static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt) +{ + /* multiply 'retransmission timeout' by 1.9 .. 2.1 */ + u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt; + do_div(tmp, 1000000); + if ((s32)tmp > mrt) { + /* multiply 'maximum retransmission time' by 0.9 .. 1.1 */ + tmp = (900000 + prandom_u32() % 200001) * (u64)mrt; + do_div(tmp, 1000000); + } + return (s32)tmp; +} + #ifdef CONFIG_SYSCTL static int addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -187,6 +208,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -232,6 +254,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -3687,7 +3710,7 @@ static void addrconf_rs_timer(unsigned long data) if (idev->if_flags & IF_RA_RCVD) goto out; - if (idev->rs_probes++ < idev->cnf.rtr_solicits) { + if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, @@ -3696,11 +3719,13 @@ static void addrconf_rs_timer(unsigned long data) goto put; write_lock(&idev->lock); + idev->rs_interval = rfc3315_s14_backoff_update( + idev->rs_interval, idev->cnf.rtr_solicit_max_interval); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == idev->cnf.rtr_solicits) ? idev->cnf.rtr_solicit_delay : - idev->cnf.rtr_solicit_interval); + idev->rs_interval); } else { /* * Note: we do not support deprecated "all on-link" @@ -3949,7 +3974,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits > 0 && + ifp->idev->cnf.rtr_solicits != 0 && (dev->flags&IFF_LOOPBACK) == 0; read_unlock_bh(&ifp->idev->lock); @@ -3971,10 +3996,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); + ifp->idev->rs_interval = rfc3315_s14_backoff_init( + ifp->idev->cnf.rtr_solicit_interval); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; - addrconf_mod_rs_timer(ifp->idev, - ifp->idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); spin_unlock(&ifp->lock); write_unlock_bh(&ifp->idev->lock); } @@ -4891,6 +4917,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; array[DEVCONF_RTR_SOLICIT_INTERVAL] = jiffies_to_msecs(cnf->rtr_solicit_interval); + array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] = + jiffies_to_msecs(cnf->rtr_solicit_max_interval); array[DEVCONF_RTR_SOLICIT_DELAY] = jiffies_to_msecs(cnf->rtr_solicit_delay); array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; @@ -4961,18 +4989,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, &stats[0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, &stats[0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read(&mib[i]), &stats[i]); - memset(&stats[items], 0, pad); + memset(&stats[ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -5005,7 +5033,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } @@ -5099,7 +5127,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) return -EINVAL; if (!ipv6_accept_ra(idev)) return -EINVAL; - if (idev->cnf.rtr_solicits <= 0) + if (idev->cnf.rtr_solicits == 0) return -EINVAL; write_lock_bh(&idev->lock); @@ -5128,8 +5156,10 @@ update_lft: if (update_rs) { idev->if_flags |= IF_RS_SENT; + idev->rs_interval = rfc3315_s14_backoff_init( + idev->cnf.rtr_solicit_interval); idev->rs_probes = 1; - addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(idev, idev->rs_interval); } /* Well, that's kinda nasty ... */ @@ -5467,20 +5497,6 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, } static -int addrconf_sysctl_hop_limit(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table lctl; - int min_hl = 1, max_hl = 255; - - lctl = *ctl; - lctl.extra1 = &min_hl; - lctl.extra2 = &max_hl; - - return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos); -} - -static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -5713,6 +5729,10 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } +static int minus_one = -1; +static const int one = 1; +static const int two_five_five = 255; + static const struct ctl_table addrconf_sysctl[] = { { .procname = "forwarding", @@ -5726,7 +5746,9 @@ static const struct ctl_table addrconf_sysctl[] = { .data = &ipv6_devconf.hop_limit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = addrconf_sysctl_hop_limit, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + .extra2 = (void *)&two_five_five, }, { .procname = "mtu", @@ -5768,7 +5790,8 @@ static const struct ctl_table addrconf_sysctl[] = { .data = &ipv6_devconf.rtr_solicits, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &minus_one, }, { .procname = "router_solicitation_interval", @@ -5778,6 +5801,13 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec_jiffies, }, { + .procname = "router_solicitation_max_interval", + .data = &ipv6_devconf.rtr_solicit_max_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { .procname = "router_solicitation_delay", .data = &ipv6_devconf.rtr_solicit_delay, .maxlen = sizeof(int), @@ -6044,8 +6074,14 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, for (i = 0; table[i].data; i++) { table[i].data += (char *)p - (char *)&ipv6_devconf; - table[i].extra1 = idev; /* embedded; no ref */ - table[i].extra2 = net; + /* If one of these is already set, then it is not safe to + * overwrite either of them: this makes proc_dointvec_minmax + * usable. + */ + if (!table[i].extra1 && !table[i].extra2) { + table[i].extra1 = idev; /* embedded; no ref */ + table[i].extra2 = net; + } } snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index e50c27a93e17..a7bc54ab46e2 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -6,29 +6,88 @@ #include <linux/socket.h> #include <linux/types.h> #include <net/checksum.h> +#include <net/dst_cache.h> #include <net/ip.h> #include <net/ip6_fib.h> +#include <net/ip6_route.h> #include <net/lwtunnel.h> #include <net/protocol.h> #include <uapi/linux/ila.h> #include "ila.h" +struct ila_lwt { + struct ila_params p; + struct dst_cache dst_cache; + u32 connected : 1; +}; + +static inline struct ila_lwt *ila_lwt_lwtunnel( + struct lwtunnel_state *lwt) +{ + return (struct ila_lwt *)lwt->data; +} + static inline struct ila_params *ila_params_lwtunnel( - struct lwtunnel_state *lwstate) + struct lwtunnel_state *lwt) { - return (struct ila_params *)lwstate->data; + return &ila_lwt_lwtunnel(lwt)->p; } static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); + struct dst_entry *orig_dst = skb_dst(skb); + struct rt6_info *rt = (struct rt6_info *)orig_dst; + struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate); + struct dst_entry *dst; + int err = -EINVAL; if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), true); + ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate), + true); - return dst->lwtstate->orig_output(net, sk, skb); + if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) { + /* Already have a next hop address in route, no need for + * dest cache route. + */ + return orig_dst->lwtstate->orig_output(net, sk, skb); + } + + dst = dst_cache_get(&ilwt->dst_cache); + if (unlikely(!dst)) { + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct flowi6 fl6; + + /* Lookup a route for the new destination. Take into + * account that the base route may already have a gateway. + */ + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = orig_dst->dev->ifindex; + fl6.flowi6_iif = LOOPBACK_IFINDEX; + fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst, + &ip6h->daddr); + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = -EHOSTUNREACH; + dst_release(dst); + goto drop; + } + + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto drop; + } + + if (ilwt->connected) + dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr); + } + + skb_dst_set(skb, dst); + return dst_output(net, sk, skb); drop: kfree_skb(skb); @@ -60,9 +119,9 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts) { + struct ila_lwt *ilwt; struct ila_params *p; struct nlattr *tb[ILA_ATTR_MAX + 1]; - size_t encap_len = sizeof(*p); struct lwtunnel_state *newts; const struct fib6_config *cfg6 = cfg; struct ila_addr *iaddr; @@ -71,7 +130,7 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, if (family != AF_INET6) return -EINVAL; - if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) { + if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) { /* Need to have full locator and at least type field * included in destination */ @@ -95,11 +154,17 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, if (!tb[ILA_ATTR_LOCATOR]) return -EINVAL; - newts = lwtunnel_state_alloc(encap_len); + newts = lwtunnel_state_alloc(sizeof(*ilwt)); if (!newts) return -ENOMEM; - newts->len = encap_len; + ilwt = ila_lwt_lwtunnel(newts); + ret = dst_cache_init(&ilwt->dst_cache, GFP_ATOMIC); + if (ret) { + kfree(newts); + return ret; + } + p = ila_params_lwtunnel(newts); p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); @@ -120,11 +185,19 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | LWTUNNEL_STATE_INPUT_REDIRECT; + if (cfg6->fc_dst_len == 8 * sizeof(struct in6_addr)) + ilwt->connected = 1; + *ts = newts; return 0; } +static void ila_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&ila_lwt_lwtunnel(lwt)->dst_cache); +} + static int ila_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { @@ -159,6 +232,7 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) static const struct lwtunnel_encap_ops ila_encap_ops = { .build_state = ila_build_state, + .destroy_state = ila_destroy_state, .output = ila_output, .input = ila_input, .fill_encap = ila_fill_encap_info, diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index e604013dd814..628ae6d85b59 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -118,15 +118,7 @@ static const struct rhashtable_params rht_params = { .obj_cmpfn = ila_cmpfn, }; -static struct genl_family ila_nl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = ILA_GENL_NAME, - .version = ILA_GENL_VERSION, - .maxattr = ILA_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; +static struct genl_family ila_nl_family; static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, @@ -561,6 +553,18 @@ static const struct genl_ops ila_nl_ops[] = { }, }; +static struct genl_family ila_nl_family __ro_after_init = { + .hdrsize = 0, + .name = ILA_GENL_NAME, + .version = ILA_GENL_VERSION, + .maxattr = ILA_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .ops = ila_nl_ops, + .n_ops = ARRAY_SIZE(ila_nl_ops), +}; + #define ILA_HASH_TABLE_SIZE 1024 static __net_init int ila_init_net(struct net *net) @@ -623,7 +627,7 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) return 0; } -int ila_xlat_init(void) +int __init ila_xlat_init(void) { int ret; @@ -631,8 +635,7 @@ int ila_xlat_init(void) if (ret) goto exit; - ret = genl_register_family_with_ops(&ila_nl_family, - ila_nl_ops); + ret = genl_register_family(&ila_nl_family); if (ret < 0) goto unregister; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4ce74f86291b..d7d6d3ae0b3b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -648,7 +648,6 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = skb->protocol; err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6a66adba0c22..3a70567846aa 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1634,7 +1634,7 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) struct ip6_tnl *tnl = netdev_priv(dev); if (tnl->parms.proto == IPPROTO_IPIP) { - if (new_mtu < 68) + if (new_mtu < ETH_MIN_MTU) return -EINVAL; } else { if (new_mtu < IPV6_MIN_MTU) @@ -1787,6 +1787,8 @@ ip6_tnl_dev_init_gen(struct net_device *dev) dev->mtu = ETH_DATA_LEN - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = 0xFFF8 - dev->hard_header_len; return 0; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 8a02ca8a11af..35c5b2d8c401 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -812,30 +812,11 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return err; } -/** - * vti6_tnl_change_mtu - change mtu manually for tunnel device - * @dev: virtual device associated with tunnel - * @new_mtu: the new mtu - * - * Return: - * 0 on success, - * %-EINVAL if mtu too small - **/ -static int vti6_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < IPV6_MIN_MTU) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops vti6_netdev_ops = { .ndo_init = vti6_dev_init, .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, - .ndo_change_mtu = vti6_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -855,6 +836,8 @@ static void vti6_dev_setup(struct net_device *dev) dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); dev->mtu = ETH_DATA_LEN; + dev->min_mtu = IPV6_MIN_MTU; + dev->max_mtu = IP_MAX_MTU; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); netif_keep_dst(dev); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index fccb5dd91902..7f4265b1649b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2285,8 +2285,8 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, return 1; } -int ip6mr_get_route(struct net *net, - struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, + int nowait, u32 portid) { int err; struct mr6_table *mrt; @@ -2331,6 +2331,7 @@ int ip6mr_get_route(struct net *net, return -ENOMEM; } + NETLINK_CB(skb2).portid = portid; skb_reset_transport_header(skb2); skb_put(skb2, sizeof(struct ipv6hdr)); diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0af84..cc8e3ae9ca73 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -30,6 +30,11 @@ #include <net/transp_v6.h> #include <net/ipv6.h> +#define MAX4(a, b, c, d) \ + max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) +#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ + IPSTATS_MIB_MAX, ICMP_MIB_MAX) + static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -191,25 +196,34 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { + unsigned long buff[SNMP_MIB_MAX]; int i; - unsigned long val; - for (i = 0; itemlist[i].name; i++) { - val = pcpumib ? - snmp_fold_field(pcpumib, itemlist[i].entry) : - atomic_long_read(smib + itemlist[i].entry); - seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); + if (pcpumib) { + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, itemlist, pcpumib); + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", + itemlist[i].name, buff[i]); + } else { + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, + atomic_long_read(smib + itemlist[i].entry)); } } static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { + u64 buff64[SNMP_MIB_MAX]; int i; + memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) - seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, - snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 54404f08efcc..d7e8b955ade8 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -65,11 +65,12 @@ #define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ -static struct raw_hashinfo raw_v6_hashinfo = { +struct raw_hashinfo raw_v6_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), }; +EXPORT_SYMBOL_GPL(raw_v6_hashinfo); -static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, +struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif) { @@ -102,6 +103,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, found: return sk; } +EXPORT_SYMBOL_GPL(__raw_v6_lookup); /* * 0 - deliver @@ -1259,6 +1261,7 @@ struct proto rawv6_prot = { .compat_getsockopt = compat_rawv6_getsockopt, .compat_ioctl = compat_rawv6_ioctl, #endif + .diag_destroy = raw_abort, }; #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5a5aeb92b4ec..bdbc38e8bf29 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3216,7 +3216,9 @@ static int rt6_fill_node(struct net *net, if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { - int err = ip6mr_get_route(net, skb, rtm, nowait); + int err = ip6mr_get_route(net, skb, rtm, nowait, + portid); + if (err <= 0) { if (!nowait) { if (err == 0) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b1cdf8009d29..dc7a3449ffc1 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1318,23 +1318,11 @@ done: return err; } -static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - int t_hlen = tunnel->hlen + sizeof(struct iphdr); - - if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - t_hlen) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops ipip6_netdev_ops = { .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, - .ndo_change_mtu = ipip6_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, }; @@ -1365,6 +1353,8 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; + dev->min_mtu = IPV6_MIN_MTU; + dev->max_mtu = 0xFFF8 - t_hlen; dev->flags = IFF_NOARP; netif_keep_dst(dev); dev->addr_len = 4; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 54cf7197c7ab..5a27ab4eab39 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1190,6 +1190,16 @@ out: return NULL; } +static void tcp_v6_restore_cb(struct sk_buff *skb) +{ + /* We need to move header back to the beginning if xfrm6_policy_check() + * and tcp_v6_fill_cb() are going to be called again. + * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. + */ + memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, + sizeof(struct inet6_skb_parm)); +} + /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * @@ -1319,6 +1329,7 @@ ipv6_pktoptions: np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { skb_set_owner_r(opt_skb, sk); + tcp_v6_restore_cb(opt_skb); opt_skb = xchg(&np->pktoptions, opt_skb); } else { __kfree_skb(opt_skb); @@ -1352,15 +1363,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, TCP_SKB_CB(skb)->sacked = 0; } -static void tcp_v6_restore_cb(struct sk_buff *skb) -{ - /* We need to move header back to the beginning if xfrm6_policy_check() - * and tcp_v6_fill_cb() are going to be called again. - */ - memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, - sizeof(struct inet6_skb_parm)); -} - static int tcp_v6_rcv(struct sk_buff *skb) { const struct tcphdr *th; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9aa7c1c7a9ce..71963b23d5a5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -334,7 +334,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int is_udplite = IS_UDPLITE(sk); bool checksum_valid = false; int is_udp4; - bool slow; if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); @@ -378,7 +377,6 @@ try_again: goto csum_copy_err; } if (unlikely(err)) { - trace_kfree_skb(skb, udpv6_recvmsg); if (!peeked) { atomic_inc(&sk->sk_drops); if (is_udp4) @@ -388,7 +386,7 @@ try_again: UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - skb_free_datagram_locked(sk, skb); + kfree_skb(skb); return err; } if (!peeked) { @@ -437,12 +435,11 @@ try_again: if (flags & MSG_TRUNC) err = ulen; - __skb_free_datagram_locked(sk, skb, peeking ? -err : err); + skb_consume_udp(sk, skb, peeking ? -err : err); return err; csum_copy_err: - slow = lock_sock_fast(sk); - if (!skb_kill_datagram(sk, skb, flags)) { + if (!__sk_queue_drop_skb(sk, skb, flags)) { if (is_udp4) { UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); @@ -455,7 +452,7 @@ csum_copy_err: UDP_MIB_INERRORS, is_udplite); } } - unlock_sock_fast(sk, slow); + kfree_skb(skb); /* starting over for a new packet, but check if we need to yield */ cond_resched(); @@ -523,7 +520,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_incoming_cpu_update(sk); } - rc = __sock_queue_rcv_skb(sk, skb); + rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -535,6 +532,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return -1; } + return 0; } @@ -556,7 +554,6 @@ EXPORT_SYMBOL(udpv6_encap_enable); int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); - int rc; int is_udplite = IS_UDPLITE(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) @@ -622,25 +619,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto drop; udp_csum_pull_header(skb); - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { - __UDP6_INC_STATS(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - goto drop; - } skb_dst_drop(skb); - bh_lock_sock(sk); - rc = 0; - if (!sock_owned_by_user(sk)) - rc = __udpv6_queue_rcv_skb(sk, skb); - else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { - bh_unlock_sock(sk); - goto drop; - } - bh_unlock_sock(sk); - - return rc; + return __udpv6_queue_rcv_skb(sk, skb); csum_error: __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); @@ -1433,12 +1415,12 @@ struct proto udpv6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, + .init = udp_init_sock, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, - .backlog_rcv = __udpv6_queue_rcv_skb, .release_cb = ip6_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index d8b7267280c3..74d09f91709e 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -51,7 +51,6 @@ static const struct net_device_ops irlan_eth_netdev_ops = { .ndo_stop = irlan_eth_close, .ndo_start_xmit = irlan_eth_xmit, .ndo_set_rx_mode = irlan_eth_set_multicast_list, - .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, }; @@ -67,7 +66,8 @@ static void irlan_eth_setup(struct net_device *dev) dev->netdev_ops = &irlan_eth_netdev_ops; dev->destructor = free_netdev; - + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; /* * Lets do all queueing in IrTTP instead of this device driver. diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index e15c40e86660..7fc340e574cf 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -24,13 +24,7 @@ -static struct genl_family irda_nl_family = { - .id = GENL_ID_GENERATE, - .name = IRDA_NL_NAME, - .hdrsize = 0, - .version = IRDA_NL_VERSION, - .maxattr = IRDA_NL_CMD_MAX, -}; +static struct genl_family irda_nl_family; static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info) { @@ -147,9 +141,19 @@ static const struct genl_ops irda_nl_ops[] = { }; -int irda_nl_register(void) +static struct genl_family irda_nl_family __ro_after_init = { + .name = IRDA_NL_NAME, + .hdrsize = 0, + .version = IRDA_NL_VERSION, + .maxattr = IRDA_NL_CMD_MAX, + .module = THIS_MODULE, + .ops = irda_nl_ops, + .n_ops = ARRAY_SIZE(irda_nl_ops), +}; + +int __init irda_nl_register(void) { - return genl_register_family_with_ops(&irda_nl_family, irda_nl_ops); + return genl_register_family(&irda_nl_family); } void irda_nl_unregister(void) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 02b45a8e8b35..cfb9e5f4e28f 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -453,19 +453,27 @@ static void iucv_sever_path(struct sock *sk, int with_user_data) } } -/* Send FIN through an IUCV socket for HIPER transport */ +/* Send controlling flags through an IUCV socket for HIPER transport */ static int iucv_send_ctrl(struct sock *sk, u8 flags) { int err = 0; int blen; struct sk_buff *skb; + u8 shutdown = 0; blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; + if (sk->sk_shutdown & SEND_SHUTDOWN) { + /* controlling flags should be sent anyway */ + shutdown = sk->sk_shutdown; + sk->sk_shutdown &= RCV_SHUTDOWN; + } skb = sock_alloc_send_skb(sk, blen, 1, &err); if (skb) { skb_reserve(skb, blen); err = afiucv_hs_send(NULL, sk, skb, flags); } + if (shutdown) + sk->sk_shutdown = shutdown; return err; } @@ -1315,8 +1323,13 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb, } IUCV_SKB_CB(skb)->offset = 0; - if (sock_queue_rcv_skb(sk, skb)) - skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb); + if (sk_filter(sk, skb)) { + atomic_inc(&sk->sk_drops); /* skb rejected by filter */ + kfree_skb(skb); + return; + } + if (__sock_queue_rcv_skb(sk, skb)) /* handle rcv queue full */ + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); } /* iucv_process_message_q() - Process outstanding IUCV messages @@ -1430,13 +1443,13 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, rskb = skb_dequeue(&iucv->backlog_skb_q); while (rskb) { IUCV_SKB_CB(rskb)->offset = 0; - if (sock_queue_rcv_skb(sk, rskb)) { + if (__sock_queue_rcv_skb(sk, rskb)) { + /* handle rcv queue full */ skb_queue_head(&iucv->backlog_skb_q, rskb); break; - } else { - rskb = skb_dequeue(&iucv->backlog_skb_q); } + rskb = skb_dequeue(&iucv->backlog_skb_q); } if (skb_queue_empty(&iucv->backlog_skb_q)) { if (!list_empty(&iucv->message_q.list)) @@ -2116,12 +2129,17 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb) skb_reset_transport_header(skb); skb_reset_network_header(skb); IUCV_SKB_CB(skb)->offset = 0; + if (sk_filter(sk, skb)) { + atomic_inc(&sk->sk_drops); /* skb rejected by filter */ + kfree_skb(skb); + return NET_RX_SUCCESS; + } + spin_lock(&iucv->message_q.lock); if (skb_queue_empty(&iucv->backlog_skb_q)) { - if (sock_queue_rcv_skb(sk, skb)) { + if (__sock_queue_rcv_skb(sk, skb)) /* handle rcv queue full */ skb_queue_tail(&iucv->backlog_skb_q, skb); - } } else skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); spin_unlock(&iucv->message_q.lock); diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index b7f869a85ab7..7e08a4d3d77d 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1160,19 +1160,6 @@ out: return copied ? : err; } -static ssize_t kcm_sock_splice(struct sock *sk, - struct pipe_inode_info *pipe, - struct splice_pipe_desc *spd) -{ - int ret; - - release_sock(sk); - ret = splice_to_pipe(pipe, spd); - lock_sock(sk); - - return ret; -} - static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) @@ -1202,8 +1189,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, if (len > rxm->full_len) len = rxm->full_len; - copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags, - kcm_sock_splice); + copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags); if (copied < 0) { err = copied; goto err_out; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 965f7e344cef..e2c6ae024565 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -259,6 +259,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p session->mtu = dev->mtu - session->hdr_len; dev->mtu = session->mtu; dev->needed_headroom += session->hdr_len; + dev->min_mtu = 0; + dev->max_mtu = ETH_MAX_MTU; priv = netdev_priv(dev); priv->dev = dev; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index bf3117771822..59aa2d204e4a 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -31,14 +31,7 @@ #include "l2tp_core.h" -static struct genl_family l2tp_nl_family = { - .id = GENL_ID_GENERATE, - .name = L2TP_GENL_NAME, - .version = L2TP_GENL_VERSION, - .hdrsize = 0, - .maxattr = L2TP_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family l2tp_nl_family; static const struct genl_multicast_group l2tp_multicast_group[] = { { @@ -977,6 +970,19 @@ static const struct genl_ops l2tp_nl_ops[] = { }, }; +static struct genl_family l2tp_nl_family __ro_after_init = { + .name = L2TP_GENL_NAME, + .version = L2TP_GENL_VERSION, + .hdrsize = 0, + .maxattr = L2TP_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = l2tp_nl_ops, + .n_ops = ARRAY_SIZE(l2tp_nl_ops), + .mcgrps = l2tp_multicast_group, + .n_mcgrps = ARRAY_SIZE(l2tp_multicast_group), +}; + int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops) { int ret; @@ -1010,12 +1016,10 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type) } EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); -static int l2tp_nl_init(void) +static int __init l2tp_nl_init(void) { pr_info("L2TP netlink interface\n"); - return genl_register_family_with_ops_groups(&l2tp_nl_family, - l2tp_nl_ops, - l2tp_multicast_group); + return genl_register_family(&l2tp_nl_family); } static void l2tp_nl_cleanup(void) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e29ff5749944..fd6541f3ade3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3,6 +3,7 @@ * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH + * Copyright (C) 2015-2016 Intel Deutschland GmbH * * This file is GPLv2 as found in COPYING. */ @@ -152,6 +153,149 @@ static void ieee80211_stop_p2p_device(struct wiphy *wiphy, ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev)); } +static int ieee80211_start_nan(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int ret; + + mutex_lock(&sdata->local->chanctx_mtx); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); + mutex_unlock(&sdata->local->chanctx_mtx); + if (ret < 0) + return ret; + + ret = ieee80211_do_open(wdev, true); + if (ret) + return ret; + + ret = drv_start_nan(sdata->local, sdata, conf); + if (ret) + ieee80211_sdata_stop(sdata); + + sdata->u.nan.conf = *conf; + + return ret; +} + +static void ieee80211_stop_nan(struct wiphy *wiphy, + struct wireless_dev *wdev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + drv_stop_nan(sdata->local, sdata); + ieee80211_sdata_stop(sdata); +} + +static int ieee80211_nan_change_conf(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, + u32 changes) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct cfg80211_nan_conf new_conf; + int ret = 0; + + if (sdata->vif.type != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!ieee80211_sdata_running(sdata)) + return -ENETDOWN; + + new_conf = sdata->u.nan.conf; + + if (changes & CFG80211_NAN_CONF_CHANGED_PREF) + new_conf.master_pref = conf->master_pref; + + if (changes & CFG80211_NAN_CONF_CHANGED_DUAL) + new_conf.dual = conf->dual; + + ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes); + if (!ret) + sdata->u.nan.conf = new_conf; + + return ret; +} + +static int ieee80211_add_nan_func(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_func *nan_func) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int ret; + + if (sdata->vif.type != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!ieee80211_sdata_running(sdata)) + return -ENETDOWN; + + spin_lock_bh(&sdata->u.nan.func_lock); + + ret = idr_alloc(&sdata->u.nan.function_inst_ids, + nan_func, 1, sdata->local->hw.max_nan_de_entries + 1, + GFP_ATOMIC); + spin_unlock_bh(&sdata->u.nan.func_lock); + + if (ret < 0) + return ret; + + nan_func->instance_id = ret; + + WARN_ON(nan_func->instance_id == 0); + + ret = drv_add_nan_func(sdata->local, sdata, nan_func); + if (ret) { + spin_lock_bh(&sdata->u.nan.func_lock); + idr_remove(&sdata->u.nan.function_inst_ids, + nan_func->instance_id); + spin_unlock_bh(&sdata->u.nan.func_lock); + } + + return ret; +} + +static struct cfg80211_nan_func * +ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata, + u64 cookie) +{ + struct cfg80211_nan_func *func; + int id; + + lockdep_assert_held(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) { + if (func->cookie == cookie) + return func; + } + + return NULL; +} + +static void ieee80211_del_nan_func(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct cfg80211_nan_func *func; + u8 instance_id = 0; + + if (sdata->vif.type != NL80211_IFTYPE_NAN || + !ieee80211_sdata_running(sdata)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = ieee80211_find_nan_func_by_cookie(sdata, cookie); + if (func) + instance_id = func->instance_id; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + if (instance_id) + drv_del_nan_func(sdata->local, sdata, instance_id); +} + static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) @@ -257,6 +401,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: @@ -2036,6 +2181,7 @@ static int ieee80211_scan(struct wiphy *wiphy, !(req->flags & NL80211_SCAN_FLAG_AP))) return -EOPNOTSUPP; break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -3377,6 +3523,63 @@ static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; } +void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct cfg80211_nan_func *func; + u64 cookie; + + if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = idr_find(&sdata->u.nan.function_inst_ids, inst_id); + if (WARN_ON(!func)) { + spin_unlock_bh(&sdata->u.nan.func_lock); + return; + } + + cookie = func->cookie; + idr_remove(&sdata->u.nan.function_inst_ids, inst_id); + + spin_unlock_bh(&sdata->u.nan.func_lock); + + cfg80211_free_nan_func(func); + + cfg80211_nan_func_terminated(ieee80211_vif_to_wdev(vif), inst_id, + reason, cookie, gfp); +} +EXPORT_SYMBOL(ieee80211_nan_func_terminated); + +void ieee80211_nan_func_match(struct ieee80211_vif *vif, + struct cfg80211_nan_match_params *match, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct cfg80211_nan_func *func; + + if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = idr_find(&sdata->u.nan.function_inst_ids, match->inst_id); + if (WARN_ON(!func)) { + spin_unlock_bh(&sdata->u.nan.func_lock); + return; + } + match->cookie = func->cookie; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp); +} +EXPORT_SYMBOL(ieee80211_nan_func_match); + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -3462,4 +3665,9 @@ const struct cfg80211_ops mac80211_config_ops = { .set_ap_chanwidth = ieee80211_set_ap_chanwidth, .add_tx_ts = ieee80211_add_tx_ts, .del_tx_ts = ieee80211_del_tx_ts, + .start_nan = ieee80211_start_nan, + .stop_nan = ieee80211_stop_nan, + .nan_change_conf = ieee80211_nan_change_conf, + .add_nan_func = ieee80211_add_nan_func, + .del_nan_func = ieee80211_del_nan_func, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 74142d07ad31..e75cbf6ecc26 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -274,6 +274,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, ieee80211_get_max_required_bw(sdata)); break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: continue; case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_WDS: @@ -646,6 +647,9 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *curr_ctx = NULL; int ret = 0; + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN)) + return -ENOTSUPP; + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); @@ -718,6 +722,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, switch (sdata->vif.type) { case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: continue; case NL80211_IFTYPE_STATION: if (!sdata->u.mgd.associated) @@ -980,6 +985,7 @@ ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata) case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: case NUM_NL80211_IFTYPES: WARN_ON(1); break; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 8ca62b6bb02a..f56e2f487d09 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -89,13 +89,19 @@ static ssize_t aqm_read(struct file *file, "R fq_flows_cnt %u\n" "R fq_backlog %u\n" "R fq_overlimit %u\n" + "R fq_overmemory %u\n" "R fq_collisions %u\n" + "R fq_memory_usage %u\n" + "RW fq_memory_limit %u\n" "RW fq_limit %u\n" "RW fq_quantum %u\n", fq->flows_cnt, fq->backlog, + fq->overmemory, fq->overlimit, fq->collisions, + fq->memory_usage, + fq->memory_limit, fq->limit, fq->quantum); @@ -128,6 +134,8 @@ static ssize_t aqm_write(struct file *file, if (sscanf(buf, "fq_limit %u", &local->fq.limit) == 1) return count; + else if (sscanf(buf, "fq_memory_limit %u", &local->fq.memory_limit) == 1) + return count; else if (sscanf(buf, "fq_quantum %u", &local->fq.quantum) == 1) return count; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 5d35c0f37bb7..bcec1240f41d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -556,9 +556,15 @@ static ssize_t ieee80211_if_parse_tsf( ret = kstrtoull(buf, 10, &tsf); if (ret < 0) return ret; - if (tsf_is_delta) - tsf = drv_get_tsf(local, sdata) + tsf_is_delta * tsf; - if (local->ops->set_tsf) { + if (tsf_is_delta && local->ops->offset_tsf) { + drv_offset_tsf(local, sdata, tsf_is_delta * tsf); + wiphy_info(local->hw.wiphy, + "debugfs offset TSF by %018lld\n", + tsf_is_delta * tsf); + } else if (local->ops->set_tsf) { + if (tsf_is_delta) + tsf = drv_get_tsf(local, sdata) + + tsf_is_delta * tsf; drv_set_tsf(local, sdata, tsf); wiphy_info(local->hw.wiphy, "debugfs set TSF to %#018llx\n", tsf); diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index c701b6438bd9..bb886e7db47f 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -215,6 +215,21 @@ void drv_set_tsf(struct ieee80211_local *local, trace_drv_return_void(local); } +void drv_offset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset) +{ + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_offset_tsf(local, sdata, offset); + if (local->ops->offset_tsf) + local->ops->offset_tsf(&local->hw, &sdata->vif, offset); + trace_drv_return_void(local); +} + void drv_reset_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index fe35a1c0dc86..09f77e4a8a79 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -162,6 +162,7 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, return; if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_NAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && !sdata->vif.mu_mimo_owner))) return; @@ -568,6 +569,9 @@ u64 drv_get_tsf(struct ieee80211_local *local, void drv_set_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u64 tsf); +void drv_offset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset); void drv_reset_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); @@ -1165,4 +1169,83 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, local->ops->wake_tx_queue(&local->hw, &txq->txq); } +static inline int drv_start_nan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_start_nan(local, sdata, conf); + ret = local->ops->start_nan(&local->hw, &sdata->vif, conf); + trace_drv_return_int(local, ret); + return ret; +} + +static inline void drv_stop_nan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_stop_nan(local, sdata); + local->ops->stop_nan(&local->hw, &sdata->vif); + trace_drv_return_void(local); +} + +static inline int drv_nan_change_conf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf, + u32 changes) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + if (!local->ops->nan_change_conf) + return -EOPNOTSUPP; + + trace_drv_nan_change_conf(local, sdata, conf, changes); + ret = local->ops->nan_change_conf(&local->hw, &sdata->vif, conf, + changes); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline int drv_add_nan_func(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const struct cfg80211_nan_func *nan_func) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + if (!local->ops->add_nan_func) + return -EOPNOTSUPP; + + trace_drv_add_nan_func(local, sdata, nan_func); + ret = local->ops->add_nan_func(&local->hw, &sdata->vif, nan_func); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline void drv_del_nan_func(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u8 instance_id) +{ + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_del_nan_func(local, sdata, instance_id); + if (local->ops->del_nan_func) + local->ops->del_nan_func(&local->hw, &sdata->vif, instance_id); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e496dee5af08..34c2add2c455 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -86,6 +86,8 @@ struct ieee80211_local; #define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) +#define IEEE80211_MAX_NAN_INSTANCE_ID 255 + struct ieee80211_fragment_entry { struct sk_buff_head skb_list; unsigned long first_frag_time; @@ -813,12 +815,14 @@ enum txq_info_flags { * @def_flow: used as a fallback flow when a packet destined to @tin hashes to * a fq_flow which is already owned by a different tin * @def_cvars: codel vars for @def_flow + * @frags: used to keep fragments created after dequeue */ struct txq_info { struct fq_tin tin; struct fq_flow def_flow; struct codel_vars def_cvars; struct codel_stats cstats; + struct sk_buff_head frags; unsigned long flags; /* keep last! */ @@ -830,6 +834,20 @@ struct ieee80211_if_mntr { u8 mu_follow_addr[ETH_ALEN] __aligned(2); }; +/** + * struct ieee80211_if_nan - NAN state + * + * @conf: current NAN configuration + * @func_ids: a bitmap of available instance_id's + */ +struct ieee80211_if_nan { + struct cfg80211_nan_conf conf; + + /* protects function_inst_ids */ + spinlock_t func_lock; + struct idr function_inst_ids; +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -929,6 +947,7 @@ struct ieee80211_sub_if_data { struct ieee80211_if_mesh mesh; struct ieee80211_if_ocb ocb; struct ieee80211_if_mntr mntr; + struct ieee80211_if_nan nan; } u; #ifdef CONFIG_MAC80211_DEBUGFS @@ -1481,6 +1500,13 @@ static inline struct txq_info *to_txq_info(struct ieee80211_txq *txq) return container_of(txq, struct txq_info, txq); } +static inline bool txq_has_queue(struct ieee80211_txq *txq) +{ + struct txq_info *txqi = to_txq_info(txq); + + return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets); +} + static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) { return ether_addr_equal(raddr, addr) || diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b0abddc714ef..73e6a8fd2845 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -150,15 +150,6 @@ void ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_hw_config(local, change); } -static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} - static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, bool check_dup) { @@ -327,6 +318,9 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata, int n_queues = sdata->local->hw.queues; int i; + if (iftype == NL80211_IFTYPE_NAN) + return 0; + if (iftype != NL80211_IFTYPE_P2P_DEVICE) { for (i = 0; i < IEEE80211_NUM_ACS; i++) { if (WARN_ON_ONCE(sdata->vif.hw_queue[i] == @@ -545,6 +539,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_NAN: /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: @@ -646,7 +641,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) local->fif_probe_req++; } - if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); @@ -660,6 +656,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: break; default: /* not reached */ @@ -792,6 +789,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, struct ps_data *ps; struct cfg80211_chan_def chandef; bool cancel_scan; + struct cfg80211_nan_func *func; clear_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -944,6 +942,18 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_adjust_monitor_flags(sdata, -1); break; + case NL80211_IFTYPE_NAN: + /* clean all the functions */ + spin_lock_bh(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, i) { + idr_remove(&sdata->u.nan.function_inst_ids, i); + cfg80211_free_nan_func(func); + } + idr_destroy(&sdata->u.nan.function_inst_ids); + + spin_unlock_bh(&sdata->u.nan.func_lock); + break; case NL80211_IFTYPE_P2P_DEVICE: /* relies on synchronize_rcu() below */ RCU_INIT_POINTER(local->p2p_sdata, NULL); @@ -1147,7 +1157,6 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_uninit = ieee80211_uninit, .ndo_start_xmit = ieee80211_subif_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, - .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_netdev_select_queue, .ndo_get_stats64 = ieee80211_get_stats64, @@ -1181,7 +1190,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_uninit = ieee80211_uninit, .ndo_start_xmit = ieee80211_monitor_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, - .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_monitor_select_queue, .ndo_get_stats64 = ieee80211_get_stats64, @@ -1455,6 +1463,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_WDS: sdata->vif.bss_conf.bssid = NULL; break; + case NL80211_IFTYPE_NAN: + idr_init(&sdata->u.nan.function_inst_ids); + spin_lock_init(&sdata->u.nan.func_lock); + sdata->vif.bss_conf.bssid = sdata->vif.addr; + break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: sdata->vif.bss_conf.bssid = sdata->vif.addr; @@ -1722,7 +1735,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ASSERT_RTNL(); - if (type == NL80211_IFTYPE_P2P_DEVICE) { + if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) { struct wireless_dev *wdev; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, @@ -1860,6 +1873,10 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops); + /* MTU range: 256 - 2304 */ + ndev->min_mtu = 256; + ndev->max_mtu = IEEE80211_MAX_DATA_LEN; + ret = register_netdevice(ndev); if (ret) { ieee80211_if_free(ndev); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ac053a9df36d..1075ac24c8c5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -821,6 +821,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) !local->ops->tdls_recv_channel_switch)) return -EOPNOTSUPP; + if (WARN_ON(local->hw.wiphy->interface_modes & + BIT(NL80211_IFTYPE_NAN) && + (!local->ops->start_nan || !local->ops->stop_nan))) + return -EINVAL; + #ifdef CONFIG_PM if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume)) return -EINVAL; @@ -1058,6 +1063,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->dynamic_ps_forced_timeout = -1; + if (!local->hw.max_nan_de_entries) + local->hw.max_nan_de_entries = IEEE80211_MAX_NAN_INSTANCE_ID; + result = ieee80211_wep_init(local); if (result < 0) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 64bc22ad9496..faca22cd02b5 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -28,7 +28,7 @@ * could be, for instance, in case a neighbor is restarted and its TSF counter * reset. */ -#define TOFFSET_MAXIMUM_ADJUSTMENT 30000 /* 30 ms */ +#define TOFFSET_MAXIMUM_ADJUSTMENT 800 /* 0.8 ms */ struct sync_method { u8 method; @@ -70,9 +70,13 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) } spin_unlock_bh(&ifmsh->sync_offset_lock); - tsf = drv_get_tsf(local, sdata); - if (tsf != -1ULL) - drv_set_tsf(local, sdata, tsf + tsfdelta); + if (local->ops->offset_tsf) { + drv_offset_tsf(local, sdata, tsfdelta); + } else { + tsf = drv_get_tsf(local, sdata); + if (tsf != -1ULL) + drv_set_tsf(local, sdata, tsf + tsfdelta); + } } static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 55a9c5b94ce1..c3f610bba3fe 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -128,7 +128,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) if (!ieee80211_sdata_running(sdata)) continue; - if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_NAN) continue; if (sdata->vif.type != NL80211_IFTYPE_MONITOR) @@ -838,6 +839,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, case NL80211_IFTYPE_P2P_DEVICE: need_offchan = true; break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f7cf342bab52..6175db385ba7 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1323,9 +1323,7 @@ static void sta_ps_start(struct sta_info *sta) return; for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - - if (txqi->tin.backlog_packets) + if (txq_has_queue(sta->sta.txq[tid])) set_bit(tid, &sta->txq_buffered_tids); else clear_bit(tid, &sta->txq_buffered_tids); @@ -3586,6 +3584,9 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) ieee80211_is_probe_req(hdr->frame_control) || ieee80211_is_probe_resp(hdr->frame_control) || ieee80211_is_beacon(hdr->frame_control); + case NL80211_IFTYPE_NAN: + /* Currently no frames on NAN interface are allowed */ + return false; default: break; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 011880d633b4..78e9ecbc96e6 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1202,12 +1202,10 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) if (sta->sta.txq[0]) { for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[i]); - - if (!txqi->tin.backlog_packets) + if (!txq_has_queue(sta->sta.txq[i])) continue; - drv_wake_tx_queue(local, txqi); + drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i])); } } @@ -1638,10 +1636,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, return; for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - if (!(driver_release_tids & BIT(tid)) || - txqi->tin.backlog_packets) + txq_has_queue(sta->sta.txq[tid])) continue; sta_info_recalc_tim(sta); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 77e4c53baefb..92a47afaa989 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -984,6 +984,32 @@ TRACE_EVENT(drv_set_tsf, ) ); +TRACE_EVENT(drv_offset_tsf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset), + + TP_ARGS(local, sdata, offset), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(s64, tsf_offset) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->tsf_offset = offset; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT " tsf offset:%lld", + LOCAL_PR_ARG, VIF_PR_ARG, + (unsigned long long)__entry->tsf_offset + ) +); + DEFINE_EVENT(local_sdata_evt, drv_reset_tsf, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), @@ -1700,6 +1726,139 @@ TRACE_EVENT(drv_get_expected_throughput, ) ); +TRACE_EVENT(drv_start_nan, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf), + + TP_ARGS(local, sdata, conf), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, master_pref) + __field(u8, dual) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", master preference: %u, dual: %d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, + __entry->dual + ) +); + +TRACE_EVENT(drv_stop_nan, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + + TP_ARGS(local, sdata), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG + ) +); + +TRACE_EVENT(drv_nan_change_conf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf, + u32 changes), + + TP_ARGS(local, sdata, conf, changes), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, master_pref) + __field(u8, dual) + __field(u32, changes) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + __entry->changes = changes; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", master preference: %u, dual: %d, changes: 0x%x", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, + __entry->dual, __entry->changes + ) +); + +TRACE_EVENT(drv_add_nan_func, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const struct cfg80211_nan_func *func), + + TP_ARGS(local, sdata, func), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, type) + __field(u8, inst_id) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->type = func->type; + __entry->inst_id = func->instance_id; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", type: %u, inst_id: %u", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->type, __entry->inst_id + ) +); + +TRACE_EVENT(drv_del_nan_func, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u8 instance_id), + + TP_ARGS(local, sdata, instance_id), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, instance_id) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->instance_id = instance_id; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", instance_id: %u", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->instance_id + ) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1ff08be90a98..1c56abc49627 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -796,36 +796,6 @@ static __le16 ieee80211_tx_next_seq(struct sta_info *sta, int tid) return ret; } -static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, - struct ieee80211_vif *vif, - struct ieee80211_sta *pubsta, - struct sk_buff *skb) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_txq *txq = NULL; - - if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || - (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) - return NULL; - - if (!ieee80211_is_data(hdr->frame_control)) - return NULL; - - if (pubsta) { - u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - - txq = pubsta->txq[tid]; - } else if (vif) { - txq = vif->txq; - } - - if (!txq) - return NULL; - - return to_txq_info(txq); -} - static ieee80211_tx_result debug_noinline ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) { @@ -883,9 +853,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) tid = *qc & IEEE80211_QOS_CTL_TID_MASK; tx->sta->tx_stats.msdu[tid]++; - if (!ieee80211_get_txq(tx->local, info->control.vif, &tx->sta->sta, - tx->skb)) - hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); + hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); return TX_CONTINUE; } @@ -1274,6 +1242,36 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, return TX_CONTINUE; } +static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct ieee80211_sta *pubsta, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_txq *txq = NULL; + + if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || + (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) + return NULL; + + if (!ieee80211_is_data(hdr->frame_control)) + return NULL; + + if (pubsta) { + u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + + txq = pubsta->txq[tid]; + } else if (vif) { + txq = vif->txq; + } + + if (!txq) + return NULL; + + return to_txq_info(txq); +} + static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb) { IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time(); @@ -1405,6 +1403,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, fq_flow_init(&txqi->def_flow); codel_vars_init(&txqi->def_cvars); codel_stats_init(&txqi->cstats); + __skb_queue_head_init(&txqi->frags); txqi->txq.vif = &sdata->vif; @@ -1427,6 +1426,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local, struct fq_tin *tin = &txqi->tin; fq_tin_reset(fq, tin, fq_skb_free_func); + ieee80211_purge_tx_queue(&local->hw, &txqi->frags); } int ieee80211_txq_setup_flows(struct ieee80211_local *local) @@ -1434,6 +1434,8 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local) struct fq *fq = &local->fq; int ret; int i; + bool supp_vht = false; + enum nl80211_band band; if (!local->ops->wake_tx_queue) return 0; @@ -1442,6 +1444,23 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local) if (ret) return ret; + /* + * If the hardware doesn't support VHT, it is safe to limit the maximum + * queue size. 4 Mbytes is 64 max-size aggregates in 802.11n. + */ + for (band = 0; band < NUM_NL80211_BANDS; band++) { + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[band]; + if (!sband) + continue; + + supp_vht = supp_vht || sband->vht_cap.vht_supported; + } + + if (!supp_vht) + fq->memory_limit = 4 << 20; /* 4 Mbytes */ + codel_params_init(&local->cparams); local->cparams.interval = MS2TIME(100); local->cparams.target = MS2TIME(20); @@ -1477,54 +1496,46 @@ void ieee80211_txq_teardown_flows(struct ieee80211_local *local) spin_unlock_bh(&fq->lock); } -struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) +static bool ieee80211_queue_skb(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct sk_buff *skb) { - struct ieee80211_local *local = hw_to_local(hw); - struct txq_info *txqi = container_of(txq, struct txq_info, txq); - struct ieee80211_hdr *hdr; - struct sk_buff *skb = NULL; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct fq *fq = &local->fq; - struct fq_tin *tin = &txqi->tin; + struct ieee80211_vif *vif; + struct txq_info *txqi; + struct ieee80211_sta *pubsta; - spin_lock_bh(&fq->lock); + if (!local->ops->wake_tx_queue || + sdata->vif.type == NL80211_IFTYPE_MONITOR) + return false; - if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags)) - goto out; + if (sta && sta->uploaded) + pubsta = &sta->sta; + else + pubsta = NULL; - skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); - if (!skb) - goto out; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); - ieee80211_set_skb_vif(skb, txqi); + vif = &sdata->vif; + txqi = ieee80211_get_txq(local, vif, pubsta, skb); - hdr = (struct ieee80211_hdr *)skb->data; - if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) { - struct sta_info *sta = container_of(txq->sta, struct sta_info, - sta); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + if (!txqi) + return false; - hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid); - if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) - info->flags |= IEEE80211_TX_CTL_AMPDU; - else - info->flags &= ~IEEE80211_TX_CTL_AMPDU; - } + info->control.vif = vif; -out: + spin_lock_bh(&fq->lock); + ieee80211_txq_enqueue(local, txqi, skb); spin_unlock_bh(&fq->lock); - if (skb && skb_has_frag_list(skb) && - !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) { - if (skb_linearize(skb)) { - ieee80211_free_txskb(&local->hw, skb); - return NULL; - } - } + drv_wake_tx_queue(local, txqi); - return skb; + return true; } -EXPORT_SYMBOL(ieee80211_tx_dequeue); static bool ieee80211_tx_frags(struct ieee80211_local *local, struct ieee80211_vif *vif, @@ -1533,9 +1544,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, bool txpending) { struct ieee80211_tx_control control = {}; - struct fq *fq = &local->fq; struct sk_buff *skb, *tmp; - struct txq_info *txqi; unsigned long flags; skb_queue_walk_safe(skbs, skb, tmp) { @@ -1550,21 +1559,6 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, } #endif - txqi = ieee80211_get_txq(local, vif, sta, skb); - if (txqi) { - info->control.vif = vif; - - __skb_unlink(skb, skbs); - - spin_lock_bh(&fq->lock); - ieee80211_txq_enqueue(local, txqi, skb); - spin_unlock_bh(&fq->lock); - - drv_wake_tx_queue(local, txqi); - - continue; - } - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); if (local->queue_stop_reasons[q] || (!txpending && !skb_queue_empty(&local->pending[q]))) { @@ -1685,10 +1679,13 @@ static bool __ieee80211_tx(struct ieee80211_local *local, /* * Invoke TX handlers, return 0 on success and non-zero if the * frame was dropped or queued. + * + * The handlers are split into an early and late part. The latter is everything + * that can be sensitive to reordering, and will be deferred to after packets + * are dequeued from the intermediate queues (when they are enabled). */ -static int invoke_tx_handlers(struct ieee80211_tx_data *tx) +static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); ieee80211_tx_result res = TX_DROP; #define CALL_TXH(txh) \ @@ -1706,6 +1703,31 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); + txh_done: + if (unlikely(res == TX_DROP)) { + I802_DEBUG_INC(tx->local->tx_handlers_drop); + if (tx->skb) + ieee80211_free_txskb(&tx->local->hw, tx->skb); + else + ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs); + return -1; + } else if (unlikely(res == TX_QUEUED)) { + I802_DEBUG_INC(tx->local->tx_handlers_queued); + return -1; + } + + return 0; +} + +/* + * Late handlers can be called while the sta lock is held. Handlers that can + * cause packets to be generated will cause deadlock! + */ +static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + ieee80211_tx_result res = TX_CONTINUE; + if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) { __skb_queue_tail(&tx->skbs, tx->skb); tx->skb = NULL; @@ -1738,6 +1760,15 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) return 0; } +static int invoke_tx_handlers(struct ieee80211_tx_data *tx) +{ + int r = invoke_tx_handlers_early(tx); + + if (r) + return r; + return invoke_tx_handlers_late(tx); +} + bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct sk_buff *skb, int band, struct ieee80211_sta **sta) @@ -1812,7 +1843,13 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; - if (!invoke_tx_handlers(&tx)) + if (invoke_tx_handlers_early(&tx)) + return false; + + if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb)) + return true; + + if (!invoke_tx_handlers_late(&tx)) result = __ieee80211_tx(local, &tx.skbs, led_len, tx.sta, txpending); @@ -3156,8 +3193,71 @@ out: return ret; } +/* + * Can be called while the sta lock is held. Anything that can cause packets to + * be generated will cause deadlock! + */ +static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 pn_offs, + struct ieee80211_key *key, + struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)skb->data; + u8 tid = IEEE80211_NUM_TIDS; + + if (key) + info->control.hw_key = &key->conf; + + ieee80211_tx_stats(skb->dev, skb->len); + + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + *ieee80211_get_qos_ctl(hdr) = tid; + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); + } else { + info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); + sdata->sequence_number += 0x10; + } + + if (skb_shinfo(skb)->gso_size) + sta->tx_stats.msdu[tid] += + DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); + else + sta->tx_stats.msdu[tid]++; + + info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + + /* statistics normally done by ieee80211_tx_h_stats (but that + * has to consider fragmentation, so is more complex) + */ + sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + + if (pn_offs) { + u64 pn; + u8 *crypto_hdr = skb->data + pn_offs; + + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn = atomic64_inc_return(&key->conf.tx_pn); + crypto_hdr[0] = pn; + crypto_hdr[1] = pn >> 8; + crypto_hdr[4] = pn >> 16; + crypto_hdr[5] = pn >> 24; + crypto_hdr[6] = pn >> 32; + crypto_hdr[7] = pn >> 40; + break; + } + } +} + static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, - struct net_device *dev, struct sta_info *sta, + struct sta_info *sta, struct ieee80211_fast_tx *fast_tx, struct sk_buff *skb) { @@ -3208,8 +3308,6 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, return true; } - ieee80211_tx_stats(dev, skb->len + extra_head); - if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) return true; @@ -3238,24 +3336,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT | IEEE80211_TX_CTL_DONTFRAG | (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0); - - if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { - *ieee80211_get_qos_ctl(hdr) = tid; - if (!ieee80211_get_txq(local, &sdata->vif, &sta->sta, skb)) - hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); - } else { - info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; - hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); - sdata->sequence_number += 0x10; - } - - if (skb_shinfo(skb)->gso_size) - sta->tx_stats.msdu[tid] += - DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); - else - sta->tx_stats.msdu[tid]++; - - info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT; __skb_queue_head_init(&tx.skbs); @@ -3265,9 +3346,6 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, tx.sta = sta; tx.key = fast_tx->key; - if (fast_tx->key) - info->control.hw_key = &fast_tx->key->conf; - if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { tx.skb = skb; r = ieee80211_tx_h_rate_ctrl(&tx); @@ -3281,31 +3359,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, } } - /* statistics normally done by ieee80211_tx_h_stats (but that - * has to consider fragmentation, so is more complex) - */ - sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; - sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; - - if (fast_tx->pn_offs) { - u64 pn; - u8 *crypto_hdr = skb->data + fast_tx->pn_offs; + if (ieee80211_queue_skb(local, sdata, sta, skb)) + return true; - switch (fast_tx->key->conf.cipher) { - case WLAN_CIPHER_SUITE_CCMP: - case WLAN_CIPHER_SUITE_CCMP_256: - case WLAN_CIPHER_SUITE_GCMP: - case WLAN_CIPHER_SUITE_GCMP_256: - pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn); - crypto_hdr[0] = pn; - crypto_hdr[1] = pn >> 8; - crypto_hdr[4] = pn >> 16; - crypto_hdr[5] = pn >> 24; - crypto_hdr[6] = pn >> 32; - crypto_hdr[7] = pn >> 40; - break; - } - } + ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs, + fast_tx->key, skb); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, @@ -3316,6 +3374,94 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, return true; } +struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = container_of(txq, struct txq_info, txq); + struct ieee80211_hdr *hdr; + struct sk_buff *skb = NULL; + struct fq *fq = &local->fq; + struct fq_tin *tin = &txqi->tin; + struct ieee80211_tx_info *info; + struct ieee80211_tx_data tx; + ieee80211_tx_result r; + + spin_lock_bh(&fq->lock); + + if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags)) + goto out; + + /* Make sure fragments stay together. */ + skb = __skb_dequeue(&txqi->frags); + if (skb) + goto out; + +begin: + skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); + if (!skb) + goto out; + + ieee80211_set_skb_vif(skb, txqi); + + hdr = (struct ieee80211_hdr *)skb->data; + info = IEEE80211_SKB_CB(skb); + + memset(&tx, 0, sizeof(tx)); + __skb_queue_head_init(&tx.skbs); + tx.local = local; + tx.skb = skb; + tx.sdata = vif_to_sdata(info->control.vif); + + if (txq->sta) + tx.sta = container_of(txq->sta, struct sta_info, sta); + + /* + * The key can be removed while the packet was queued, so need to call + * this here to get the current key. + */ + r = ieee80211_tx_h_select_key(&tx); + if (r != TX_CONTINUE) { + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } + + if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) { + struct sta_info *sta = container_of(txq->sta, struct sta_info, + sta); + u8 pn_offs = 0; + + if (tx.key && + (tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) + pn_offs = ieee80211_hdrlen(hdr->frame_control); + + ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs, + tx.key, skb); + } else { + if (invoke_tx_handlers_late(&tx)) + goto begin; + + skb = __skb_dequeue(&tx.skbs); + + if (!skb_queue_empty(&tx.skbs)) + skb_queue_splice_tail(&tx.skbs, &txqi->frags); + } + + if (skb && skb_has_frag_list(skb) && + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) { + if (skb_linearize(skb)) { + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } + } + +out: + spin_unlock_bh(&fq->lock); + + return skb; +} +EXPORT_SYMBOL(ieee80211_tx_dequeue); + void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags) @@ -3340,7 +3486,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, fast_tx = rcu_dereference(sta->fast_tx); if (fast_tx && - ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb)) + ieee80211_xmit_fast(sdata, sta, fast_tx, skb)) goto out; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b6865d884487..545c79a42a77 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1209,7 +1209,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, } if (sdata->vif.type != NL80211_IFTYPE_MONITOR && - sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) { + sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) { sdata->vif.bss_conf.qos = enable_qos; if (bss_notify) ieee80211_bss_info_change_notify(sdata, @@ -1748,6 +1749,46 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->sta_mtx); } +static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) +{ + struct cfg80211_nan_func *func, **funcs; + int res, id, i = 0; + + res = drv_start_nan(sdata->local, sdata, + &sdata->u.nan.conf); + if (WARN_ON(res)) + return res; + + funcs = kzalloc((sdata->local->hw.max_nan_de_entries + 1) * + sizeof(*funcs), GFP_KERNEL); + if (!funcs) + return -ENOMEM; + + /* Add all the functions: + * This is a little bit ugly. We need to call a potentially sleeping + * callback for each NAN function, so we can't hold the spinlock. + */ + spin_lock_bh(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) + funcs[i++] = func; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + for (i = 0; funcs[i]; i++) { + res = drv_add_nan_func(sdata->local, sdata, funcs[i]); + if (WARN_ON(res)) + ieee80211_nan_func_terminated(&sdata->vif, + funcs[i]->instance_id, + NL80211_NAN_FUNC_TERM_REASON_ERROR, + GFP_KERNEL); + } + + kfree(funcs); + + return 0; +} + int ieee80211_reconfig(struct ieee80211_local *local) { struct ieee80211_hw *hw = &local->hw; @@ -1971,6 +2012,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, changed); } break; + case NL80211_IFTYPE_NAN: + res = ieee80211_reconfig_nan(sdata); + if (res < 0) { + ieee80211_handle_reconfig_failure(local); + return res; + } + break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: @@ -3393,11 +3441,18 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq, unsigned long *byte_cnt) { struct txq_info *txqi = to_txq_info(txq); + u32 frag_cnt = 0, frag_bytes = 0; + struct sk_buff *skb; + + skb_queue_walk(&txqi->frags, skb) { + frag_cnt++; + frag_bytes += skb->len; + } if (frame_cnt) - *frame_cnt = txqi->tin.backlog_packets; + *frame_cnt = txqi->tin.backlog_packets + frag_cnt; if (byte_cnt) - *byte_cnt = txqi->tin.backlog_bytes; + *byte_cnt = txqi->tin.backlog_bytes + frag_bytes; } EXPORT_SYMBOL(ieee80211_txq_get_depth); diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 732a5c17e986..bdfef6c3271a 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -1,9 +1,6 @@ #ifndef MPLS_INTERNAL_H #define MPLS_INTERNAL_H - -struct mpls_shim_hdr { - __be32 label_stack_entry; -}; +#include <net/mpls.h> struct mpls_entry_decoded { u32 label; @@ -93,11 +90,6 @@ struct mpls_route { /* next hop label forwarding entry */ #define endfor_nexthops(rt) } -static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) -{ - return (struct mpls_shim_hdr *)skb_network_header(skb); -} - static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos) { struct mpls_shim_hdr result; diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index cf52cf30ac4b..2f7ccd934416 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -133,7 +133,6 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla, struct mpls_iptunnel_encap *tun_encap_info; struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; struct lwtunnel_state *newts; - int tun_encap_info_len; int ret; ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, @@ -144,13 +143,11 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla, if (!tb[MPLS_IPTUNNEL_DST]) return -EINVAL; - tun_encap_info_len = sizeof(*tun_encap_info); - newts = lwtunnel_state_alloc(tun_encap_info_len); + newts = lwtunnel_state_alloc(sizeof(*tun_encap_info)); if (!newts) return -ENOMEM; - newts->len = tun_encap_info_len; tun_encap_info = mpls_lwtunnel_encap(newts); ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, &tun_encap_info->labels, tun_encap_info->label); diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 33738c060547..13290a70fa71 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -170,6 +170,7 @@ struct ncsi_package; #define NCSI_PACKAGE_SHIFT 5 #define NCSI_PACKAGE_INDEX(c) (((c) >> NCSI_PACKAGE_SHIFT) & 0x7) +#define NCSI_RESERVED_CHANNEL 0x1f #define NCSI_CHANNEL_INDEX(c) ((c) & ((1 << NCSI_PACKAGE_SHIFT) - 1)) #define NCSI_TO_CHANNEL(p, c) (((p) << NCSI_PACKAGE_SHIFT) | (c)) @@ -186,9 +187,15 @@ struct ncsi_channel { struct ncsi_channel_mode modes[NCSI_MODE_MAX]; struct ncsi_channel_filter *filters[NCSI_FILTER_MAX]; struct ncsi_channel_stats stats; - struct timer_list timer; /* Link monitor timer */ - bool enabled; /* Timer is enabled */ - unsigned int timeout; /* Times of timeout */ + struct { + struct timer_list timer; + bool enabled; + unsigned int state; +#define NCSI_CHANNEL_MONITOR_START 0 +#define NCSI_CHANNEL_MONITOR_RETRY 1 +#define NCSI_CHANNEL_MONITOR_WAIT 2 +#define NCSI_CHANNEL_MONITOR_WAIT_MAX 5 + } monitor; struct list_head node; struct list_head link; }; @@ -206,7 +213,8 @@ struct ncsi_package { struct ncsi_request { unsigned char id; /* Request ID - 0 to 255 */ bool used; /* Request that has been assigned */ - bool driven; /* Drive state machine */ + unsigned int flags; /* NCSI request property */ +#define NCSI_REQ_FLAG_EVENT_DRIVEN 1 struct ncsi_dev_priv *ndp; /* Associated NCSI device */ struct sk_buff *cmd; /* Associated NCSI command packet */ struct sk_buff *rsp; /* Associated NCSI response packet */ @@ -258,6 +266,7 @@ struct ncsi_dev_priv { struct list_head packages; /* List of packages */ struct ncsi_request requests[256]; /* Request table */ unsigned int request_id; /* Last used request ID */ +#define NCSI_REQ_START_IDX 1 unsigned int pending_req_num; /* Number of pending requests */ struct ncsi_package *active_package; /* Currently handled package */ struct ncsi_channel *active_channel; /* Currently handled channel */ @@ -274,7 +283,7 @@ struct ncsi_cmd_arg { unsigned char package; /* Destination package ID */ unsigned char channel; /* Detination channel ID or 0x1f */ unsigned short payload; /* Command packet payload length */ - bool driven; /* Drive the state machine? */ + unsigned int req_flags; /* NCSI request properties */ union { unsigned char bytes[16]; /* Command packet specific data */ unsigned short words[8]; @@ -313,7 +322,8 @@ void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, unsigned char id, struct ncsi_package **np, struct ncsi_channel **nc); -struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven); +struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, + unsigned int req_flags); void ncsi_free_request(struct ncsi_request *nr); struct ncsi_dev *ncsi_find_dev(struct net_device *dev); int ncsi_process_next_channel(struct ncsi_dev_priv *ndp); diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index d463468442ae..b41a6617d498 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -53,7 +53,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, struct ncsi_aen_lsc_pkt *lsc; struct ncsi_channel *nc; struct ncsi_channel_mode *ncm; - unsigned long old_data; + bool chained; + int state; + unsigned long old_data, data; unsigned long flags; /* Find the NCSI channel */ @@ -62,20 +64,27 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, return -ENODEV; /* Update the link status */ - ncm = &nc->modes[NCSI_MODE_LINK]; lsc = (struct ncsi_aen_lsc_pkt *)h; + + spin_lock_irqsave(&nc->lock, flags); + ncm = &nc->modes[NCSI_MODE_LINK]; old_data = ncm->data[2]; - ncm->data[2] = ntohl(lsc->status); + data = ntohl(lsc->status); + ncm->data[2] = data; ncm->data[4] = ntohl(lsc->oem_status); - if (!((old_data ^ ncm->data[2]) & 0x1) || - !list_empty(&nc->link)) + + chained = !list_empty(&nc->link); + state = nc->state; + spin_unlock_irqrestore(&nc->lock, flags); + + if (!((old_data ^ data) & 0x1) || chained) return 0; - if (!(nc->state == NCSI_CHANNEL_INACTIVE && (ncm->data[2] & 0x1)) && - !(nc->state == NCSI_CHANNEL_ACTIVE && !(ncm->data[2] & 0x1))) + if (!(state == NCSI_CHANNEL_INACTIVE && (data & 0x1)) && + !(state == NCSI_CHANNEL_ACTIVE && !(data & 0x1))) return 0; if (!(ndp->flags & NCSI_DEV_HWA) && - nc->state == NCSI_CHANNEL_ACTIVE) + state == NCSI_CHANNEL_ACTIVE) ndp->flags |= NCSI_DEV_RESHUFFLE; ncsi_stop_channel_monitor(nc); @@ -97,13 +106,21 @@ static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp, if (!nc) return -ENODEV; + spin_lock_irqsave(&nc->lock, flags); if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE) + nc->state != NCSI_CHANNEL_ACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); return 0; + } + spin_unlock_irqrestore(&nc->lock, flags); ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); + spin_lock_irqsave(&ndp->lock, flags); - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index 21057a8ceeac..db7083bfd476 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -272,7 +272,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) struct sk_buff *skb; struct ncsi_request *nr; - nr = ncsi_alloc_request(ndp, nca->driven); + nr = ncsi_alloc_request(ndp, nca->req_flags); if (!nr) return NULL; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index ef017b871857..5e509e547c2d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -132,6 +132,7 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; struct ncsi_channel *nc; + unsigned long flags; nd->state = ncsi_dev_state_functional; if (force_down) { @@ -142,14 +143,21 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) nd->link_up = 0; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + spin_lock_irqsave(&nc->lock, flags); + if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE) + nc->state != NCSI_CHANNEL_ACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); continue; + } if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) { + spin_unlock_irqrestore(&nc->lock, flags); nd->link_up = 1; goto report; } + + spin_unlock_irqrestore(&nc->lock, flags); } } @@ -163,43 +171,55 @@ static void ncsi_channel_monitor(unsigned long data) struct ncsi_package *np = nc->package; struct ncsi_dev_priv *ndp = np->ndp; struct ncsi_cmd_arg nca; - bool enabled; - unsigned int timeout; + bool enabled, chained; + unsigned int monitor_state; unsigned long flags; - int ret; + int state, ret; spin_lock_irqsave(&nc->lock, flags); - timeout = nc->timeout; - enabled = nc->enabled; + state = nc->state; + chained = !list_empty(&nc->link); + enabled = nc->monitor.enabled; + monitor_state = nc->monitor.state; spin_unlock_irqrestore(&nc->lock, flags); - if (!enabled || !list_empty(&nc->link)) + if (!enabled || chained) return; - if (nc->state != NCSI_CHANNEL_INACTIVE && - nc->state != NCSI_CHANNEL_ACTIVE) + if (state != NCSI_CHANNEL_INACTIVE && + state != NCSI_CHANNEL_ACTIVE) return; - if (!(timeout % 2)) { + switch (monitor_state) { + case NCSI_CHANNEL_MONITOR_START: + case NCSI_CHANNEL_MONITOR_RETRY: nca.ndp = ndp; nca.package = np->id; nca.channel = nc->id; nca.type = NCSI_PKT_CMD_GLS; - nca.driven = false; + nca.req_flags = 0; ret = ncsi_xmit_cmd(&nca); if (ret) { netdev_err(ndp->ndev.dev, "Error %d sending GLS\n", ret); return; } - } - if (timeout + 1 >= 3) { + break; + case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: + break; + default: if (!(ndp->flags & NCSI_DEV_HWA) && - nc->state == NCSI_CHANNEL_ACTIVE) + state == NCSI_CHANNEL_ACTIVE) { ncsi_report_link(ndp, true); + ndp->flags |= NCSI_DEV_RESHUFFLE; + } + + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); spin_lock_irqsave(&ndp->lock, flags); - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); ncsi_process_next_channel(ndp); @@ -207,10 +227,9 @@ static void ncsi_channel_monitor(unsigned long data) } spin_lock_irqsave(&nc->lock, flags); - nc->timeout = timeout + 1; - nc->enabled = true; + nc->monitor.state++; spin_unlock_irqrestore(&nc->lock, flags); - mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2))); + mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_start_channel_monitor(struct ncsi_channel *nc) @@ -218,12 +237,12 @@ void ncsi_start_channel_monitor(struct ncsi_channel *nc) unsigned long flags; spin_lock_irqsave(&nc->lock, flags); - WARN_ON_ONCE(nc->enabled); - nc->timeout = 0; - nc->enabled = true; + WARN_ON_ONCE(nc->monitor.enabled); + nc->monitor.enabled = true; + nc->monitor.state = NCSI_CHANNEL_MONITOR_START; spin_unlock_irqrestore(&nc->lock, flags); - mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2))); + mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_stop_channel_monitor(struct ncsi_channel *nc) @@ -231,14 +250,14 @@ void ncsi_stop_channel_monitor(struct ncsi_channel *nc) unsigned long flags; spin_lock_irqsave(&nc->lock, flags); - if (!nc->enabled) { + if (!nc->monitor.enabled) { spin_unlock_irqrestore(&nc->lock, flags); return; } - nc->enabled = false; + nc->monitor.enabled = false; spin_unlock_irqrestore(&nc->lock, flags); - del_timer_sync(&nc->timer); + del_timer_sync(&nc->monitor.timer); } struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np, @@ -267,8 +286,9 @@ struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id) nc->id = id; nc->package = np; nc->state = NCSI_CHANNEL_INACTIVE; - nc->enabled = false; - setup_timer(&nc->timer, ncsi_channel_monitor, (unsigned long)nc); + nc->monitor.enabled = false; + setup_timer(&nc->monitor.timer, + ncsi_channel_monitor, (unsigned long)nc); spin_lock_init(&nc->lock); INIT_LIST_HEAD(&nc->link); for (index = 0; index < NCSI_CAP_MAX; index++) @@ -405,7 +425,8 @@ void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, * be same. Otherwise, the bogus response might be replied. So * the available IDs are allocated in round-robin fashion. */ -struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) +struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, + unsigned int req_flags) { struct ncsi_request *nr = NULL; int i, limit = ARRAY_SIZE(ndp->requests); @@ -413,30 +434,31 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) /* Check if there is one available request until the ceiling */ spin_lock_irqsave(&ndp->lock, flags); - for (i = ndp->request_id; !nr && i < limit; i++) { + for (i = ndp->request_id; i < limit; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; - nr->driven = driven; - if (++ndp->request_id >= limit) - ndp->request_id = 0; + nr->flags = req_flags; + ndp->request_id = i + 1; + goto found; } /* Fail back to check from the starting cursor */ - for (i = 0; !nr && i < ndp->request_id; i++) { + for (i = NCSI_REQ_START_IDX; i < ndp->request_id; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; - nr->driven = driven; - if (++ndp->request_id >= limit) - ndp->request_id = 0; + nr->flags = req_flags; + ndp->request_id = i + 1; + goto found; } - spin_unlock_irqrestore(&ndp->lock, flags); +found: + spin_unlock_irqrestore(&ndp->lock, flags); return nr; } @@ -458,7 +480,7 @@ void ncsi_free_request(struct ncsi_request *nr) nr->cmd = NULL; nr->rsp = NULL; nr->used = false; - driven = nr->driven; + driven = !!(nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN); spin_unlock_irqrestore(&ndp->lock, flags); if (driven && cmd && --ndp->pending_req_num == 0) @@ -508,10 +530,11 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) struct ncsi_package *np = ndp->active_package; struct ncsi_channel *nc = ndp->active_channel; struct ncsi_cmd_arg nca; + unsigned long flags; int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_suspend: nd->state = ncsi_dev_state_suspend_select; @@ -527,7 +550,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nca.package = np->id; if (nd->state == ncsi_dev_state_suspend_select) { nca.type = NCSI_PKT_CMD_SP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; if (ndp->flags & NCSI_DEV_HWA) nca.bytes[0] = 0; else @@ -544,7 +567,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_suspend_deselect; } else if (nd->state == ncsi_dev_state_suspend_deselect) { nca.type = NCSI_PKT_CMD_DP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; nd->state = ncsi_dev_state_suspend_done; } @@ -556,7 +579,9 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) break; case ncsi_dev_state_suspend_done: - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); ncsi_process_next_channel(ndp); break; @@ -574,10 +599,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) struct ncsi_channel *nc = ndp->active_channel; struct ncsi_cmd_arg nca; unsigned char index; + unsigned long flags; int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_config: case ncsi_dev_state_config_sp: @@ -590,7 +616,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) else nca.bytes[0] = 1; nca.package = np->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; @@ -675,10 +701,12 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) goto error; break; case ncsi_dev_state_config_done: + spin_lock_irqsave(&nc->lock, flags); if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) - xchg(&nc->state, NCSI_CHANNEL_ACTIVE); + nc->state = NCSI_CHANNEL_ACTIVE; else - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); ncsi_start_channel_monitor(nc); ncsi_process_next_channel(ndp); @@ -707,18 +735,25 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) found = NULL; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + spin_lock_irqsave(&nc->lock, flags); + if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_INACTIVE) + nc->state != NCSI_CHANNEL_INACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); continue; + } if (!found) found = nc; ncm = &nc->modes[NCSI_MODE_LINK]; if (ncm->data[2] & 0x1) { + spin_unlock_irqrestore(&nc->lock, flags); found = nc; goto out; } + + spin_unlock_irqrestore(&nc->lock, flags); } } @@ -797,7 +832,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_probe: nd->state = ncsi_dev_state_probe_deselect; @@ -807,7 +842,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Deselect all possible packages */ nca.type = NCSI_PKT_CMD_DP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; for (index = 0; index < 8; index++) { nca.package = index; ret = ncsi_xmit_cmd(&nca); @@ -823,7 +858,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Select all possible packages */ nca.type = NCSI_PKT_CMD_SP; nca.bytes[0] = 1; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; for (index = 0; index < 8; index++) { nca.package = index; ret = ncsi_xmit_cmd(&nca); @@ -876,7 +911,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nca.type = NCSI_PKT_CMD_SP; nca.bytes[0] = 1; nca.package = ndp->active_package->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; @@ -884,12 +919,12 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_cis; break; case ncsi_dev_state_probe_cis: - ndp->pending_req_num = 32; + ndp->pending_req_num = NCSI_RESERVED_CHANNEL; /* Clear initial state */ nca.type = NCSI_PKT_CMD_CIS; nca.package = ndp->active_package->id; - for (index = 0; index < 0x20; index++) { + for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) { nca.channel = index; ret = ncsi_xmit_cmd(&nca); if (ret) @@ -933,7 +968,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Deselect the active package */ nca.type = NCSI_PKT_CMD_DP; nca.package = ndp->active_package->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; @@ -987,11 +1022,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) goto out; } - old_state = xchg(&nc->state, NCSI_CHANNEL_INVISIBLE); list_del_init(&nc->link); - spin_unlock_irqrestore(&ndp->lock, flags); + spin_lock_irqsave(&nc->lock, flags); + old_state = nc->state; + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); + ndp->active_channel = nc; ndp->active_package = nc->package; @@ -1006,7 +1044,7 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) break; default: netdev_err(ndp->ndev.dev, "Invalid state 0x%x on %d:%d\n", - nc->state, nc->package->id, nc->id); + old_state, nc->package->id, nc->id); ncsi_report_link(ndp, false); return -EINVAL; } @@ -1070,7 +1108,7 @@ static int ncsi_inet6addr_event(struct notifier_block *this, return NOTIFY_OK; nca.ndp = ndp; - nca.driven = false; + nca.req_flags = 0; nca.package = np->id; nca.channel = nc->id; nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap; @@ -1118,7 +1156,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, /* Initialize private NCSI device */ spin_lock_init(&ndp->lock); INIT_LIST_HEAD(&ndp->packages); - ndp->request_id = 0; + ndp->request_id = NCSI_REQ_START_IDX; for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) { ndp->requests[i].id = i; ndp->requests[i].ndp = ndp; @@ -1149,9 +1187,7 @@ EXPORT_SYMBOL_GPL(ncsi_register_dev); int ncsi_start_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); - struct ncsi_package *np; - struct ncsi_channel *nc; - int old_state, ret; + int ret; if (nd->state != ncsi_dev_state_registered && nd->state != ncsi_dev_state_functional) @@ -1163,15 +1199,6 @@ int ncsi_start_dev(struct ncsi_dev *nd) return 0; } - /* Reset channel's state and start over */ - NCSI_FOR_EACH_PACKAGE(ndp, np) { - NCSI_FOR_EACH_CHANNEL(np, nc) { - old_state = xchg(&nc->state, NCSI_CHANNEL_INACTIVE); - WARN_ON_ONCE(!list_empty(&nc->link) || - old_state == NCSI_CHANNEL_INVISIBLE); - } - } - if (ndp->flags & NCSI_DEV_HWA) ret = ncsi_enable_hwa(ndp); else @@ -1181,6 +1208,35 @@ int ncsi_start_dev(struct ncsi_dev *nd) } EXPORT_SYMBOL_GPL(ncsi_start_dev); +void ncsi_stop_dev(struct ncsi_dev *nd) +{ + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); + struct ncsi_package *np; + struct ncsi_channel *nc; + bool chained; + int old_state; + unsigned long flags; + + /* Stop the channel monitor and reset channel's state */ + NCSI_FOR_EACH_PACKAGE(ndp, np) { + NCSI_FOR_EACH_CHANNEL(np, nc) { + ncsi_stop_channel_monitor(nc); + + spin_lock_irqsave(&nc->lock, flags); + chained = !list_empty(&nc->link); + old_state = nc->state; + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + + WARN_ON_ONCE(chained || + old_state == NCSI_CHANNEL_INVISIBLE); + } + } + + ncsi_report_link(ndp, true); +} +EXPORT_SYMBOL_GPL(ncsi_stop_dev); + void ncsi_unregister_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index af84389a6bf1..087db775b3dc 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -317,12 +317,12 @@ static int ncsi_rsp_handler_gls(struct ncsi_request *nr) ncm->data[3] = ntohl(rsp->other); ncm->data[4] = ntohl(rsp->oem_status); - if (nr->driven) + if (nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN) return 0; /* Reset the channel monitor if it has been enabled */ spin_lock_irqsave(&nc->lock, flags); - nc->timeout = 0; + nc->monitor.state = NCSI_CHANNEL_MONITOR_START; spin_unlock_irqrestore(&nc->lock, flags); return 0; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index fa6715db4581..fcb5d1df11e9 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -65,51 +65,37 @@ static DEFINE_MUTEX(nf_hook_mutex); #define nf_entry_dereference(e) \ rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) -static struct nf_hook_entry *nf_hook_entry_head(struct net *net, - const struct nf_hook_ops *reg) +static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry *hook_head = NULL; - if (reg->pf != NFPROTO_NETDEV) - hook_head = nf_entry_dereference(net->nf.hooks[reg->pf] - [reg->hooknum]); - else if (reg->hooknum == NF_NETDEV_INGRESS) { + return net->nf.hooks[reg->pf]+reg->hooknum; + #ifdef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) { if (reg->dev && dev_net(reg->dev) == net) - hook_head = - nf_entry_dereference( - reg->dev->nf_hooks_ingress); -#endif - } - return hook_head; -} - -/* must hold nf_hook_mutex */ -static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg, - struct nf_hook_entry *entry) -{ - switch (reg->pf) { - case NFPROTO_NETDEV: - /* We already checked in nf_register_net_hook() that this is - * used from ingress. - */ - rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry); - break; - default: - rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum], - entry); - break; + return ®->dev->nf_hooks_ingress; } +#endif + return NULL; } int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry *hooks_entry; - struct nf_hook_entry *entry; + struct nf_hook_entry __rcu **pp; + struct nf_hook_entry *entry, *p; + + if (reg->pf == NFPROTO_NETDEV) { +#ifndef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) + return -EOPNOTSUPP; +#endif + if (reg->hooknum != NF_NETDEV_INGRESS || + !reg->dev || dev_net(reg->dev) != net) + return -EINVAL; + } - if (reg->pf == NFPROTO_NETDEV && - (reg->hooknum != NF_NETDEV_INGRESS || - !reg->dev || dev_net(reg->dev) != net)) + pp = nf_hook_entry_head(net, reg); + if (!pp) return -EINVAL; entry = kmalloc(sizeof(*entry), GFP_KERNEL); @@ -121,26 +107,15 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) entry->next = NULL; mutex_lock(&nf_hook_mutex); - hooks_entry = nf_hook_entry_head(net, reg); - if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) { - /* This is the case where we need to insert at the head */ - entry->next = hooks_entry; - hooks_entry = NULL; - } - - while (hooks_entry && - reg->priority >= hooks_entry->orig_ops->priority && - nf_entry_dereference(hooks_entry->next)) { - hooks_entry = nf_entry_dereference(hooks_entry->next); - } - - if (hooks_entry) { - entry->next = nf_entry_dereference(hooks_entry->next); - rcu_assign_pointer(hooks_entry->next, entry); - } else { - nf_set_hooks_head(net, reg, entry); + /* Find the spot in the list */ + while ((p = nf_entry_dereference(*pp)) != NULL) { + if (reg->priority < p->orig_ops->priority) + break; + pp = &p->next; } + rcu_assign_pointer(entry->next, p); + rcu_assign_pointer(*pp, entry); mutex_unlock(&nf_hook_mutex); #ifdef CONFIG_NETFILTER_INGRESS @@ -156,33 +131,23 @@ EXPORT_SYMBOL(nf_register_net_hook); void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry *hooks_entry; + struct nf_hook_entry __rcu **pp; + struct nf_hook_entry *p; - mutex_lock(&nf_hook_mutex); - hooks_entry = nf_hook_entry_head(net, reg); - if (hooks_entry->orig_ops == reg) { - nf_set_hooks_head(net, reg, - nf_entry_dereference(hooks_entry->next)); - goto unlock; - } - while (hooks_entry && nf_entry_dereference(hooks_entry->next)) { - struct nf_hook_entry *next = - nf_entry_dereference(hooks_entry->next); - struct nf_hook_entry *nnext; + pp = nf_hook_entry_head(net, reg); + if (WARN_ON_ONCE(!pp)) + return; - if (next->orig_ops != reg) { - hooks_entry = next; - continue; + mutex_lock(&nf_hook_mutex); + while ((p = nf_entry_dereference(*pp)) != NULL) { + if (p->orig_ops == reg) { + rcu_assign_pointer(*pp, p->next); + break; } - nnext = nf_entry_dereference(next->next); - rcu_assign_pointer(hooks_entry->next, nnext); - hooks_entry = next; - break; + pp = &p->next; } - -unlock: mutex_unlock(&nf_hook_mutex); - if (!hooks_entry) { + if (!p) { WARN(1, "nf_unregister_net_hook: hook not found!\n"); return; } @@ -194,10 +159,10 @@ unlock: static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); - nf_queue_nf_hook_drop(net, hooks_entry); + nf_queue_nf_hook_drop(net, p); /* other cpu might still process nfqueue verdict that used reg */ synchronize_net(); - kfree(hooks_entry); + kfree(p); } EXPORT_SYMBOL(nf_unregister_net_hook); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c3c809b2e712..6b85ded4f91d 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2840,14 +2840,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { */ /* IPVS genetlink family */ -static struct genl_family ip_vs_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = IPVS_GENL_NAME, - .version = IPVS_GENL_VERSION, - .maxattr = IPVS_CMD_MAX, - .netnsok = true, /* Make ipvsadm to work on netns */ -}; +static struct genl_family ip_vs_genl_family; /* Policy used for first-level command attributes */ static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { @@ -3872,10 +3865,20 @@ static const struct genl_ops ip_vs_genl_ops[] = { }, }; +static struct genl_family ip_vs_genl_family __ro_after_init = { + .hdrsize = 0, + .name = IPVS_GENL_NAME, + .version = IPVS_GENL_VERSION, + .maxattr = IPVS_CMD_MAX, + .netnsok = true, /* Make ipvsadm to work on netns */ + .module = THIS_MODULE, + .ops = ip_vs_genl_ops, + .n_ops = ARRAY_SIZE(ip_vs_genl_ops), +}; + static int __init ip_vs_genl_register(void) { - return genl_register_family_with_ops(&ip_vs_genl_family, - ip_vs_genl_ops); + return genl_register_family(&ip_vs_genl_family); } static void ip_vs_genl_unregister(void) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 30a17d649a83..3dca90dc24ad 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -422,7 +422,7 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, char buf[NFLOGGER_NAME_LEN]; int r = 0; int tindex = (unsigned long)table->extra1; - struct net *net = current->nsproxy->net_ns; + struct net *net = table->extra2; if (write) { struct ctl_table tmp = *table; @@ -476,7 +476,6 @@ static int netfilter_log_sysctl_init(struct net *net) 3, "%d", i); nf_log_sysctl_table[i].procname = nf_log_sysctl_fnames[i]; - nf_log_sysctl_table[i].data = NULL; nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN; nf_log_sysctl_table[i].mode = 0644; nf_log_sysctl_table[i].proc_handler = @@ -486,6 +485,9 @@ static int netfilter_log_sysctl_init(struct net *net) } } + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) + table[i].extra2 = net; + net->nf.nf_log_dir_header = register_net_sysctl(net, "net/netfilter/nf_log", table); diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 070b98938e02..c6baf412236d 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -145,7 +145,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx, if (err < 0) return err; - priv->cost = div_u64(priv->limit.nsecs, priv->limit.rate); + priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate); return 0; } @@ -170,7 +170,7 @@ static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_limit *priv = nft_expr_priv(expr); - u64 cost = div_u64(priv->nsecs * pkt->skb->len, priv->rate); + u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate); if (nft_limit_eval(priv, cost)) regs->verdict.code = NFT_BREAK; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 44a095ecc7b7..2fab0c65aa94 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -467,17 +467,18 @@ static u64 user2credits(u64 user, int revision) /* If multiplying would overflow... */ if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1)) /* Divide first. */ - return (user / XT_HASHLIMIT_SCALE) *\ - HZ * CREDITS_PER_JIFFY_v1; + return div64_u64(user, XT_HASHLIMIT_SCALE) + * HZ * CREDITS_PER_JIFFY_v1; - return (user * HZ * CREDITS_PER_JIFFY_v1) \ - / XT_HASHLIMIT_SCALE; + return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1, + XT_HASHLIMIT_SCALE); } else { if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) - return (user / XT_HASHLIMIT_SCALE_v2) *\ - HZ * CREDITS_PER_JIFFY; + return div64_u64(user, XT_HASHLIMIT_SCALE_v2) + * HZ * CREDITS_PER_JIFFY; - return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE_v2; + return div64_u64(user * HZ * CREDITS_PER_JIFFY, + XT_HASHLIMIT_SCALE_v2); } } diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 2ec93c5e77bb..d177dd066504 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CALIPSO family */ -static struct genl_family netlbl_calipso_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NETLBL_NLTYPE_CALIPSO_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_CALIPSO_A_MAX, -}; +static struct genl_family netlbl_calipso_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = { @@ -355,6 +349,16 @@ static const struct genl_ops netlbl_calipso_ops[] = { }, }; +static struct genl_family netlbl_calipso_gnl_family __ro_after_init = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_CALIPSO_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_CALIPSO_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_calipso_ops, + .n_ops = ARRAY_SIZE(netlbl_calipso_ops), +}; + /* NetLabel Generic NETLINK Protocol Functions */ @@ -368,8 +372,7 @@ static const struct genl_ops netlbl_calipso_ops[] = { */ int __init netlbl_calipso_genl_init(void) { - return genl_register_family_with_ops(&netlbl_calipso_gnl_family, - netlbl_calipso_ops); + return genl_register_family(&netlbl_calipso_gnl_family); } static const struct netlbl_calipso_ops *calipso_ops; diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 7fd1104ba900..4149d3e63589 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -59,14 +59,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CIPSOv4 family */ -static struct genl_family netlbl_cipsov4_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NETLBL_NLTYPE_CIPSOV4_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_CIPSOV4_A_MAX, -}; - +static struct genl_family netlbl_cipsov4_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, @@ -767,6 +760,16 @@ static const struct genl_ops netlbl_cipsov4_ops[] = { }, }; +static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_CIPSOV4_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_CIPSOV4_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_cipsov4_ops, + .n_ops = ARRAY_SIZE(netlbl_cipsov4_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -781,6 +784,5 @@ static const struct genl_ops netlbl_cipsov4_ops[] = { */ int __init netlbl_cipsov4_genl_init(void) { - return genl_register_family_with_ops(&netlbl_cipsov4_gnl_family, - netlbl_cipsov4_ops); + return genl_register_family(&netlbl_cipsov4_gnl_family); } diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index f85d0e07af2d..21e0095b1d14 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CIPSOv4 family */ -static struct genl_family netlbl_mgmt_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NETLBL_NLTYPE_MGMT_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_MGMT_A_MAX, -}; +static struct genl_family netlbl_mgmt_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { @@ -834,6 +828,16 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = { }, }; +static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_MGMT_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_MGMT_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_mgmt_genl_ops, + .n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -848,6 +852,5 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = { */ int __init netlbl_mgmt_genl_init(void) { - return genl_register_family_with_ops(&netlbl_mgmt_gnl_family, - netlbl_mgmt_genl_ops); + return genl_register_family(&netlbl_mgmt_gnl_family); } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 4528cff9138b..22dc1b9d6362 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -123,13 +123,7 @@ static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def; static u8 netlabel_unlabel_acceptflg; /* NetLabel Generic NETLINK unlabeled family */ -static struct genl_family netlbl_unlabel_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NETLBL_NLTYPE_UNLABELED_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_UNLABEL_A_MAX, -}; +static struct genl_family netlbl_unlabel_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { @@ -1378,6 +1372,16 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = { }, }; +static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_UNLABELED_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_UNLABEL_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_unlabel_genl_ops, + .n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -1392,8 +1396,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = { */ int __init netlbl_unlabel_genl_init(void) { - return genl_register_family_with_ops(&netlbl_unlabel_gnl_family, - netlbl_unlabel_genl_ops); + return genl_register_family(&netlbl_unlabel_gnl_family); } /* diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 627f898c05b9..62bea4591054 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1832,7 +1832,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, /* Record the max length of recvmsg() calls for future allocations */ nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, - 16384); + SKB_WITH_OVERHEAD(32768)); copied = data_skb->len; if (len < copied) { @@ -2083,8 +2083,9 @@ static int netlink_dump(struct sock *sk) if (alloc_min_size < nlk->max_recvmsg_len) { alloc_size = nlk->max_recvmsg_len; - skb = alloc_skb(alloc_size, GFP_KERNEL | - __GFP_NOWARN | __GFP_NORETRY); + skb = alloc_skb(alloc_size, + (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | + __GFP_NOWARN | __GFP_NORETRY); } if (!skb) { alloc_size = alloc_min_size; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 23cc12639ba7..df0cbcddda2c 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -17,6 +17,7 @@ #include <linux/mutex.h> #include <linux/bitmap.h> #include <linux/rwsem.h> +#include <linux/idr.h> #include <net/sock.h> #include <net/genetlink.h> @@ -58,10 +59,8 @@ static void genl_unlock_all(void) up_write(&cb_lock); } -#define GENL_FAM_TAB_SIZE 16 -#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) +static DEFINE_IDR(genl_fam_idr); -static struct list_head family_ht[GENL_FAM_TAB_SIZE]; /* * Bitmap of multicast groups that are currently in use. * @@ -86,45 +85,29 @@ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; -static int genl_ctrl_event(int event, struct genl_family *family, +static int genl_ctrl_event(int event, const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id); -static inline unsigned int genl_family_hash(unsigned int id) +static const struct genl_family *genl_family_find_byid(unsigned int id) { - return id & GENL_FAM_TAB_MASK; + return idr_find(&genl_fam_idr, id); } -static inline struct list_head *genl_family_chain(unsigned int id) +static const struct genl_family *genl_family_find_byname(char *name) { - return &family_ht[genl_family_hash(id)]; -} - -static struct genl_family *genl_family_find_byid(unsigned int id) -{ - struct genl_family *f; + const struct genl_family *family; + unsigned int id; - list_for_each_entry(f, genl_family_chain(id), family_list) - if (f->id == id) - return f; + idr_for_each_entry(&genl_fam_idr, family, id) + if (strcmp(family->name, name) == 0) + return family; return NULL; } -static struct genl_family *genl_family_find_byname(char *name) -{ - struct genl_family *f; - int i; - - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) - list_for_each_entry(f, genl_family_chain(i), family_list) - if (strcmp(f->name, name) == 0) - return f; - - return NULL; -} - -static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) +static const struct genl_ops *genl_get_cmd(u8 cmd, + const struct genl_family *family) { int i; @@ -135,26 +118,6 @@ static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) return NULL; } -/* Of course we are going to have problems once we hit - * 2^16 alive types, but that can only happen by year 2K -*/ -static u16 genl_generate_id(void) -{ - static u16 id_gen_idx = GENL_MIN_ID; - int i; - - for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) { - if (id_gen_idx != GENL_ID_VFS_DQUOT && - id_gen_idx != GENL_ID_PMCRAID && - !genl_family_find_byid(id_gen_idx)) - return id_gen_idx; - if (++id_gen_idx > GENL_MAX_ID) - id_gen_idx = GENL_MIN_ID; - } - - return 0; -} - static int genl_allocate_reserve_groups(int n_groups, int *first_id) { unsigned long *new_groups; @@ -295,7 +258,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) return err; } -static void genl_unregister_mc_groups(struct genl_family *family) +static void genl_unregister_mc_groups(const struct genl_family *family) { struct net *net; int i; @@ -344,28 +307,21 @@ static int genl_validate_ops(const struct genl_family *family) } /** - * __genl_register_family - register a generic netlink family + * genl_register_family - register a generic netlink family * @family: generic netlink family * * Registers the specified family after validating it first. Only one * family may be registered with the same family name or identifier. - * The family id may equal GENL_ID_GENERATE causing an unique id to - * be automatically generated and assigned. * - * The family's ops array must already be assigned, you can use the - * genl_register_family_with_ops() helper function. + * The family's ops, multicast groups and module pointer must already + * be assigned. * * Return 0 on success or a negative error code. */ -int __genl_register_family(struct genl_family *family) +int genl_register_family(struct genl_family *family) { - int err = -EINVAL, i; - - if (family->id && family->id < GENL_MIN_ID) - goto errout; - - if (family->id > GENL_MAX_ID) - goto errout; + int err, i; + int start = GENL_START_ALLOC, end = GENL_MAX_ID; err = genl_validate_ops(family); if (err) @@ -378,18 +334,20 @@ int __genl_register_family(struct genl_family *family) goto errout_locked; } - if (family->id == GENL_ID_GENERATE) { - u16 newid = genl_generate_id(); - - if (!newid) { - err = -ENOMEM; - goto errout_locked; - } - - family->id = newid; - } else if (genl_family_find_byid(family->id)) { - err = -EEXIST; - goto errout_locked; + /* + * Sadly, a few cases need to be special-cased + * due to them having previously abused the API + * and having used their family ID also as their + * multicast group ID, so we use reserved IDs + * for both to be sure we can do that mapping. + */ + if (family == &genl_ctrl) { + /* and this needs to be special for initial family lookups */ + start = end = GENL_ID_CTRL; + } else if (strcmp(family->name, "pmcraid") == 0) { + start = end = GENL_ID_PMCRAID; + } else if (strcmp(family->name, "VFS_DQUOT") == 0) { + start = end = GENL_ID_VFS_DQUOT; } if (family->maxattr && !family->parallel_ops) { @@ -402,11 +360,15 @@ int __genl_register_family(struct genl_family *family) } else family->attrbuf = NULL; + family->id = idr_alloc(&genl_fam_idr, family, + start, end + 1, GFP_KERNEL); + if (!family->id) + goto errout_locked; + err = genl_validate_assign_mc_groups(family); if (err) - goto errout_locked; + goto errout_remove; - list_add_tail(&family->family_list, genl_family_chain(family->id)); genl_unlock_all(); /* send all events */ @@ -417,12 +379,13 @@ int __genl_register_family(struct genl_family *family) return 0; +errout_remove: + idr_remove(&genl_fam_idr, family->id); errout_locked: genl_unlock_all(); -errout: return err; } -EXPORT_SYMBOL(__genl_register_family); +EXPORT_SYMBOL(genl_register_family); /** * genl_unregister_family - unregister generic netlink family @@ -432,33 +395,29 @@ EXPORT_SYMBOL(__genl_register_family); * * Returns 0 on success or a negative error code. */ -int genl_unregister_family(struct genl_family *family) +int genl_unregister_family(const struct genl_family *family) { - struct genl_family *rc; - genl_lock_all(); - list_for_each_entry(rc, genl_family_chain(family->id), family_list) { - if (family->id != rc->id || strcmp(rc->name, family->name)) - continue; + if (genl_family_find_byid(family->id)) { + genl_unlock_all(); + return -ENOENT; + } - genl_unregister_mc_groups(family); + genl_unregister_mc_groups(family); - list_del(&rc->family_list); - family->n_ops = 0; - up_write(&cb_lock); - wait_event(genl_sk_destructing_waitq, - atomic_read(&genl_sk_destructing_cnt) == 0); - genl_unlock(); + idr_remove(&genl_fam_idr, family->id); - kfree(family->attrbuf); - genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); - return 0; - } + up_write(&cb_lock); + wait_event(genl_sk_destructing_waitq, + atomic_read(&genl_sk_destructing_cnt) == 0); + genl_unlock(); - genl_unlock_all(); + kfree(family->attrbuf); + + genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); - return -ENOENT; + return 0; } EXPORT_SYMBOL(genl_unregister_family); @@ -474,7 +433,7 @@ EXPORT_SYMBOL(genl_unregister_family); * Returns pointer to user specific header */ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, - struct genl_family *family, int flags, u8 cmd) + const struct genl_family *family, int flags, u8 cmd) { struct nlmsghdr *nlh; struct genlmsghdr *hdr; @@ -533,7 +492,7 @@ static int genl_lock_done(struct netlink_callback *cb) return rc; } -static int genl_family_rcv_msg(struct genl_family *family, +static int genl_family_rcv_msg(const struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh) { @@ -645,7 +604,7 @@ out: static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct genl_family *family; + const struct genl_family *family; int err; family = genl_family_find_byid(nlh->nlmsg_type); @@ -674,15 +633,9 @@ static void genl_rcv(struct sk_buff *skb) * Controller **************************************************************************/ -static struct genl_family genl_ctrl = { - .id = GENL_ID_CTRL, - .name = "nlctrl", - .version = 0x2, - .maxattr = CTRL_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family genl_ctrl; -static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, +static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { void *hdr; @@ -769,7 +722,7 @@ nla_put_failure: return -EMSGSIZE; } -static int ctrl_fill_mcgrp_info(struct genl_family *family, +static int ctrl_fill_mcgrp_info(const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) @@ -812,37 +765,30 @@ nla_put_failure: static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) { - - int i, n = 0; + int n = 0; struct genl_family *rt; struct net *net = sock_net(skb->sk); - int chains_to_skip = cb->args[0]; - int fams_to_skip = cb->args[1]; - - for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) { - n = 0; - list_for_each_entry(rt, genl_family_chain(i), family_list) { - if (!rt->netnsok && !net_eq(net, &init_net)) - continue; - if (++n < fams_to_skip) - continue; - if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - skb, CTRL_CMD_NEWFAMILY) < 0) - goto errout; - } + int fams_to_skip = cb->args[0]; + unsigned int id; - fams_to_skip = 0; - } + idr_for_each_entry(&genl_fam_idr, rt, id) { + if (!rt->netnsok && !net_eq(net, &init_net)) + continue; + + if (n++ < fams_to_skip) + continue; -errout: - cb->args[0] = i; - cb->args[1] = n; + if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, CTRL_CMD_NEWFAMILY) < 0) + break; + } + cb->args[0] = n; return skb->len; } -static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, +static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family, u32 portid, int seq, u8 cmd) { struct sk_buff *skb; @@ -862,7 +808,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, } static struct sk_buff * -ctrl_build_mcgrp_msg(struct genl_family *family, +ctrl_build_mcgrp_msg(const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id, u32 portid, int seq, u8 cmd) { @@ -892,7 +838,7 @@ static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = { static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; - struct genl_family *res = NULL; + const struct genl_family *res = NULL; int err = -EINVAL; if (info->attrs[CTRL_ATTR_FAMILY_ID]) { @@ -936,7 +882,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) return genlmsg_reply(msg, info); } -static int genl_ctrl_event(int event, struct genl_family *family, +static int genl_ctrl_event(int event, const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id) { @@ -990,27 +936,39 @@ static const struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; +static struct genl_family genl_ctrl __ro_after_init = { + .module = THIS_MODULE, + .ops = genl_ctrl_ops, + .n_ops = ARRAY_SIZE(genl_ctrl_ops), + .mcgrps = genl_ctrl_groups, + .n_mcgrps = ARRAY_SIZE(genl_ctrl_groups), + .id = GENL_ID_CTRL, + .name = "nlctrl", + .version = 0x2, + .maxattr = CTRL_ATTR_MAX, + .netnsok = true, +}; + static int genl_bind(struct net *net, int group) { - int i, err = -ENOENT; + struct genl_family *f; + int err = -ENOENT; + unsigned int id; down_read(&cb_lock); - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { - struct genl_family *f; - - list_for_each_entry(f, genl_family_chain(i), family_list) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; - - if (!f->netnsok && net != &init_net) - err = -ENOENT; - else if (f->mcast_bind) - err = f->mcast_bind(net, fam_grp); - else - err = 0; - break; - } + + idr_for_each_entry(&genl_fam_idr, f, id) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; + + if (!f->netnsok && net != &init_net) + err = -ENOENT; + else if (f->mcast_bind) + err = f->mcast_bind(net, fam_grp); + else + err = 0; + break; } } up_read(&cb_lock); @@ -1020,21 +978,19 @@ static int genl_bind(struct net *net, int group) static void genl_unbind(struct net *net, int group) { - int i; + struct genl_family *f; + unsigned int id; down_read(&cb_lock); - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { - struct genl_family *f; - list_for_each_entry(f, genl_family_chain(i), family_list) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; + idr_for_each_entry(&genl_fam_idr, f, id) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; - if (f->mcast_unbind) - f->mcast_unbind(net, fam_grp); - break; - } + if (f->mcast_unbind) + f->mcast_unbind(net, fam_grp); + break; } } up_read(&cb_lock); @@ -1074,13 +1030,9 @@ static struct pernet_operations genl_pernet_ops = { static int __init genl_init(void) { - int i, err; - - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) - INIT_LIST_HEAD(&family_ht[i]); + int err; - err = genl_register_family_with_ops_groups(&genl_ctrl, genl_ctrl_ops, - genl_ctrl_groups); + err = genl_register_family(&genl_ctrl); if (err < 0) goto problem; @@ -1096,6 +1048,25 @@ problem: subsys_initcall(genl_init); +/** + * genl_family_attrbuf - return family's attrbuf + * @family: the family + * + * Return the family's attrbuf, while validating that it's + * actually valid to access it. + * + * You cannot use this function with a family that has parallel_ops + * and you can only use it within (pre/post) doit/dumpit callbacks. + */ +struct nlattr **genl_family_attrbuf(const struct genl_family *family) +{ + if (!WARN_ON(family->parallel_ops)) + lockdep_assert_held(&genl_mutex); + + return family->attrbuf; +} +EXPORT_SYMBOL(genl_family_attrbuf); + static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, gfp_t flags) { @@ -1125,8 +1096,9 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, return err; } -int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, - u32 portid, unsigned int group, gfp_t flags) +int genlmsg_multicast_allns(const struct genl_family *family, + struct sk_buff *skb, u32 portid, + unsigned int group, gfp_t flags) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; @@ -1135,7 +1107,7 @@ int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, } EXPORT_SYMBOL(genlmsg_multicast_allns); -void genl_notify(struct genl_family *family, struct sk_buff *skb, +void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags) { struct net *net = genl_info_net(info); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index ea023b35f1c2..03f3d5c7beb8 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -38,14 +38,7 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = { { .name = NFC_GENL_MCAST_EVENT_NAME, }, }; -static struct genl_family nfc_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NFC_GENL_NAME, - .version = NFC_GENL_VERSION, - .maxattr = NFC_ATTR_MAX, -}; - +static struct genl_family nfc_genl_family; static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING, @@ -120,21 +113,20 @@ nla_put_failure: static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) { + struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family); struct nfc_dev *dev; int rc; u32 idx; rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize, - nfc_genl_family.attrbuf, - nfc_genl_family.maxattr, - nfc_genl_policy); + attrbuf, nfc_genl_family.maxattr, nfc_genl_policy); if (rc < 0) return ERR_PTR(rc); - if (!nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX]) + if (!attrbuf[NFC_ATTR_DEVICE_INDEX]) return ERR_PTR(-EINVAL); - idx = nla_get_u32(nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX]); + idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) @@ -1754,6 +1746,18 @@ static const struct genl_ops nfc_genl_ops[] = { }, }; +static struct genl_family nfc_genl_family __ro_after_init = { + .hdrsize = 0, + .name = NFC_GENL_NAME, + .version = NFC_GENL_VERSION, + .maxattr = NFC_ATTR_MAX, + .module = THIS_MODULE, + .ops = nfc_genl_ops, + .n_ops = ARRAY_SIZE(nfc_genl_ops), + .mcgrps = nfc_genl_mcgrps, + .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps), +}; + struct urelease_work { struct work_struct w; @@ -1839,9 +1843,7 @@ int __init nfc_genl_init(void) { int rc; - rc = genl_register_family_with_ops_groups(&nfc_genl_family, - nfc_genl_ops, - nfc_genl_mcgrps); + rc = genl_register_family(&nfc_genl_family); if (rc) return rc; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 863e992dfbc0..1105c4e29c62 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -62,7 +62,8 @@ struct ovs_frag_data { struct vport *vport; struct ovs_skb_cb cb; __be16 inner_protocol; - __u16 vlan_tci; + u16 network_offset; /* valid only for MPLS */ + u16 vlan_tci; __be16 vlan_proto; unsigned int l2_len; u8 l2_data[MAX_L2_LEN]; @@ -160,7 +161,7 @@ static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_mpls *mpls) { - __be32 *new_mpls_lse; + struct mpls_shim_hdr *new_mpls_lse; /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */ if (skb->encapsulation) @@ -180,8 +181,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_reset_mac_header(skb); skb_set_network_header(skb, skb->mac_len); - new_mpls_lse = (__be32 *)skb_mpls_header(skb); - *new_mpls_lse = mpls->mpls_lse; + new_mpls_lse = mpls_hdr(skb); + new_mpls_lse->label_stack_entry = mpls->mpls_lse; skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); @@ -202,7 +203,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, if (unlikely(err)) return err; - skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN); + skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN); memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), skb->mac_len); @@ -211,10 +212,10 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_reset_mac_header(skb); skb_set_network_header(skb, skb->mac_len); - /* skb_mpls_header() is used to locate the ethertype - * field correctly in the presence of VLAN tags. + /* mpls_hdr() is used to locate the ethertype field correctly in the + * presence of VLAN tags. */ - hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN); + hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); update_ethertype(skb, hdr, ethertype); if (eth_p_mpls(skb->protocol)) skb->protocol = ethertype; @@ -226,7 +227,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, const __be32 *mpls_lse, const __be32 *mask) { - __be32 *stack; + struct mpls_shim_hdr *stack; __be32 lse; int err; @@ -234,16 +235,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, if (unlikely(err)) return err; - stack = (__be32 *)skb_mpls_header(skb); - lse = OVS_MASKED(*stack, *mpls_lse, *mask); + stack = mpls_hdr(skb); + lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { - __be32 diff[] = { ~(*stack), lse }; + __be32 diff[] = { ~(stack->label_stack_entry), lse }; skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } - *stack = lse; + stack->label_stack_entry = lse; flow_key->mpls.top_lse = lse; return 0; } @@ -666,6 +667,12 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk skb_postpush_rcsum(skb, skb->data, data->l2_len); skb_reset_mac_header(skb); + if (eth_p_mpls(skb->protocol)) { + skb->inner_network_header = skb->network_header; + skb_set_network_header(skb, data->network_offset); + skb_reset_mac_len(skb); + } + ovs_vport_send(vport, skb); return 0; } @@ -684,7 +691,8 @@ static struct dst_ops ovs_dst_ops = { /* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is * ovs_vport_output(), which is called once per fragmented packet. */ -static void prepare_frag(struct vport *vport, struct sk_buff *skb) +static void prepare_frag(struct vport *vport, struct sk_buff *skb, + u16 orig_network_offset) { unsigned int hlen = skb_network_offset(skb); struct ovs_frag_data *data; @@ -694,6 +702,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb) data->vport = vport; data->cb = *OVS_CB(skb); data->inner_protocol = skb->inner_protocol; + data->network_offset = orig_network_offset; data->vlan_tci = skb->vlan_tci; data->vlan_proto = skb->vlan_proto; data->l2_len = hlen; @@ -706,6 +715,13 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb) static void ovs_fragment(struct net *net, struct vport *vport, struct sk_buff *skb, u16 mru, __be16 ethertype) { + u16 orig_network_offset = 0; + + if (eth_p_mpls(skb->protocol)) { + orig_network_offset = skb_network_offset(skb); + skb->network_header = skb->inner_network_header; + } + if (skb_network_offset(skb) > MAX_L2_LEN) { OVS_NLERR(1, "L2 header too long to fragment"); goto err; @@ -715,7 +731,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, struct dst_entry ovs_dst; unsigned long orig_dst; - prepare_frag(vport, skb); + prepare_frag(vport, skb, orig_network_offset); dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_dst.dev = vport->dev; @@ -735,7 +751,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, goto err; } - prepare_frag(vport, skb); + prepare_frag(vport, skb, orig_network_offset); memset(&ovs_rt, 0, sizeof(ovs_rt)); dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, DST_OBSOLETE_NONE, DST_NOCOUNT); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4d67ea856067..fa8760176b7d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -59,7 +59,6 @@ #include "vport-netdev.h" int ovs_net_id __read_mostly; -EXPORT_SYMBOL_GPL(ovs_net_id); static struct genl_family dp_packet_genl_family; static struct genl_family dp_flow_genl_family; @@ -131,7 +130,6 @@ int lockdep_ovsl_is_held(void) else return 1; } -EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held); #endif static struct vport *new_vport(const struct vport_parms *); @@ -672,8 +670,7 @@ static const struct genl_ops dp_packet_genl_ops[] = { } }; -static struct genl_family dp_packet_genl_family = { - .id = GENL_ID_GENERATE, +static struct genl_family dp_packet_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, @@ -682,6 +679,7 @@ static struct genl_family dp_packet_genl_family = { .parallel_ops = true, .ops = dp_packet_genl_ops, .n_ops = ARRAY_SIZE(dp_packet_genl_ops), + .module = THIS_MODULE, }; static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, @@ -1437,8 +1435,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { }, }; -static struct genl_family dp_flow_genl_family = { - .id = GENL_ID_GENERATE, +static struct genl_family dp_flow_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, @@ -1449,6 +1446,7 @@ static struct genl_family dp_flow_genl_family = { .n_ops = ARRAY_SIZE(dp_flow_genl_ops), .mcgrps = &ovs_dp_flow_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; static size_t ovs_dp_cmd_msg_size(void) @@ -1823,8 +1821,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = { }, }; -static struct genl_family dp_datapath_genl_family = { - .id = GENL_ID_GENERATE, +static struct genl_family dp_datapath_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, @@ -1835,6 +1832,7 @@ static struct genl_family dp_datapath_genl_family = { .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), .mcgrps = &ovs_dp_datapath_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; /* Called with ovs_mutex or RCU read lock. */ @@ -2245,8 +2243,7 @@ static const struct genl_ops dp_vport_genl_ops[] = { }, }; -struct genl_family dp_vport_genl_family = { - .id = GENL_ID_GENERATE, +struct genl_family dp_vport_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, @@ -2257,6 +2254,7 @@ struct genl_family dp_vport_genl_family = { .n_ops = ARRAY_SIZE(dp_vport_genl_ops), .mcgrps = &ovs_dp_vport_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; static struct genl_family * const dp_genl_families[] = { @@ -2274,7 +2272,7 @@ static void dp_unregister_genl(int n_families) genl_unregister_family(dp_genl_families[i]); } -static int dp_register_genl(void) +static int __init dp_register_genl(void) { int err; int i; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 634cc10d6dee..22087062bd10 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -343,7 +343,7 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) key->eth.cvlan.tci = 0; key->eth.cvlan.tpid = 0; - if (likely(skb_vlan_tag_present(skb))) { + if (skb_vlan_tag_present(skb)) { key->eth.vlan.tci = htons(skb->vlan_tci); key->eth.vlan.tpid = skb->vlan_proto; } else { @@ -633,12 +633,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) } else if (eth_p_mpls(key->eth.type)) { size_t stack_len = MPLS_HLEN; - /* In the presence of an MPLS label stack the end of the L2 - * header and the beginning of the L3 header differ. - * - * Advance network_header to the beginning of the L3 - * header. mac_len corresponds to the end of the L2 header. - */ + skb_set_inner_network_header(skb, skb->mac_len); while (1) { __be32 lse; @@ -646,12 +641,12 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (unlikely(error)) return 0; - memcpy(&lse, skb_network_header(skb), MPLS_HLEN); + memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); if (stack_len == MPLS_HLEN) memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); - skb_set_network_header(skb, skb->mac_len + stack_len); + skb_set_inner_network_header(skb, skb->mac_len + stack_len); if (lse & htonl(MPLS_LS_S_MASK)) break; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 95c36147a6e1..d5d6caecd072 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -89,15 +89,6 @@ static const struct ethtool_ops internal_dev_ethtool_ops = { .get_link = ethtool_op_get_link, }; -static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu) -{ - if (new_mtu < 68) - return -EINVAL; - - netdev->mtu = new_mtu; - return 0; -} - static void internal_dev_destructor(struct net_device *dev) { struct vport *vport = ovs_internal_dev_get_vport(dev); @@ -148,7 +139,6 @@ static const struct net_device_ops internal_dev_netdev_ops = { .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = internal_dev_change_mtu, .ndo_get_stats64 = internal_get_stats, .ndo_set_rx_headroom = internal_set_rx_headroom, }; @@ -176,7 +166,7 @@ static void do_setup(struct net_device *netdev) netdev->vlan_features = netdev->features; netdev->hw_enc_features = netdev->features; - netdev->features |= NETIF_F_HW_VLAN_CTAG_TX; + netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; netdev->hw_features = netdev->features & ~NETIF_F_LLTX; eth_hw_addr_random(netdev); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 4e3972344aa6..e825753de1e0 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -162,7 +162,6 @@ void ovs_netdev_detach_dev(struct vport *vport) netdev_master_upper_dev_get(vport->dev)); dev_set_promiscuity(vport->dev, -1); } -EXPORT_SYMBOL_GPL(ovs_netdev_detach_dev); static void netdev_destroy(struct vport *vport) { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 8f198437c724..9bb85b35a1fb 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -463,29 +463,13 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, ovs_dp_process_packet(skb, &key); return 0; } -EXPORT_SYMBOL_GPL(ovs_vport_receive); - -static void free_vport_rcu(struct rcu_head *rcu) -{ - struct vport *vport = container_of(rcu, struct vport, rcu); - - ovs_vport_free(vport); -} - -void ovs_vport_deferred_free(struct vport *vport) -{ - if (!vport) - return; - - call_rcu(&vport->rcu, free_vport_rcu); -} -EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); static unsigned int packet_length(const struct sk_buff *skb) { unsigned int length = skb->len - ETH_HLEN; - if (skb_vlan_tagged(skb)) + if (!skb_vlan_tag_present(skb) && + eth_type_vlan(skb->protocol)) length -= VLAN_HLEN; /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index f01f28a567ad..46e5b69927c7 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -149,7 +149,6 @@ struct vport_ops { struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, const struct vport_parms *); void ovs_vport_free(struct vport *); -void ovs_vport_deferred_free(struct vport *vport); #define VPORT_ALIGN 8 diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 33a4697d5539..11db0d619c00 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3952,6 +3952,7 @@ static int packet_notifier(struct notifier_block *this, } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); + fanout_release(sk); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index fa8237fdc57b..21c28b51be94 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -217,20 +217,10 @@ static netdev_tx_t gprs_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static int gprs_set_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11))) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops gprs_netdev_ops = { .ndo_open = gprs_open, .ndo_stop = gprs_close, .ndo_start_xmit = gprs_xmit, - .ndo_change_mtu = gprs_set_mtu, }; static void gprs_setup(struct net_device *dev) @@ -239,6 +229,8 @@ static void gprs_setup(struct net_device *dev) dev->type = ARPHRD_PHONET_PIPE; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = GPRS_DEFAULT_MTU; + dev->min_mtu = 576; + dev->max_mtu = (PHONET_MAX_MTU - 11); dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 10; diff --git a/net/rds/connection.c b/net/rds/connection.c index f5058559bb08..13f459dad4ef 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -689,21 +689,6 @@ void rds_conn_connect_if_down(struct rds_connection *conn) } EXPORT_SYMBOL_GPL(rds_conn_connect_if_down); -/* - * An error occurred on the connection - */ -void -__rds_conn_error(struct rds_connection *conn, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - vprintk(fmt, ap); - va_end(ap); - - rds_conn_drop(conn); -} - void __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...) { diff --git a/net/rds/ib.c b/net/rds/ib.c index 7eaf887e46f8..5680d90b0b77 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -160,7 +160,7 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; rds_ibdev->dev = device; - rds_ibdev->pd = ib_alloc_pd(device); + rds_ibdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(rds_ibdev->pd)) { rds_ibdev->pd = NULL; goto put_dev; diff --git a/net/rds/rds.h b/net/rds/rds.h index fd0bccb2f9f9..25532a46602f 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -683,10 +683,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len, struct rds_info_lengths *lens, int (*visitor)(struct rds_connection *, void *), size_t item_len); -__printf(2, 3) -void __rds_conn_error(struct rds_connection *conn, const char *, ...); -#define rds_conn_error(conn, fmt...) \ - __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) __printf(2, 3) void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...); diff --git a/net/rds/threads.c b/net/rds/threads.c index e42df11bf30a..e36e333a0aa0 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -171,8 +171,7 @@ void rds_connect_worker(struct work_struct *work) RDS_CONN_DOWN)) rds_queue_reconnect(cp); else - rds_conn_path_error(cp, - "RDS: connect failed\n"); + rds_conn_path_error(cp, "connect failed\n"); } } } diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 8dbf7bed2cc4..2d59c9be40e1 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -136,7 +136,8 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; struct sock *sk = sock->sk; struct rxrpc_local *local; - struct rxrpc_sock *rx = rxrpc_sk(sk), *prx; + struct rxrpc_sock *rx = rxrpc_sk(sk); + u16 service_id = srx->srx_service; int ret; _enter("%p,%p,%d", rx, saddr, len); @@ -160,15 +161,12 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) goto error_unlock; } - if (rx->srx.srx_service) { + if (service_id) { write_lock(&local->services_lock); - hlist_for_each_entry(prx, &local->services, listen_link) { - if (prx->srx.srx_service == rx->srx.srx_service) - goto service_in_use; - } - + if (rcu_access_pointer(local->service)) + goto service_in_use; rx->local = local; - hlist_add_head_rcu(&rx->listen_link, &local->services); + rcu_assign_pointer(local->service, rx); write_unlock(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; @@ -599,7 +597,6 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, rx->family = protocol; rx->calls = RB_ROOT; - INIT_HLIST_NODE(&rx->listen_link); spin_lock_init(&rx->incoming_lock); INIT_LIST_HEAD(&rx->sock_calls); INIT_LIST_HEAD(&rx->to_be_accepted); @@ -681,11 +678,9 @@ static int rxrpc_release_sock(struct sock *sk) sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); - ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1); - - if (!hlist_unhashed(&rx->listen_link)) { + if (rx->local && rcu_access_pointer(rx->local->service) == rx) { write_lock(&rx->local->services_lock); - hlist_del_rcu(&rx->listen_link); + rcu_assign_pointer(rx->local->service, NULL); write_unlock(&rx->local->services_lock); } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ca96e547cb9a..f60e35576526 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -93,7 +93,6 @@ struct rxrpc_sock { rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ struct rxrpc_local *local; /* local endpoint */ - struct hlist_node listen_link; /* link in the local endpoint's listen list */ struct rxrpc_backlog *backlog; /* Preallocation for services */ spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */ struct list_head sock_calls; /* List of calls owned by this socket */ @@ -145,9 +144,7 @@ struct rxrpc_skb_priv { u8 nr_jumbo; /* Number of jumbo subpackets */ }; union { - unsigned int offset; /* offset into buffer of next read */ int remain; /* amount of space remaining for next write */ - u32 error; /* network error code */ }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ @@ -216,7 +213,7 @@ struct rxrpc_local { struct list_head link; struct socket *socket; /* my UDP socket */ struct work_struct processor; - struct hlist_head services; /* services listening on this endpoint */ + struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ @@ -401,6 +398,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ }; @@ -413,6 +411,7 @@ enum rxrpc_call_event { RXRPC_CALL_EV_ABORT, /* need to generate abort */ RXRPC_CALL_EV_TIMER, /* Timer expired */ RXRPC_CALL_EV_RESEND, /* Tx resend required */ + RXRPC_CALL_EV_PING, /* Ping send required */ }; /* @@ -467,9 +466,10 @@ struct rxrpc_call { struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ - unsigned long ack_at; /* When deferred ACK needs to happen */ - unsigned long resend_at; /* When next resend needs to happen */ - unsigned long expire_at; /* When the call times out */ + ktime_t ack_at; /* When deferred ACK needs to happen */ + ktime_t resend_at; /* When next resend needs to happen */ + ktime_t ping_at; /* When next to send a ping */ + ktime_t expire_at; /* When the call times out */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ @@ -561,8 +561,10 @@ struct rxrpc_call { rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ - rxrpc_serial_t ackr_ping; /* Last ping sent */ - ktime_t ackr_ping_time; /* Time last ping sent */ + + /* ping management */ + rxrpc_serial_t ping_serial; /* Last ping sent */ + ktime_t ping_time; /* Time last ping sent */ /* transmission-phase ACK management */ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ @@ -603,7 +605,6 @@ enum rxrpc_skb_trace { rxrpc_skb_tx_cleaned, rxrpc_skb_tx_freed, rxrpc_skb_tx_got, - rxrpc_skb_tx_lost, rxrpc_skb_tx_new, rxrpc_skb_tx_rotated, rxrpc_skb_tx_seen, @@ -732,8 +733,10 @@ extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; enum rxrpc_timer_trace { rxrpc_timer_begin, rxrpc_timer_init_for_reply, + rxrpc_timer_init_for_send_reply, rxrpc_timer_expired, rxrpc_timer_set_for_ack, + rxrpc_timer_set_for_ping, rxrpc_timer_set_for_resend, rxrpc_timer_set_for_send, rxrpc_timer__nr_trace @@ -747,6 +750,7 @@ enum rxrpc_propose_ack_trace { rxrpc_propose_ack_ping_for_lost_ack, rxrpc_propose_ack_ping_for_lost_reply, rxrpc_propose_ack_ping_for_params, + rxrpc_propose_ack_processing_op, rxrpc_propose_ack_respond_to_ack, rxrpc_propose_ack_respond_to_ping, rxrpc_propose_ack_retry_tx, @@ -781,7 +785,7 @@ extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10]; extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9]; extern const char *const rxrpc_pkts[]; -extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; +extern const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; #include <trace/events/rxrpc.h> @@ -809,7 +813,8 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace); +void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); +void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); @@ -1072,8 +1077,9 @@ extern const s8 rxrpc_ack_priority[]; /* * output.c */ -int rxrpc_send_call_packet(struct rxrpc_call *, u8); -int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *); +int rxrpc_send_ack_packet(struct rxrpc_call *, bool); +int rxrpc_send_abort_packet(struct rxrpc_call *); +int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); void rxrpc_reject_packets(struct rxrpc_local *); /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a8d39d7cf42c..832d854c2d5c 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -331,14 +331,14 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_sock *rx; struct rxrpc_call *call; + u16 service_id = sp->hdr.serviceId; _enter(""); /* Get the socket providing the service */ - hlist_for_each_entry_rcu_bh(rx, &local->services, listen_link) { - if (rx->srx.srx_service == sp->hdr.serviceId) - goto found_service; - } + rx = rcu_dereference(local->service); + if (rx && service_id == rx->srx.srx_service) + goto found_service; trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EOPNOTSUPP); @@ -565,7 +565,7 @@ out_discard: write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); if (abort) { - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); } diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 0e8478012212..97a17ada4431 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -24,36 +24,99 @@ /* * Set the timer */ -void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why) +void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, + ktime_t now) { - unsigned long t, now = jiffies; - - read_lock_bh(&call->state_lock); + unsigned long t_j, now_j = jiffies; + ktime_t t; + bool queue = false; if (call->state < RXRPC_CALL_COMPLETE) { t = call->expire_at; - if (time_before_eq(t, now)) + if (!ktime_after(t, now)) { + trace_rxrpc_timer(call, why, now, now_j); + queue = true; goto out; + } - if (time_after(call->resend_at, now) && - time_before(call->resend_at, t)) + if (!ktime_after(call->resend_at, now)) { + call->resend_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + queue = true; + } else if (ktime_before(call->resend_at, t)) { t = call->resend_at; + } - if (time_after(call->ack_at, now) && - time_before(call->ack_at, t)) + if (!ktime_after(call->ack_at, now)) { + call->ack_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) + queue = true; + } else if (ktime_before(call->ack_at, t)) { t = call->ack_at; + } - if (call->timer.expires != t || !timer_pending(&call->timer)) { - mod_timer(&call->timer, t); - trace_rxrpc_timer(call, why, now); + if (!ktime_after(call->ping_at, now)) { + call->ping_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) + queue = true; + } else if (ktime_before(call->ping_at, t)) { + t = call->ping_at; + } + + t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); + t_j += jiffies; + + /* We have to make sure that the calculated jiffies value falls + * at or after the nsec value, or we may loop ceaselessly + * because the timer times out, but we haven't reached the nsec + * timeout yet. + */ + t_j++; + + if (call->timer.expires != t_j || !timer_pending(&call->timer)) { + mod_timer(&call->timer, t_j); + trace_rxrpc_timer(call, why, now, now_j); } } out: + if (queue) + rxrpc_queue_call(call); +} + +/* + * Set the timer + */ +void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, + ktime_t now) +{ + read_lock_bh(&call->state_lock); + __rxrpc_set_timer(call, why, now); read_unlock_bh(&call->state_lock); } /* + * Propose a PING ACK be sent. + */ +static void rxrpc_propose_ping(struct rxrpc_call *call, + bool immediate, bool background) +{ + if (immediate) { + if (background && + !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) + rxrpc_queue_call(call); + } else { + ktime_t now = ktime_get_real(); + ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay); + + if (ktime_before(ping_at, call->ping_at)) { + call->ping_at = ping_at; + rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now); + } + } +} + +/* * propose an ACK be sent */ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, @@ -62,9 +125,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, enum rxrpc_propose_ack_trace why) { enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; - unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; + unsigned int expiry = rxrpc_soft_ack_delay; + ktime_t now, ack_at; s8 prior = rxrpc_ack_priority[ack_reason]; + /* Pings are handled specially because we don't want to accidentally + * lose a ping response by subsuming it into a ping. + */ + if (ack_reason == RXRPC_ACK_PING) { + rxrpc_propose_ping(call, immediate, background); + goto trace; + } + /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial * numbers, but we don't alter the timeout. */ @@ -100,7 +172,6 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, expiry = rxrpc_soft_ack_delay; break; - case RXRPC_ACK_PING: case RXRPC_ACK_IDLE: if (rxrpc_idle_ack_delay < expiry) expiry = rxrpc_idle_ack_delay; @@ -111,7 +182,6 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, break; } - now = jiffies; if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { _debug("already scheduled"); } else if (immediate || expiry == 0) { @@ -120,11 +190,11 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, background) rxrpc_queue_call(call); } else { - ack_at = now + expiry; - _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now); - if (time_before(ack_at, call->ack_at)) { + now = ktime_get_real(); + ack_at = ktime_add_ms(now, expiry); + if (ktime_before(ack_at, call->ack_at)) { call->ack_at = ack_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_ack); + rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now); } } @@ -157,12 +227,12 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) /* * Perform retransmission of NAK'd and unack'd packets. */ -static void rxrpc_resend(struct rxrpc_call *call) +static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; rxrpc_seq_t cursor, seq, top; - ktime_t now = ktime_get_real(), max_age, oldest, resend_at, ack_ts; + ktime_t max_age, oldest, ack_ts; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; @@ -212,14 +282,7 @@ static void rxrpc_resend(struct rxrpc_call *call) ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } - resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); - call->resend_at = jiffies + - nsecs_to_jiffies(ktime_to_ns(ktime_sub(resend_at, now))) + - 1; /* We have to make sure that the calculated jiffies value - * falls at or after the nsec value, or we shall loop - * ceaselessly because the timer times out, but we haven't - * reached the nsec timeout yet. - */ + call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); if (unacked) rxrpc_congestion_timeout(call); @@ -229,14 +292,14 @@ static void rxrpc_resend(struct rxrpc_call *call) * retransmitting data. */ if (!retrans) { - rxrpc_set_timer(call, rxrpc_timer_set_for_resend); + rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); spin_unlock_bh(&call->lock); ack_ts = ktime_sub(now, call->acks_latest_ts); if (ktime_to_ns(ack_ts) < call->peer->rtt) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call, true); goto out; } @@ -256,7 +319,7 @@ static void rxrpc_resend(struct rxrpc_call *call) rxrpc_get_skb(skb, rxrpc_skb_tx_got); spin_unlock_bh(&call->lock); - if (rxrpc_send_data_packet(call, skb) < 0) { + if (rxrpc_send_data_packet(call, skb, true) < 0) { rxrpc_free_skb(skb, rxrpc_skb_tx_freed); return; } @@ -301,7 +364,7 @@ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - unsigned long now; + ktime_t now; rxrpc_see_call(call); @@ -311,38 +374,41 @@ void rxrpc_process_call(struct work_struct *work) recheck_state: if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); goto recheck_state; } if (call->state == RXRPC_CALL_COMPLETE) { del_timer_sync(&call->timer); + rxrpc_notify_socket(call); goto out_put; } - now = jiffies; - if (time_after_eq(now, call->expire_at)) { + now = ktime_get_real(); + if (ktime_before(call->expire_at, now)) { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; } - if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || - time_after_eq(now, call->ack_at)) { - call->ack_at = call->expire_at; + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { if (call->ackr_reason) { - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call, false); goto recheck_state; } } - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) || - time_after_eq(now, call->resend_at)) { - rxrpc_resend(call); + if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { + rxrpc_send_ack_packet(call, true); + goto recheck_state; + } + + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) { + rxrpc_resend(call, now); goto recheck_state; } - rxrpc_set_timer(call, rxrpc_timer_set_for_resend); + rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); /* other events may have been raised since we started checking */ if (call->events && call->state < RXRPC_CALL_COMPLETE) { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index d4b3293b78fa..4353a29f3b57 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -19,11 +19,6 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -/* - * Maximum lifetime of a call (in jiffies). - */ -unsigned int rxrpc_max_call_lifetime = 60 * HZ; - const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_UNINITIALISED] = "Uninit ", [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn", @@ -76,10 +71,8 @@ static void rxrpc_call_timer_expired(unsigned long _call) _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) { - trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); - rxrpc_queue_call(call); - } + if (call->state < RXRPC_CALL_COMPLETE) + rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); } /* @@ -207,14 +200,15 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, */ static void rxrpc_start_call_timer(struct rxrpc_call *call) { - unsigned long expire_at; + ktime_t now = ktime_get_real(), expire_at; - expire_at = jiffies + rxrpc_max_call_lifetime; + expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); call->expire_at = expire_at; call->ack_at = expire_at; + call->ping_at = expire_at; call->resend_at = expire_at; - call->timer.expires = expire_at + 1; - rxrpc_set_timer(call, rxrpc_timer_begin); + call->timer.expires = jiffies + LONG_MAX / 2; + rxrpc_set_timer(call, rxrpc_timer_begin, now); } /* @@ -505,7 +499,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) struct rxrpc_call, sock_link); rxrpc_get_call(call, rxrpc_call_got); rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index c76a125df891..60ef9605167e 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -200,7 +200,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) } atomic_set(&conn->usage, 1); - if (conn->params.exclusive) + if (cp->exclusive) __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); conn->params = *cp; @@ -576,28 +576,42 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, } /* + * Assign channels and callNumbers to waiting calls with channel_lock + * held by caller. + */ +static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn) +{ + u8 avail, mask; + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + mask = RXRPC_ACTIVE_CHANS_MASK; + break; + default: + return; + } + + while (!list_empty(&conn->waiting_calls) && + (avail = ~conn->active_chans, + avail &= mask, + avail != 0)) + rxrpc_activate_one_channel(conn, __ffs(avail)); +} + +/* * Assign channels and callNumbers to waiting calls. */ static void rxrpc_activate_channels(struct rxrpc_connection *conn) { - unsigned char mask; - _enter("%d", conn->debug_id); trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans); - if (conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE || - conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) + if (conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) return; spin_lock(&conn->channel_lock); - - while (!list_empty(&conn->waiting_calls) && - (mask = ~conn->active_chans, - mask &= RXRPC_ACTIVE_CHANS_MASK, - mask != 0)) - rxrpc_activate_one_channel(conn, __ffs(mask)); - + rxrpc_activate_channels_locked(conn); spin_unlock(&conn->channel_lock); _leave(""); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 37609ce89f52..3f9d8d7ec632 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -276,7 +276,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; case RXRPC_PACKET_TYPE_ABORT: - if (skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) < 0) return -EPROTO; abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 094720dd1eaf..44fb8d893c7d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -41,10 +41,10 @@ static void rxrpc_proto_abort(const char *why, */ static void rxrpc_congestion_management(struct rxrpc_call *call, struct sk_buff *skb, - struct rxrpc_ack_summary *summary) + struct rxrpc_ack_summary *summary, + rxrpc_serial_t acked_serial) { enum rxrpc_congest_change change = rxrpc_cong_no_change; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int cumulative_acks = call->cong_cumul_acks; unsigned int cwnd = call->cong_cwnd; bool resend = false; @@ -57,7 +57,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, call->cong_ssthresh = max_t(unsigned int, summary->flight_size / 2, 2); cwnd = 1; - if (cwnd > call->cong_ssthresh && + if (cwnd >= call->cong_ssthresh && call->cong_mode == RXRPC_CALL_SLOW_START) { call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; call->cong_tstamp = skb->tstamp; @@ -82,7 +82,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, goto packet_loss_detected; if (summary->cumulative_acks > 0) cwnd += 1; - if (cwnd > call->cong_ssthresh) { + if (cwnd >= call->cong_ssthresh) { call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; call->cong_tstamp = skb->tstamp; } @@ -161,7 +161,7 @@ resume_normality: call->cong_dup_acks = 0; call->cong_extra = 0; call->cong_tstamp = skb->tstamp; - if (cwnd <= call->cong_ssthresh) + if (cwnd < call->cong_ssthresh) call->cong_mode = RXRPC_CALL_SLOW_START; else call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; @@ -172,7 +172,7 @@ out_no_clear_ca: cwnd = RXRPC_RXTX_BUFF_SIZE - 1; call->cong_cwnd = cwnd; call->cong_cumul_acks = cumulative_acks; - trace_rxrpc_congest(call, summary, sp->hdr.serial, change); + trace_rxrpc_congest(call, summary, acked_serial, change); if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) rxrpc_queue_call(call); return; @@ -328,7 +328,8 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) call->resend_at = call->expire_at; call->ack_at = call->expire_at; spin_unlock_bh(&call->lock); - rxrpc_set_timer(call, rxrpc_timer_init_for_reply); + rxrpc_set_timer(call, rxrpc_timer_init_for_reply, + ktime_get_real()); } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) @@ -358,7 +359,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) static bool rxrpc_validate_jumbo(struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int offset = sp->offset; + unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = skb->len; int nr_jumbo = 1; u8 flags = sp->hdr.flags; @@ -419,7 +420,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int offset = sp->offset; + unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int ix; rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; rxrpc_seq_t seq = sp->hdr.seq, hard_ack; @@ -624,9 +625,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call, rxrpc_serial_t ping_serial; ktime_t ping_time; - ping_time = call->ackr_ping_time; + ping_time = call->ping_time; smp_rmb(); - ping_serial = call->ackr_ping; + ping_serial = call->ping_serial; if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || before(orig_serial, ping_serial)) @@ -658,6 +659,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) rwind = RXRPC_RXTX_BUFF_SIZE - 1; call->tx_winsize = rwind; + if (call->cong_ssthresh > rwind) + call->cong_ssthresh = rwind; mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); @@ -744,15 +747,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, } buf; rxrpc_serial_t acked_serial; rxrpc_seq_t first_soft_ack, hard_ack; - int nr_acks, offset; + int nr_acks, offset, ioffset; _enter(""); - if (skb_copy_bits(skb, sp->offset, &buf.ack, sizeof(buf.ack)) < 0) { + offset = sizeof(struct rxrpc_wire_header); + if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) { _debug("extraction failure"); return rxrpc_proto_abort("XAK", call, 0); } - sp->offset += sizeof(buf.ack); + offset += sizeof(buf.ack); acked_serial = ntohl(buf.ack.serial); first_soft_ack = ntohl(buf.ack.firstPacket); @@ -790,9 +794,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_propose_ack_respond_to_ack); } - offset = sp->offset + nr_acks + 3; - if (skb->len >= offset + sizeof(buf.info)) { - if (skb_copy_bits(skb, offset, &buf.info, sizeof(buf.info)) < 0) + ioffset = offset + nr_acks + 3; + if (skb->len >= ioffset + sizeof(buf.info)) { + if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) return rxrpc_proto_abort("XAI", call, 0); rxrpc_input_ackinfo(call, skb, &buf.info); } @@ -830,7 +834,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_rotate_tx_window(call, hard_ack, &summary); if (nr_acks > 0) { - if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) + if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) return rxrpc_proto_abort("XSA", call, 0); rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, &summary); @@ -843,12 +847,13 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & RXRPC_TX_ANNO_LAST && - summary.nr_acks == call->tx_top - hard_ack) + summary.nr_acks == call->tx_top - hard_ack && + rxrpc_is_client_call(call)) rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, false, true, rxrpc_propose_ack_ping_for_lost_reply); - return rxrpc_congestion_management(call, skb, &summary); + return rxrpc_congestion_management(call, skb, &summary, acked_serial); } /* @@ -878,7 +883,8 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) _enter(""); if (skb->len >= 4 && - skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) >= 0) + skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) >= 0) abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); @@ -933,6 +939,33 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, } /* + * Handle a new call on a channel implicitly completing the preceding call on + * that channel. + * + * TODO: If callNumber > call_id + 1, renegotiate security. + */ +static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, + struct rxrpc_call *call) +{ + switch (call->state) { + case RXRPC_CALL_SERVER_AWAIT_ACK: + rxrpc_call_completed(call); + break; + case RXRPC_CALL_COMPLETE: + break; + default: + if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) { + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + rxrpc_queue_call(call); + } + break; + } + + __rxrpc_disconnect_call(conn, call); + rxrpc_notify_socket(call); +} + +/* * post connection-level events to the connection * - this includes challenges, responses, some aborts and call terminal packet * retransmission. @@ -994,7 +1027,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) sp->hdr.securityIndex = whdr.securityIndex; sp->hdr._rsvd = ntohs(whdr._rsvd); sp->hdr.serviceId = ntohs(whdr.serviceId); - sp->offset = sizeof(whdr); return 0; } @@ -1141,6 +1173,16 @@ void rxrpc_data_ready(struct sock *udp_sk) } call = rcu_dereference(chan->call); + + if (sp->hdr.callNumber > chan->call_id) { + if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) { + rcu_read_unlock(); + goto reject_packet; + } + if (call) + rxrpc_input_implicit_end_call(conn, call); + call = NULL; + } } else { skew = 0; call = NULL; diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 190f68bd9e27..540d3955c1bc 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -95,7 +95,8 @@ void rxrpc_process_local_events(struct rxrpc_local *local) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (skb_copy_bits(skb, sp->offset, &v, 1) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &v, 1) < 0) return; _proto("Rx VERSION { %02x }", v); if (v == 0) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index e3fad80b0795..ff4864d550b8 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -86,7 +86,6 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) atomic_set(&local->usage, 1); INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); - INIT_HLIST_HEAD(&local->services); init_rwsem(&local->defrag_sem); skb_queue_head_init(&local->reject_queue); skb_queue_head_init(&local->event_queue); @@ -292,7 +291,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) mutex_unlock(&rxrpc_local_mutex); ASSERT(RB_EMPTY_ROOT(&local->client_conns)); - ASSERT(hlist_empty(&local->services)); + ASSERT(!local->service); if (socket) { local->socket = NULL; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index aedb8978226d..6dee55fad2d3 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -21,28 +21,33 @@ unsigned int rxrpc_max_backlog __read_mostly = 10; /* + * Maximum lifetime of a call (in mx). + */ +unsigned int rxrpc_max_call_lifetime = 60 * 1000; + +/* * How long to wait before scheduling ACK generation after seeing a - * packet with RXRPC_REQUEST_ACK set (in jiffies). + * packet with RXRPC_REQUEST_ACK set (in ms). */ unsigned int rxrpc_requested_ack_delay = 1; /* - * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). + * How long to wait before scheduling an ACK with subtype DELAY (in ms). * * We use this when we've received new data packets. If those packets aren't * all consumed within this time we will send a DELAY ACK if an ACK was not * requested to let the sender know it doesn't need to resend. */ -unsigned int rxrpc_soft_ack_delay = 1 * HZ; +unsigned int rxrpc_soft_ack_delay = 1 * 1000; /* - * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). + * How long to wait before scheduling an ACK with subtype IDLE (in ms). * * We use this when we've consumed some previously soft-ACK'd packets when * further packets aren't immediately received to decide when to send an IDLE * ACK let the other end know that it can free up its Tx buffer space. */ -unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; +unsigned int rxrpc_idle_ack_delay = 0.5 * 1000; /* * Receive window size in packets. This indicates the maximum number of @@ -88,10 +93,9 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_EXCEEDS_WINDOW] = 6, [RXRPC_ACK_NOSPACE] = 7, [RXRPC_ACK_PING_RESPONSE] = 8, - [RXRPC_ACK_PING] = 9, }; -const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { +const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL", "-?-" }; @@ -108,7 +112,6 @@ const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = { [rxrpc_skb_tx_cleaned] = "Tx CLN", [rxrpc_skb_tx_freed] = "Tx FRE", [rxrpc_skb_tx_got] = "Tx GOT", - [rxrpc_skb_tx_lost] = "Tx *L*", [rxrpc_skb_tx_new] = "Tx NEW", [rxrpc_skb_tx_rotated] = "Tx ROT", [rxrpc_skb_tx_seen] = "Tx SEE", @@ -192,7 +195,9 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { [rxrpc_timer_begin] = "Begin ", [rxrpc_timer_expired] = "*EXPR*", [rxrpc_timer_init_for_reply] = "IniRpl", + [rxrpc_timer_init_for_send_reply] = "SndRpl", [rxrpc_timer_set_for_ack] = "SetAck", + [rxrpc_timer_set_for_ping] = "SetPng", [rxrpc_timer_set_for_send] = "SetTx ", [rxrpc_timer_set_for_resend] = "SetRTx", }; @@ -203,6 +208,7 @@ const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { [rxrpc_propose_ack_ping_for_lost_ack] = "LostAck", [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl", [rxrpc_propose_ack_ping_for_params] = "Params ", + [rxrpc_propose_ack_processing_op] = "ProcOp ", [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", [rxrpc_propose_ack_retry_tx] = "RetryTx", diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index cf43a715685e..5dab1ff3a6c2 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -19,26 +19,27 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -struct rxrpc_pkt_buffer { +struct rxrpc_ack_buffer { struct rxrpc_wire_header whdr; - union { - struct { - struct rxrpc_ackpacket ack; - u8 acks[255]; - u8 pad[3]; - }; - __be32 abort_code; - }; + struct rxrpc_ackpacket ack; + u8 acks[255]; + u8 pad[3]; struct rxrpc_ackinfo ackinfo; }; +struct rxrpc_abort_buffer { + struct rxrpc_wire_header whdr; + __be32 abort_code; +}; + /* * Fill out an ACK packet. */ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, - struct rxrpc_pkt_buffer *pkt, + struct rxrpc_ack_buffer *pkt, rxrpc_seq_t *_hard_ack, - rxrpc_seq_t *_top) + rxrpc_seq_t *_top, + u8 reason) { rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top, seq; @@ -58,10 +59,10 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, pkt->ack.firstPacket = htonl(hard_ack + 1); pkt->ack.previousPacket = htonl(call->ackr_prev_seq); pkt->ack.serial = htonl(serial); - pkt->ack.reason = call->ackr_reason; + pkt->ack.reason = reason; pkt->ack.nAcks = top - hard_ack; - if (pkt->ack.reason == RXRPC_ACK_PING) + if (reason == RXRPC_ACK_PING) pkt->whdr.flags |= RXRPC_REQUEST_ACK; if (after(top, hard_ack)) { @@ -91,22 +92,19 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, } /* - * Send an ACK or ABORT call packet. + * Send an ACK call packet. */ -int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) +int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) { struct rxrpc_connection *conn = NULL; - struct rxrpc_pkt_buffer *pkt; + struct rxrpc_ack_buffer *pkt; struct msghdr msg; struct kvec iov[2]; rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; size_t len, n; - bool ping = false; - int ioc, ret; - u32 abort_code; - - _enter("%u,%s", call->debug_id, rxrpc_pkts[type]); + int ret; + u8 reason; spin_lock_bh(&call->lock); if (call->conn) @@ -131,68 +129,44 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) pkt->whdr.cid = htonl(call->cid); pkt->whdr.callNumber = htonl(call->call_id); pkt->whdr.seq = 0; - pkt->whdr.type = type; - pkt->whdr.flags = conn->out_clientflag; + pkt->whdr.type = RXRPC_PACKET_TYPE_ACK; + pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag; pkt->whdr.userStatus = 0; pkt->whdr.securityIndex = call->security_ix; pkt->whdr._rsvd = 0; pkt->whdr.serviceId = htons(call->service_id); - iov[0].iov_base = pkt; - iov[0].iov_len = sizeof(pkt->whdr); - len = sizeof(pkt->whdr); - - switch (type) { - case RXRPC_PACKET_TYPE_ACK: - spin_lock_bh(&call->lock); + spin_lock_bh(&call->lock); + if (ping) { + reason = RXRPC_ACK_PING; + } else { + reason = call->ackr_reason; if (!call->ackr_reason) { spin_unlock_bh(&call->lock); ret = 0; goto out; } - ping = (call->ackr_reason == RXRPC_ACK_PING); - n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); call->ackr_reason = 0; + } + n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason); - spin_unlock_bh(&call->lock); - - - pkt->whdr.flags |= RXRPC_SLOW_START_OK; - - iov[0].iov_len += sizeof(pkt->ack) + n; - iov[1].iov_base = &pkt->ackinfo; - iov[1].iov_len = sizeof(pkt->ackinfo); - len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo); - ioc = 2; - break; - - case RXRPC_PACKET_TYPE_ABORT: - abort_code = call->abort_code; - pkt->abort_code = htonl(abort_code); - iov[0].iov_len += sizeof(pkt->abort_code); - len += sizeof(pkt->abort_code); - ioc = 1; - break; + spin_unlock_bh(&call->lock); - default: - BUG(); - ret = -ENOANO; - goto out; - } + iov[0].iov_base = pkt; + iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; + iov[1].iov_base = &pkt->ackinfo; + iov[1].iov_len = sizeof(pkt->ackinfo); + len = iov[0].iov_len + iov[1].iov_len; serial = atomic_inc_return(&conn->serial); pkt->whdr.serial = htonl(serial); - switch (type) { - case RXRPC_PACKET_TYPE_ACK: - trace_rxrpc_tx_ack(call, serial, - ntohl(pkt->ack.firstPacket), - ntohl(pkt->ack.serial), - pkt->ack.reason, pkt->ack.nAcks); - break; - } + trace_rxrpc_tx_ack(call, serial, + ntohl(pkt->ack.firstPacket), + ntohl(pkt->ack.serial), + pkt->ack.reason, pkt->ack.nAcks); if (ping) { - call->ackr_ping = serial; + call->ping_serial = serial; smp_wmb(); /* We need to stick a time in before we send the packet in case * the reply gets back before kernel_sendmsg() completes - but @@ -201,19 +175,19 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) * the packet transmission is more likely to happen towards the * end of the kernel_sendmsg() call. */ - call->ackr_ping_time = ktime_get_real(); + call->ping_time = ktime_get_real(); set_bit(RXRPC_CALL_PINGING, &call->flags); trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); } - ret = kernel_sendmsg(conn->params.local->socket, - &msg, iov, ioc, len); + + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ping) - call->ackr_ping_time = ktime_get_real(); + call->ping_time = ktime_get_real(); - if (type == RXRPC_PACKET_TYPE_ACK && - call->state < RXRPC_CALL_COMPLETE) { + if (call->state < RXRPC_CALL_COMPLETE) { if (ret < 0) { - clear_bit(RXRPC_CALL_PINGING, &call->flags); + if (ping) + clear_bit(RXRPC_CALL_PINGING, &call->flags); rxrpc_propose_ACK(call, pkt->ack.reason, ntohs(pkt->ack.maxSkew), ntohl(pkt->ack.serial), @@ -236,9 +210,60 @@ out: } /* + * Send an ABORT call packet. + */ +int rxrpc_send_abort_packet(struct rxrpc_call *call) +{ + struct rxrpc_connection *conn = NULL; + struct rxrpc_abort_buffer pkt; + struct msghdr msg; + struct kvec iov[1]; + rxrpc_serial_t serial; + int ret; + + spin_lock_bh(&call->lock); + if (call->conn) + conn = rxrpc_get_connection_maybe(call->conn); + spin_unlock_bh(&call->lock); + if (!conn) + return -ECONNRESET; + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt.whdr.epoch = htonl(conn->proto.epoch); + pkt.whdr.cid = htonl(call->cid); + pkt.whdr.callNumber = htonl(call->call_id); + pkt.whdr.seq = 0; + pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT; + pkt.whdr.flags = conn->out_clientflag; + pkt.whdr.userStatus = 0; + pkt.whdr.securityIndex = call->security_ix; + pkt.whdr._rsvd = 0; + pkt.whdr.serviceId = htons(call->service_id); + pkt.abort_code = htonl(call->abort_code); + + iov[0].iov_base = &pkt; + iov[0].iov_len = sizeof(pkt); + + serial = atomic_inc_return(&conn->serial); + pkt.whdr.serial = htonl(serial); + + ret = kernel_sendmsg(conn->params.local->socket, + &msg, iov, 1, sizeof(pkt)); + + rxrpc_put_connection(conn); + return ret; +} + +/* * send a packet through the transport endpoint */ -int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) +int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, + bool retrans) { struct rxrpc_connection *conn = call->conn; struct rxrpc_wire_header whdr; @@ -247,6 +272,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) struct kvec iov[2]; rxrpc_serial_t serial; size_t len; + bool lost = false; int ret, opt; _enter(",{%d}", skb->len); @@ -281,20 +307,20 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. */ - if (call->cong_mode == RXRPC_CALL_FAST_RETRANSMIT || - (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || - ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), - ktime_get_real())) + if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && + (retrans || + call->cong_mode == RXRPC_CALL_SLOW_START || + (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real()))) whdr.flags |= RXRPC_REQUEST_ACK; if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, - whdr.flags, true); - rxrpc_lose_skb(skb, rxrpc_skb_tx_lost); - _leave(" = 0 [lose]"); - return 0; + ret = 0; + lost = true; + goto done; } } @@ -319,7 +345,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb) goto send_fragmentable; done: - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, false); + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, + retrans, lost); if (ret >= 0) { ktime_t now = ktime_get_real(); skb->tstamp = now; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 038ae62ddb4d..c29362d50a92 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -143,7 +143,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, rxrpc_propose_ack_terminal_ack); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call, false); } write_lock_bh(&call->state_lock); @@ -151,17 +151,21 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) switch (call->state) { case RXRPC_CALL_CLIENT_RECV_REPLY: __rxrpc_call_completed(call); + write_unlock_bh(&call->state_lock); break; case RXRPC_CALL_SERVER_RECV_REQUEST: call->tx_phase = true; call->state = RXRPC_CALL_SERVER_ACK_REQUEST; + call->ack_at = call->expire_at; + write_unlock_bh(&call->state_lock); + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true, + rxrpc_propose_ack_processing_op); break; default: + write_unlock_bh(&call->state_lock); break; } - - write_unlock_bh(&call->state_lock); } /* @@ -212,7 +216,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) true, false, rxrpc_propose_ack_rotate_rx); if (call->ackr_reason) - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call, false); } } @@ -261,15 +265,13 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, u8 *_annotation, unsigned int *_offset, unsigned int *_len) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int offset = *_offset; + unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = *_len; int ret; u8 annotation = *_annotation; /* Locate the subpacket */ - offset = sp->offset; - len = skb->len - sp->offset; + len = skb->len - offset; if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) { offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) * RXRPC_JUMBO_SUBPKTLEN); @@ -654,7 +656,7 @@ excess_data: goto out; call_complete: *_abort = call->abort_code; - ret = call->error; + ret = -call->error; if (call->completion == RXRPC_CALL_SUCCEEDED) { ret = 1; if (size > 0) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 88d080a1a3de..4374e7b9c7bf 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -381,7 +381,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); _leave(" = -EPROTO"); return -EPROTO; @@ -471,7 +471,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); _leave(" = -EPROTO"); return -EPROTO; @@ -523,7 +523,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, if (cksum != expected_cksum) { rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); _leave(" = -EPROTO [csum failed]"); return -EPROTO; } @@ -771,7 +771,8 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, } abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sp->offset, &challenge, sizeof(challenge)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &challenge, sizeof(challenge)) < 0) goto protocol_error; version = ntohl(challenge.version); @@ -1028,7 +1029,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sp->offset, &response, sizeof(response)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &response, sizeof(response)) < 0) goto protocol_error; if (!pskb_pull(skb, sizeof(response))) BUG(); @@ -1057,7 +1059,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, return -ENOMEM; abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, sp->offset, ticket, ticket_len) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + ticket, ticket_len) < 0) goto protocol_error_free; ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key, diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 82d8134e9287..7d921e56e715 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -131,10 +131,10 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) /* find the service */ read_lock(&local->services_lock); - hlist_for_each_entry(rx, &local->services, listen_link) { - if (rx->srx.srx_service == conn->params.service_id) - goto found_service; - } + rx = rcu_dereference_protected(local->service, + lockdep_is_held(&local->services_lock)); + if (rx && rx->srx.srx_service == conn->params.service_id) + goto found_service; /* the service appears to have died */ read_unlock(&local->services_lock); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 1f8040d82395..b214a4d4a641 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -130,6 +130,11 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, break; case RXRPC_CALL_SERVER_ACK_REQUEST: call->state = RXRPC_CALL_SERVER_SEND_REPLY; + call->ack_at = call->expire_at; + if (call->ackr_reason == RXRPC_ACK_DELAY) + call->ackr_reason = 0; + __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply, + ktime_get_real()); if (!last) break; case RXRPC_CALL_SERVER_SEND_REPLY: @@ -144,18 +149,18 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, if (seq == 1 && rxrpc_is_client_call(call)) rxrpc_expose_client_call(call); - ret = rxrpc_send_data_packet(call, skb); + ret = rxrpc_send_data_packet(call, skb, false); if (ret < 0) { _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); } else { - unsigned long resend_at; + ktime_t now = ktime_get_real(), resend_at; - resend_at = jiffies + msecs_to_jiffies(rxrpc_resend_timeout); + resend_at = ktime_add_ms(now, rxrpc_resend_timeout); - if (time_before(resend_at, call->resend_at)) { + if (ktime_before(resend_at, call->resend_at)) { call->resend_at = resend_at; - rxrpc_set_timer(call, rxrpc_timer_set_for_send); + rxrpc_set_timer(call, rxrpc_timer_set_for_send, now); } } @@ -197,7 +202,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, do { /* Check to see if there's a ping ACK to reply to. */ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + rxrpc_send_ack_packet(call, false); if (!skb) { size_t size, chunk, max, space; @@ -514,8 +519,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else if (cmd == RXRPC_CMD_SEND_ABORT) { ret = 0; if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED)) - ret = rxrpc_send_call_packet(call, - RXRPC_PACKET_TYPE_ABORT); + ret = rxrpc_send_abort_packet(call); } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else if (rxrpc_is_client_call(call) && @@ -597,7 +601,7 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, lock_sock(sock->sk); if (rxrpc_abort_call(why, call, 0, abort_code, error)) - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_send_abort_packet(call); release_sock(sock->sk); _leave(""); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 5154cbf7e540..67b02c45271b 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -77,14 +77,9 @@ void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) if (skb) { int n; CHECK_SLAB_OKAY(&skb->users); - if (op == rxrpc_skb_tx_lost) { - n = atomic_read(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); - } else { - n = atomic_dec_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); - kfree_skb(skb); - } + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + kfree_skb(skb); } } diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 13d1df03ebac..34c706d2f79c 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -35,7 +35,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_requested_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&zero, }, { @@ -43,7 +43,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_soft_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { @@ -51,7 +51,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_idle_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { @@ -85,7 +85,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_max_call_lifetime, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, diff --git a/net/sched/act_api.c b/net/sched/act_api.c index c9102172ce3b..a512b18c0088 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -341,22 +341,25 @@ int tcf_register_action(struct tc_action_ops *act, if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup) return -EINVAL; + /* We have to register pernet ops before making the action ops visible, + * otherwise tcf_action_init_1() could get a partially initialized + * netns. + */ + ret = register_pernet_subsys(ops); + if (ret) + return ret; + write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { write_unlock(&act_mod_lock); + unregister_pernet_subsys(ops); return -EEXIST; } } list_add_tail(&act->head, &act_base); write_unlock(&act_mod_lock); - ret = register_pernet_subsys(ops); - if (ret) { - tcf_unregister_action(act, ops); - return ret; - } - return 0; } EXPORT_SYMBOL(tcf_register_action); @@ -367,8 +370,6 @@ int tcf_unregister_action(struct tc_action_ops *act, struct tc_action_ops *a; int err = -ENOENT; - unregister_pernet_subsys(ops); - write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (a == act) { @@ -378,6 +379,8 @@ int tcf_unregister_action(struct tc_action_ops *act, } } write_unlock(&act_mod_lock); + if (!err) + unregister_pernet_subsys(ops); return err; } EXPORT_SYMBOL(tcf_unregister_action); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index ccf7b4b655fe..95c463cbb9a6 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -53,7 +53,7 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) u32 *tlv = (u32 *)(skbdata); u16 totlen = nla_total_size(dlen); /*alignment + hdr */ char *dptr = (char *)tlv + NLA_HDRLEN; - u32 htlv = attrtype << 16 | dlen; + u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN); *tlv = htonl(htlv); memset(dptr, 0, totlen - NLA_HDRLEN); @@ -653,7 +653,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data; - u16 ifehdrln = ifehdr->metalen; + int ifehdrln = (int)ifehdr->metalen; struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data); spin_lock(&ife->tcf_lock); @@ -766,8 +766,6 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, return TC_ACT_SHOT; } - iethh = eth_hdr(skb); - err = skb_cow_head(skb, hdrm); if (unlikely(err)) { ife->tcf_qstats.drops++; @@ -778,6 +776,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, if (!(at & AT_EGRESS)) skb_push(skb, skb->dev->hard_header_len); + iethh = (struct ethhdr *)skb->data; __skb_push(skb, hdrm); memcpy(skb->data, iethh, skb->mac_len); skb_reset_mac_header(skb); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 667dc382df82..2d93be6717e5 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -33,6 +33,25 @@ static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); +static bool tcf_mirred_is_act_redirect(int action) +{ + return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR; +} + +static u32 tcf_mirred_act_direction(int action) +{ + switch (action) { + case TCA_EGRESS_REDIR: + case TCA_EGRESS_MIRROR: + return AT_EGRESS; + case TCA_INGRESS_REDIR: + case TCA_INGRESS_MIRROR: + return AT_INGRESS; + default: + BUG(); + } +} + static void tcf_mirred_release(struct tc_action *a, int bind) { struct tcf_mirred *m = to_mirred(a); @@ -54,17 +73,32 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { static int mirred_net_id; static struct tc_action_ops act_mirred_ops; +static bool dev_is_mac_header_xmit(const struct net_device *dev) +{ + switch (dev->type) { + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + return false; + } + return true; +} + static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { struct tc_action_net *tn = net_generic(net, mirred_net_id); struct nlattr *tb[TCA_MIRRED_MAX + 1]; + bool mac_header_xmit = false; struct tc_mirred *parm; struct tcf_mirred *m; struct net_device *dev; - int ret, ok_push = 0; bool exists = false; + int ret; if (nla == NULL) return -EINVAL; @@ -82,6 +116,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, switch (parm->eaction) { case TCA_EGRESS_MIRROR: case TCA_EGRESS_REDIR: + case TCA_INGRESS_REDIR: + case TCA_INGRESS_MIRROR: break; default: if (exists) @@ -95,19 +131,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, tcf_hash_release(*a, bind); return -ENODEV; } - switch (dev->type) { - case ARPHRD_TUNNEL: - case ARPHRD_TUNNEL6: - case ARPHRD_SIT: - case ARPHRD_IPGRE: - case ARPHRD_VOID: - case ARPHRD_NONE: - ok_push = 0; - break; - default: - ok_push = 1; - break; - } + mac_header_xmit = dev_is_mac_header_xmit(dev); } else { dev = NULL; } @@ -136,7 +160,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); dev_hold(dev); rcu_assign_pointer(m->tcfm_dev, dev); - m->tcfm_ok_push = ok_push; + m->tcfm_mac_header_xmit = mac_header_xmit; } if (ret == ACT_P_CREATED) { @@ -153,15 +177,20 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_mirred *m = to_mirred(a); + bool m_mac_header_xmit; struct net_device *dev; struct sk_buff *skb2; - int retval, err; + int retval, err = 0; + int m_eaction; + int mac_len; u32 at; tcf_lastuse_update(&m->tcf_tm); bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); rcu_read_lock(); + m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); + m_eaction = READ_ONCE(m->tcfm_eaction); retval = READ_ONCE(m->tcf_action); dev = rcu_dereference(m->tcfm_dev); if (unlikely(!dev)) { @@ -180,23 +209,36 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, if (!skb2) goto out; - if (!(at & AT_EGRESS)) { - if (m->tcfm_ok_push) + /* If action's target direction differs than filter's direction, + * and devices expect a mac header on xmit, then mac push/pull is + * needed. + */ + if (at != tcf_mirred_act_direction(m_eaction) && m_mac_header_xmit) { + if (at & AT_EGRESS) { + /* caught at egress, act ingress: pull mac */ + mac_len = skb_network_header(skb) - skb_mac_header(skb); + skb_pull_rcsum(skb2, mac_len); + } else { + /* caught at ingress, act egress: push mac */ skb_push_rcsum(skb2, skb->mac_len); + } } /* mirror is always swallowed */ - if (m->tcfm_eaction != TCA_EGRESS_MIRROR) + if (tcf_mirred_is_act_redirect(m_eaction)) skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; - err = dev_queue_xmit(skb2); + if (tcf_mirred_act_direction(m_eaction) & AT_EGRESS) + err = dev_queue_xmit(skb2); + else + err = netif_receive_skb(skb2); if (err) { out: qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats)); - if (m->tcfm_eaction != TCA_EGRESS_MIRROR) + if (tcf_mirred_is_act_redirect(m_eaction)) retval = TC_ACT_SHOT; } rcu_read_unlock(); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index a133dcb82132..024f3a3afeff 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -46,8 +46,10 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, if (d->flags & SKBEDIT_F_QUEUE_MAPPING && skb->dev->real_num_tx_queues > d->queue_mapping) skb_set_queue_mapping(skb, d->queue_mapping); - if (d->flags & SKBEDIT_F_MARK) - skb->mark = d->mark; + if (d->flags & SKBEDIT_F_MARK) { + skb->mark &= ~d->mask; + skb->mark |= d->mark & d->mask; + } if (d->flags & SKBEDIT_F_PTYPE) skb->pkt_type = d->ptype; @@ -61,6 +63,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) }, + [TCA_SKBEDIT_MASK] = { .len = sizeof(u32) }, }; static int tcf_skbedit_init(struct net *net, struct nlattr *nla, @@ -71,7 +74,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tc_skbedit *parm; struct tcf_skbedit *d; - u32 flags = 0, *priority = NULL, *mark = NULL; + u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; u16 *queue_mapping = NULL, *ptype = NULL; bool exists = false; int ret = 0, err; @@ -108,6 +111,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, mark = nla_data(tb[TCA_SKBEDIT_MARK]); } + if (tb[TCA_SKBEDIT_MASK] != NULL) { + flags |= SKBEDIT_F_MASK; + mask = nla_data(tb[TCA_SKBEDIT_MASK]); + } + parm = nla_data(tb[TCA_SKBEDIT_PARMS]); exists = tcf_hash_check(tn, parm->index, a, bind); @@ -145,6 +153,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, d->mark = *mark; if (flags & SKBEDIT_F_PTYPE) d->ptype = *ptype; + /* default behaviour is to use all the bits */ + d->mask = 0xffffffff; + if (flags & SKBEDIT_F_MASK) + d->mask = *mask; d->tcf_action = parm->action; @@ -182,6 +194,9 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, if ((d->flags & SKBEDIT_F_PTYPE) && nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype)) goto nla_put_failure; + if ((d->flags & SKBEDIT_F_MASK) && + nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask)) + goto nla_put_failure; tcf_tm_dump(&t, &d->tcf_tm); if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD)) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index a95c00b119da..b57fcbcefea1 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -37,6 +37,12 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, bstats_update(&v->tcf_bstats, skb); action = v->tcf_action; + /* Ensure 'data' points at mac_header prior calling vlan manipulating + * functions. + */ + if (skb_at_tc_ingress(skb)) + skb_push_rcsum(skb, skb->mac_len); + switch (v->tcfv_action) { case TCA_VLAN_ACT_POP: err = skb_vlan_pop(skb); @@ -83,6 +89,9 @@ drop: action = TC_ACT_SHOT; v->tcf_qstats.drops++; unlock: + if (skb_at_tc_ingress(skb)) + skb_pull_rcsum(skb, skb->mac_len); + spin_unlock(&v->tcf_lock); return action; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 11da7da0b7c4..2ee29a3375f6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -101,7 +101,7 @@ EXPORT_SYMBOL(unregister_tcf_proto_ops); static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, - unsigned long fh, int event); + unsigned long fh, int event, bool unicast); static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, @@ -112,7 +112,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL; it_chain = &tp->next) - tfilter_notify(net, oskb, n, tp, 0, event); + tfilter_notify(net, oskb, n, tp, 0, event, false); } /* Select new prio value from the range, managed by kernel. */ @@ -319,7 +319,8 @@ replay: RCU_INIT_POINTER(*back, next); - tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, + RTM_DELTFILTER, false); tcf_destroy(tp, true); err = 0; goto errout; @@ -345,14 +346,14 @@ replay: struct tcf_proto *next = rtnl_dereference(tp->next); tfilter_notify(net, skb, n, tp, fh, - RTM_DELTFILTER); + RTM_DELTFILTER, false); if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); } goto errout; case RTM_GETTFILTER: err = tfilter_notify(net, skb, n, tp, fh, - RTM_NEWTFILTER); + RTM_NEWTFILTER, true); goto errout; default: err = -EINVAL; @@ -367,7 +368,7 @@ replay: RCU_INIT_POINTER(tp->next, rtnl_dereference(*back)); rcu_assign_pointer(*back, tp); } - tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); + tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); } else { if (tp_created) tcf_destroy(tp, true); @@ -419,7 +420,7 @@ nla_put_failure: static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, - unsigned long fh, int event) + unsigned long fh, int event, bool unicast) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; @@ -433,6 +434,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, return -EINVAL; } + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index a309a07ccb35..41c80b6c3906 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,11 +176,12 @@ META_COLLECTOR(int_vlan_tag) { unsigned short tag; - tag = skb_vlan_tag_get(skb); - if (!tag && __vlan_get_tag(skb, &tag)) - *err = -1; - else + if (skb_vlan_tag_present(skb)) + dst->value = skb_vlan_tag_get(skb); + else if (!__vlan_get_tag(skb, &tag)) dst->value = tag; + else + *err = -1; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c798d0de8a9d..9926fe4f3b6f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1145,7 +1145,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) return -1; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index f27ffee106f6..ca0516e6f743 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1153,6 +1153,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) if (!skb) return NULL; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_bstats_update(sch, skb); @@ -1256,6 +1257,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } bstats_update(&cl->bstats, skb); + qdisc_qstats_backlog_inc(sch, skb); ++sch->q.qlen; agg = cl->agg; @@ -1476,6 +1478,7 @@ static void qfq_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } + sch->qstats.backlog = 0; sch->q.qlen = 0; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index add3cc7d37ec..20a350bd1b1d 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -400,6 +400,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, enqueue: ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; increment_qlen(skb, q); } else if (net_xmit_drop_count(ret)) { @@ -428,6 +429,7 @@ static struct sk_buff *sfb_dequeue(struct Qdisc *sch) if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; decrement_qlen(skb, q); } @@ -450,6 +452,7 @@ static void sfb_reset(struct Qdisc *sch) struct sfb_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; q->slot = 0; q->double_buffering = false; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 2cd9b4478b92..b0196366d58d 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -418,9 +418,6 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) struct teql_master *m = netdev_priv(dev); struct Qdisc *q; - if (new_mtu < 68) - return -EINVAL; - q = m->slaves; if (q) { do { @@ -460,6 +457,8 @@ static __init void teql_master_setup(struct net_device *dev) dev->netdev_ops = &teql_netdev_ops; dev->type = ARPHRD_VOID; dev->mtu = 1500; + dev->min_mtu = 68; + dev->max_mtu = 65535; dev->tx_queue_len = 100; dev->flags = IFF_NOARP; dev->hard_header_len = LL_MAX_HEADER; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 8afe2e90d003..615f0ddd41df 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -52,7 +52,6 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) atomic_set(&msg->refcnt, 1); msg->send_failed = 0; msg->send_error = 0; - msg->can_abandon = 0; msg->can_delay = 1; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); @@ -182,15 +181,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, /* Note: Calculate this outside of the loop, so that all fragments * have the same expiration. */ - if (sinfo->sinfo_timetolive) { - /* sinfo_timetolive is in milliseconds */ + if (asoc->peer.prsctp_capable && sinfo->sinfo_timetolive && + (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags) || + !SCTP_PR_POLICY(sinfo->sinfo_flags))) msg->expires_at = jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); - msg->can_abandon = 1; - - pr_debug("%s: msg:%p expires_at:%ld jiffies:%ld\n", __func__, - msg, msg->expires_at, jiffies); - } /* This is the biggest possible DATA chunk that can fit into * the packet @@ -349,30 +344,24 @@ errout: /* Check whether this message has expired. */ int sctp_chunk_abandoned(struct sctp_chunk *chunk) { - if (!chunk->asoc->prsctp_enable || - !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) { - struct sctp_datamsg *msg = chunk->msg; - - if (!msg->can_abandon) - return 0; - - if (time_after(jiffies, msg->expires_at)) - return 1; - + if (!chunk->asoc->peer.prsctp_capable) return 0; - } if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && - time_after(jiffies, chunk->prsctp_param)) { + time_after(jiffies, chunk->msg->expires_at)) { if (chunk->sent_count) chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; else chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && - chunk->sent_count > chunk->prsctp_param) { + chunk->sent_count > chunk->sinfo.sinfo_timetolive) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; + } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && + chunk->msg->expires_at && + time_after(jiffies, chunk->msg->expires_at)) { + return 1; } /* PRIO policy is processed by sendmsg, not here */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 2a5c1896d18f..4282b488985b 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -552,7 +552,8 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * for a given destination transport address. */ - if (!chunk->resent && !tp->rto_pending) { + if (!sctp_chunk_retransmitted(chunk) && + !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } @@ -865,9 +866,6 @@ static void sctp_packet_append_data(struct sctp_packet *packet, rwnd = 0; asoc->peer.rwnd = rwnd; - /* Has been accepted for transmission. */ - if (!asoc->peer.prsctp_capable) - chunk->msg->can_abandon = 0; sctp_chunk_assign_tsn(chunk); sctp_chunk_assign_ssn(chunk); } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 3ec6da8bbb53..e54082699520 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -304,7 +304,7 @@ void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) "illegal chunk"); sctp_outq_tail_data(q, chunk); - if (chunk->asoc->prsctp_enable && + if (chunk->asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) chunk->asoc->sent_cnt_removable++; if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) @@ -354,7 +354,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, queue, transmitted_list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->prsctp_param <= sinfo->sinfo_timetolive) + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->transmitted_list); @@ -389,7 +389,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, list_for_each_entry_safe(chk, temp, queue, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || - chk->prsctp_param <= sinfo->sinfo_timetolive) + chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->list); @@ -413,7 +413,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc, { struct sctp_transport *transport; - if (!asoc->prsctp_enable || !asoc->sent_cnt_removable) + if (!asoc->peer.prsctp_capable || !asoc->sent_cnt_removable) return; msg_len = sctp_prsctp_prune_sent(asoc, sinfo, @@ -507,8 +507,6 @@ void sctp_retransmit_mark(struct sctp_outq *q, transport->rto_pending = 0; } - chunk->resent = 1; - /* Move the chunk to the retransmit queue. The chunks * on the retransmit queue are always kept in order. */ @@ -1026,7 +1024,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); - if (asoc->prsctp_enable && + if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; sctp_chunk_free(chunk); @@ -1319,7 +1317,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) tsn = ntohl(tchunk->subh.data_hdr->tsn); if (TSN_lte(tsn, ctsn)) { list_del_init(&tchunk->transmitted_list); - if (asoc->prsctp_enable && + if (asoc->peer.prsctp_capable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) asoc->sent_cnt_removable--; sctp_chunk_free(tchunk); @@ -1439,7 +1437,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && - !tchunk->resent && + !sctp_chunk_retransmitted(tchunk) && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ef8ba77a5bea..206377fe91ec 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -73,13 +73,17 @@ static const struct snmp_mib sctp_snmp_list[] = { /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { + unsigned long buff[SCTP_MIB_MAX]; struct net *net = seq->private; int i; - for (i = 0; sctp_snmp_list[i].name != NULL; i++) + memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, sctp_snmp_list, + net->sctp.sctp_statistics); + for (i = 0; sctp_snmp_list[i].name; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field(net->sctp.sctp_statistics, - sctp_snmp_list[i].entry)); + buff[i]); return 0; } diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 807158e32f5f..048954eee984 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -276,28 +276,17 @@ out: return err; } -static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sock *sk, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_comm_param *commp = p; - struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; - struct sctp_association *assoc = - list_entry(ep->asocs.next, struct sctp_association, asocs); + struct sctp_association *assoc; int err = 0; - /* find the ep only once through the transports by this condition */ - if (tsp->asoc != assoc) - goto out; - - if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) - goto out; - lock_sock(sk); - if (sk != assoc->base.sk) - goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -317,7 +306,7 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) NLM_F_MULTI, cb->nlh, commp->net_admin) < 0) { cb->args[3] = 1; - err = 2; + err = 1; goto release; } cb->args[3] = 1; @@ -327,7 +316,7 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, cb->nlh, commp->net_admin) < 0) { - err = 2; + err = 1; goto release; } next: @@ -339,10 +328,35 @@ next: cb->args[4] = 0; release: release_sock(sk); + sock_put(sk); return err; +} + +static int sctp_get_sock(struct sctp_transport *tsp, void *p) +{ + struct sctp_endpoint *ep = tsp->asoc->ep; + struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; + struct netlink_callback *cb = commp->cb; + const struct inet_diag_req_v2 *r = commp->r; + struct sctp_association *assoc = + list_entry(ep->asocs.next, struct sctp_association, asocs); + + /* find the ep only once through the transports by this condition */ + if (tsp->asoc != assoc) + goto out; + + if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) + goto out; + + sock_hold(sk); + cb->args[5] = (long)sk; + + return 1; + out: cb->args[2]++; - return err; + return 0; } static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) @@ -480,10 +494,18 @@ skip: * 2 : to record the transport pos of this time's traversal * 3 : to mark if we have dumped the ep info of the current asoc * 4 : to work as a temporary variable to traversal list + * 5 : to save the sk we get from travelsing the tsp list. */ if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp); + +next: + cb->args[5] = 0; + sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp); + + if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp)) + goto next; + done: cb->args[1] = cb->args[4]; cb->args[4] = 0; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 79dd66079dd7..9e9690b7afe1 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -706,20 +706,6 @@ nodata: return retval; } -static void sctp_set_prsctp_policy(struct sctp_chunk *chunk, - const struct sctp_sndrcvinfo *sinfo) -{ - if (!chunk->asoc->prsctp_enable) - return; - - if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) - chunk->prsctp_param = - jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); - else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) || - SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags)) - chunk->prsctp_param = sinfo->sinfo_timetolive; -} - /* Make a DATA chunk for the given association from the provided * parameters. However, do not populate the data payload. */ @@ -753,7 +739,6 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp); memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo)); - sctp_set_prsctp_policy(retval, sinfo); nodata: return retval; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6cdc61c21438..fb02c7033307 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4473,17 +4473,21 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), const union sctp_addr *paddr, void *p) { struct sctp_transport *transport; - int err = 0; + int err = -ENOENT; rcu_read_lock(); transport = sctp_addrs_lookup_transport(net, laddr, paddr); if (!transport || !sctp_transport_hold(transport)) goto out; - err = cb(transport, p); + + sctp_association_hold(transport->asoc); sctp_transport_put(transport); -out: rcu_read_unlock(); + err = cb(transport, p); + sctp_association_put(transport->asoc); + +out: return err; } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); diff --git a/net/socket.c b/net/socket.c index a1bd16106625..5a9bf5ee2464 100644 --- a/net/socket.c +++ b/net/socket.c @@ -320,11 +320,38 @@ static const struct dentry_operations sockfs_dentry_operations = { .d_dname = sockfs_dname, }; +static int sockfs_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *suffix, void *value, size_t size) +{ + if (value) { + if (dentry->d_name.len + 1 > size) + return -ERANGE; + memcpy(value, dentry->d_name.name, dentry->d_name.len + 1); + } + return dentry->d_name.len + 1; +} + +#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname" +#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX) +#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1) + +static const struct xattr_handler sockfs_xattr_handler = { + .name = XATTR_NAME_SOCKPROTONAME, + .get = sockfs_xattr_get, +}; + +static const struct xattr_handler *sockfs_xattr_handlers[] = { + &sockfs_xattr_handler, + NULL +}; + static struct dentry *sockfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_pseudo(fs_type, "socket:", &sockfs_ops, - &sockfs_dentry_operations, SOCKFS_MAGIC); + return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops, + sockfs_xattr_handlers, + &sockfs_dentry_operations, SOCKFS_MAGIC); } static struct vfsmount *sock_mnt __read_mostly; @@ -463,35 +490,6 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) return NULL; } -#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname" -#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX) -#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1) -static ssize_t sockfs_getxattr(struct dentry *dentry, struct inode *inode, - const char *name, void *value, size_t size) -{ - const char *proto_name; - size_t proto_size; - int error; - - error = -ENODATA; - if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) { - proto_name = dentry->d_name.name; - proto_size = strlen(proto_name); - - if (value) { - error = -ERANGE; - if (proto_size + 1 > size) - goto out; - - strncpy(value, proto_name, proto_size + 1); - } - error = proto_size + 1; - } - -out: - return error; -} - static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, size_t size) { @@ -521,7 +519,6 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, } static const struct inode_operations sockfs_inode_ops = { - .getxattr = sockfs_getxattr, .listxattr = sockfs_listxattr, }; diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 5c7549b5b92c..41adf362936d 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -246,7 +246,7 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, } else { strp->rx_interrupted = 1; } - strp_parser_err(strp, err, desc); + strp_parser_err(strp, len, desc); break; } else if (len > strp->sk->sk_rcvbuf) { /* Message length exceeds maximum allowed */ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a7e42f9a405c..2bff63a73cf8 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -551,7 +551,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, *entry, *new; unsigned int nr; - nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits); + nr = auth->au_ops->hash_cred(acred, cache->hashbits); rcu_read_lock(); hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 168219535a34..f1df9837f1ac 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -78,6 +78,14 @@ static struct rpc_cred *generic_bind_cred(struct rpc_task *task, return auth->au_ops->lookup_cred(auth, acred, lookupflags); } +static int +generic_hash_cred(struct auth_cred *acred, unsigned int hashbits) +{ + return hash_64(from_kgid(&init_user_ns, acred->gid) | + ((u64)from_kuid(&init_user_ns, acred->uid) << + (sizeof(gid_t) * 8)), hashbits); +} + /* * Lookup generic creds for current process */ @@ -176,8 +184,8 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) if (gcred->acred.group_info->ngroups != acred->group_info->ngroups) goto out_nomatch; for (i = 0; i < gcred->acred.group_info->ngroups; i++) { - if (!gid_eq(GROUP_AT(gcred->acred.group_info, i), - GROUP_AT(acred->group_info, i))) + if (!gid_eq(gcred->acred.group_info->gid[i], + acred->group_info->gid[i])) goto out_nomatch; } out_match: @@ -258,6 +266,7 @@ generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred) static const struct rpc_authops generic_auth_ops = { .owner = THIS_MODULE, .au_name = "Generic", + .hash_cred = generic_hash_cred, .lookup_cred = generic_lookup_cred, .crcreate = generic_create_cred, .key_timeout = generic_key_timeout, diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 976c7812bbd5..d8bd97a5a7c9 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1298,6 +1298,12 @@ gss_destroy_cred(struct rpc_cred *cred) gss_destroy_nullcred(cred); } +static int +gss_hash_cred(struct auth_cred *acred, unsigned int hashbits) +{ + return hash_64(from_kuid(&init_user_ns, acred->uid), hashbits); +} + /* * Lookup RPCSEC_GSS cred for the current process */ @@ -1982,6 +1988,7 @@ static const struct rpc_authops authgss_ops = { .au_name = "RPCSEC_GSS", .create = gss_create, .destroy = gss_destroy, + .hash_cred = gss_hash_cred, .lookup_cred = gss_lookup_cred, .crcreate = gss_create_cred, .list_pseudoflavors = gss_mech_list_pseudoflavors, diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index eeeba5adee6d..dc6fb79a361f 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -229,7 +229,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, kgid = make_kgid(&init_user_ns, tmp); if (!gid_valid(kgid)) goto out_free_groups; - GROUP_AT(creds->cr_group_info, i) = kgid; + creds->cr_group_info->gid[i] = kgid; } return 0; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d8582028b346..d67f7e1bc82d 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -479,7 +479,7 @@ static int rsc_parse(struct cache_detail *cd, kgid = make_kgid(&init_user_ns, id); if (!gid_valid(kgid)) goto out; - GROUP_AT(rsci.cred.cr_group_info, i) = kgid; + rsci.cred.cr_group_info->gid[i] = kgid; } /* mech name */ diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index a99278c984e8..306fc0f54596 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -46,6 +46,14 @@ unx_destroy(struct rpc_auth *auth) rpcauth_clear_credcache(auth->au_credcache); } +static int +unx_hash_cred(struct auth_cred *acred, unsigned int hashbits) +{ + return hash_64(from_kgid(&init_user_ns, acred->gid) | + ((u64)from_kuid(&init_user_ns, acred->uid) << + (sizeof(gid_t) * 8)), hashbits); +} + /* * Lookup AUTH_UNIX creds for current process */ @@ -79,7 +87,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t cred->uc_gid = acred->gid; for (i = 0; i < groups; i++) - cred->uc_gids[i] = GROUP_AT(acred->group_info, i); + cred->uc_gids[i] = acred->group_info->gid[i]; if (i < NFS_NGROUPS) cred->uc_gids[i] = INVALID_GID; @@ -127,7 +135,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) if (groups > NFS_NGROUPS) groups = NFS_NGROUPS; for (i = 0; i < groups ; i++) - if (!gid_eq(cred->uc_gids[i], GROUP_AT(acred->group_info, i))) + if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i])) return 0; if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups])) return 0; @@ -220,6 +228,7 @@ const struct rpc_authops authunix_ops = { .au_name = "UNIX", .create = unx_create, .destroy = unx_destroy, + .hash_cred = unx_hash_cred, .lookup_cred = unx_lookup_cred, .crcreate = unx_create_cred, }; diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 229956bf8457..ac701c28f44f 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -76,13 +76,7 @@ static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags) page = alloc_page(gfp_flags); if (page == NULL) return -ENOMEM; - buf->head[0].iov_base = page_address(page); - buf->head[0].iov_len = PAGE_SIZE; - buf->tail[0].iov_base = NULL; - buf->tail[0].iov_len = 0; - buf->page_len = 0; - buf->len = 0; - buf->buflen = PAGE_SIZE; + xdr_buf_init(buf, page_address(page), PAGE_SIZE); return 0; } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 4d8e11f94a35..8aabe12201f8 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -353,7 +353,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd) spin_unlock(&cache_list_lock); /* start the cleaning process */ - schedule_delayed_work(&cache_cleaner, 0); + queue_delayed_work(system_power_efficient_wq, &cache_cleaner, 0); } EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail); @@ -476,7 +476,8 @@ static void do_cache_clean(struct work_struct *work) delay = 0; if (delay) - schedule_delayed_work(&cache_cleaner, delay); + queue_delayed_work(system_power_efficient_wq, + &cache_cleaner, delay); } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 66f23b376fa0..34dd7b26ee5f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -184,7 +184,6 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event, struct super_block *sb) { struct dentry *dentry; - int err = 0; switch (event) { case RPC_PIPEFS_MOUNT: @@ -201,7 +200,7 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event, printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event); return -ENOTSUPP; } - return err; + return 0; } static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event, @@ -988,7 +987,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) { if (clnt != NULL) { - rpc_task_release_client(task); if (task->tk_xprt == NULL) task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi); task->tk_client = clnt; @@ -1693,6 +1691,7 @@ call_allocate(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; + int status; dprint_status(task); @@ -1718,11 +1717,14 @@ call_allocate(struct rpc_task *task) req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen; req->rq_rcvsize <<= 2; - req->rq_buffer = xprt->ops->buf_alloc(task, - req->rq_callsize + req->rq_rcvsize); - if (req->rq_buffer != NULL) - return; + status = xprt->ops->buf_alloc(task); xprt_inject_disconnect(xprt); + if (status == 0) + return; + if (status != -ENOMEM) { + rpc_exit(task, status); + return; + } dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); @@ -1748,18 +1750,6 @@ rpc_task_force_reencode(struct rpc_task *task) task->tk_rqstp->rq_bytes_sent = 0; } -static inline void -rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) -{ - buf->head[0].iov_base = start; - buf->head[0].iov_len = len; - buf->tail[0].iov_len = 0; - buf->page_len = 0; - buf->flags = 0; - buf->len = 0; - buf->buflen = len; -} - /* * 3. Encode arguments of an RPC call */ @@ -1772,12 +1762,12 @@ rpc_xdr_encode(struct rpc_task *task) dprint_status(task); - rpc_xdr_buf_init(&req->rq_snd_buf, - req->rq_buffer, - req->rq_callsize); - rpc_xdr_buf_init(&req->rq_rcv_buf, - (char *)req->rq_buffer + req->rq_callsize, - req->rq_rcvsize); + xdr_buf_init(&req->rq_snd_buf, + req->rq_buffer, + req->rq_callsize); + xdr_buf_init(&req->rq_rcv_buf, + req->rq_rbuffer, + req->rq_rcvsize); p = rpc_encode_header(task); if (p == NULL) { @@ -2616,6 +2606,70 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt); /** + * rpc_clnt_setup_test_and_add_xprt() + * + * This is an rpc_clnt_add_xprt setup() function which returns 1 so: + * 1) caller of the test function must dereference the rpc_xprt_switch + * and the rpc_xprt. + * 2) test function must call rpc_xprt_switch_add_xprt, usually in + * the rpc_call_done routine. + * + * Upon success (return of 1), the test function adds the new + * transport to the rpc_clnt xprt switch + * + * @clnt: struct rpc_clnt to get the new transport + * @xps: the rpc_xprt_switch to hold the new transport + * @xprt: the rpc_xprt to test + * @data: a struct rpc_add_xprt_test pointer that holds the test function + * and test function call data + */ +int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, + struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt, + void *data) +{ + struct rpc_cred *cred; + struct rpc_task *task; + struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data; + int status = -EADDRINUSE; + + xprt = xprt_get(xprt); + xprt_switch_get(xps); + + if (rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr)) + goto out_err; + + /* Test the connection */ + cred = authnull_ops.lookup_cred(NULL, NULL, 0); + task = rpc_call_null_helper(clnt, xprt, cred, + RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + NULL, NULL); + put_rpccred(cred); + if (IS_ERR(task)) { + status = PTR_ERR(task); + goto out_err; + } + status = task->tk_status; + rpc_put_task(task); + + if (status < 0) + goto out_err; + + /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */ + xtest->add_xprt_test(clnt, xprt, xtest->data); + + /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */ + return 1; +out_err: + xprt_put(xprt); + xprt_switch_put(xps); + pr_info("RPC: rpc_clnt_test_xprt failed: %d addr %s not added\n", + status, xprt->address_strings[RPC_DISPLAY_ADDR]); + return status; +} +EXPORT_SYMBOL_GPL(rpc_clnt_setup_test_and_add_xprt); + +/** * rpc_clnt_add_xprt - Add a new transport to a rpc_clnt * @clnt: pointer to struct rpc_clnt * @xprtargs: pointer to struct xprt_create @@ -2697,6 +2751,34 @@ rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo) } EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout); +void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt) +{ + xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); +} +EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put); + +void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) +{ + rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch), + xprt); +} +EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt); + +bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, + const struct sockaddr *sap) +{ + struct rpc_xprt_switch *xps; + bool ret; + + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); + + rcu_read_lock(); + ret = rpc_xprt_switch_has_addr(xps, sap); + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_has_addr); + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static void rpc_show_header(void) { diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 84f98cbe31c3..61a504fb1ae2 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -477,7 +477,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode) return NULL; inode->i_ino = get_next_ino(); inode->i_mode = mode; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9ae588511aaf..5db68b371db2 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -849,14 +849,17 @@ static void rpc_async_schedule(struct work_struct *work) } /** - * rpc_malloc - allocate an RPC buffer - * @task: RPC task that will use this buffer - * @size: requested byte size + * rpc_malloc - allocate RPC buffer resources + * @task: RPC task + * + * A single memory region is allocated, which is split between the + * RPC call and RPC reply that this task is being used for. When + * this RPC is retired, the memory is released by calling rpc_free. * * To prevent rpciod from hanging, this allocator never sleeps, - * returning NULL and suppressing warning if the request cannot be serviced - * immediately. - * The caller can arrange to sleep in a way that is safe for rpciod. + * returning -ENOMEM and suppressing warning if the request cannot + * be serviced immediately. The caller can arrange to sleep in a + * way that is safe for rpciod. * * Most requests are 'small' (under 2KiB) and can be serviced from a * mempool, ensuring that NFS reads and writes can always proceed, @@ -865,8 +868,10 @@ static void rpc_async_schedule(struct work_struct *work) * In order to avoid memory starvation triggering more writebacks of * NFS requests, we avoid using GFP_KERNEL. */ -void *rpc_malloc(struct rpc_task *task, size_t size) +int rpc_malloc(struct rpc_task *task) { + struct rpc_rqst *rqst = task->tk_rqstp; + size_t size = rqst->rq_callsize + rqst->rq_rcvsize; struct rpc_buffer *buf; gfp_t gfp = GFP_NOIO | __GFP_NOWARN; @@ -880,28 +885,28 @@ void *rpc_malloc(struct rpc_task *task, size_t size) buf = kmalloc(size, gfp); if (!buf) - return NULL; + return -ENOMEM; buf->len = size; dprintk("RPC: %5u allocated buffer of size %zu at %p\n", task->tk_pid, size, buf); - return &buf->data; + rqst->rq_buffer = buf->data; + rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; + return 0; } EXPORT_SYMBOL_GPL(rpc_malloc); /** - * rpc_free - free buffer allocated via rpc_malloc - * @buffer: buffer to free + * rpc_free - free RPC buffer resources allocated via rpc_malloc + * @task: RPC task * */ -void rpc_free(void *buffer) +void rpc_free(struct rpc_task *task) { + void *buffer = task->tk_rqstp->rq_buffer; size_t size; struct rpc_buffer *buf; - if (!buffer) - return; - buf = container_of(buffer, struct rpc_buffer, data); size = buf->len; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index c5b0cb4f4056..7c8070ec93c8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -401,6 +401,21 @@ int svc_bind(struct svc_serv *serv, struct net *net) } EXPORT_SYMBOL_GPL(svc_bind); +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +static void +__svc_init_bc(struct svc_serv *serv) +{ + INIT_LIST_HEAD(&serv->sv_cb_list); + spin_lock_init(&serv->sv_cb_lock); + init_waitqueue_head(&serv->sv_cb_waitq); +} +#else +static void +__svc_init_bc(struct svc_serv *serv) +{ +} +#endif + /* * Create an RPC service */ @@ -443,6 +458,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, init_timer(&serv->sv_temptimer); spin_lock_init(&serv->sv_lock); + __svc_init_bc(serv); + serv->sv_nrpools = npools; serv->sv_pools = kcalloc(serv->sv_nrpools, sizeof(struct svc_pool), diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index dfacdc95b3f5..64af4f034de6 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -517,7 +517,7 @@ static int unix_gid_parse(struct cache_detail *cd, kgid = make_kgid(&init_user_ns, gid); if (!gid_valid(kgid)) goto out; - GROUP_AT(ug.gi, i) = kgid; + ug.gi->gid[i] = kgid; } ugp = unix_gid_lookup(cd, uid); @@ -564,7 +564,7 @@ static int unix_gid_show(struct seq_file *m, seq_printf(m, "%u %d:", from_kuid_munged(user_ns, ug->uid), glen); for (i = 0; i < glen; i++) - seq_printf(m, " %d", from_kgid_munged(user_ns, GROUP_AT(ug->gi, i))); + seq_printf(m, " %d", from_kgid_munged(user_ns, ug->gi->gid[i])); seq_printf(m, "\n"); return 0; } @@ -817,7 +817,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) return SVC_CLOSE; for (i = 0; i < slen; i++) { kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); - GROUP_AT(cred->cr_group_info, i) = kgid; + cred->cr_group_info->gid[i] = kgid; } if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { *authp = rpc_autherr_badverf; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 57625f64efd5..e2a55dc787e6 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -39,6 +39,7 @@ #include <net/checksum.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/udp.h> #include <net/tcp.h> #include <net/tcp_states.h> #include <asm/uaccess.h> @@ -129,6 +130,18 @@ static void svc_release_skb(struct svc_rqst *rqstp) } } +static void svc_release_udp_skb(struct svc_rqst *rqstp) +{ + struct sk_buff *skb = rqstp->rq_xprt_ctxt; + + if (skb) { + rqstp->rq_xprt_ctxt = NULL; + + dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); + consume_skb(skb); + } +} + union svc_pktinfo_u { struct in_pktinfo pkti; struct in6_pktinfo pkti6; @@ -575,7 +588,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) goto out_free; } local_bh_enable(); - skb_free_datagram_locked(svsk->sk_sk, skb); + consume_skb(skb); } else { /* we can use it in-place */ rqstp->rq_arg.head[0].iov_base = skb->data; @@ -602,8 +615,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) return len; out_free: - trace_kfree_skb(skb, svc_udp_recvfrom); - skb_free_datagram_locked(svsk->sk_sk, skb); + kfree_skb(skb); return 0; } @@ -660,7 +672,7 @@ static struct svc_xprt_ops svc_udp_ops = { .xpo_create = svc_udp_create, .xpo_recvfrom = svc_udp_recvfrom, .xpo_sendto = svc_udp_sendto, - .xpo_release_rqst = svc_release_skb, + .xpo_release_rqst = svc_release_udp_skb, .xpo_detach = svc_sock_detach, .xpo_free = svc_sock_free, .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index c4f3cc0c0775..7f1071e103ca 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -767,7 +767,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr) newbase -= xdr->buf->page_base; if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len); + xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); } static bool xdr_set_next_buffer(struct xdr_stream *xdr) @@ -776,7 +776,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr) xdr_set_next_page(xdr); else if (xdr->iov == xdr->buf->head) { if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) - xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len); + xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2); } return xdr->p != xdr->end; } @@ -859,12 +859,15 @@ EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer); static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes) { __be32 *p; - void *cpdest = xdr->scratch.iov_base; + char *cpdest = xdr->scratch.iov_base; size_t cplen = (char *)xdr->end - (char *)xdr->p; if (nbytes > xdr->scratch.iov_len) return NULL; - memcpy(cpdest, xdr->p, cplen); + p = __xdr_inline_decode(xdr, cplen); + if (p == NULL) + return NULL; + memcpy(cpdest, p, cplen); cpdest += cplen; nbytes -= cplen; if (!xdr_set_next_buffer(xdr)) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ea244b29138b..685e6d225414 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1295,7 +1295,7 @@ void xprt_release(struct rpc_task *task) xprt_schedule_autodisconnect(xprt); spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) - xprt->ops->buf_free(req->rq_buffer); + xprt->ops->buf_free(task); xprt_inject_disconnect(xprt); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 66c9d63f4797..ae92a9e9ba52 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -15,6 +15,7 @@ #include <asm/cmpxchg.h> #include <linux/spinlock.h> #include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/xprtmultipath.h> typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head, @@ -49,7 +50,8 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, if (xprt == NULL) return; spin_lock(&xps->xps_lock); - if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) + if ((xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) && + !rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr)) xprt_switch_add_xprt_locked(xps, xprt); spin_unlock(&xps->xps_lock); } @@ -232,6 +234,26 @@ struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) return xprt_switch_find_current_entry(head, xpi->xpi_cursor); } +bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) +{ + struct list_head *head; + struct rpc_xprt *pos; + + if (xps == NULL || sap == NULL) + return false; + + head = &xps->xps_xprt_list; + list_for_each_entry_rcu(pos, head, xprt_switch) { + if (rpc_cmp_addr_port(sap, (struct sockaddr *)&pos->addr)) { + pr_info("RPC: addr %s already in xprt switch\n", + pos->address_strings[RPC_DISPLAY_ADDR]); + return true; + } + } + return false; +} + static struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, const struct rpc_xprt *cur) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 87762d976b63..2c472e1b4827 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -27,7 +27,7 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, list_del(&req->rl_all); spin_unlock(&buf->rb_reqslock); - rpcrdma_destroy_req(&r_xprt->rx_ia, req); + rpcrdma_destroy_req(req); kfree(rqst); } @@ -35,10 +35,8 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_regbuf *rb; struct rpcrdma_req *req; - struct xdr_buf *buf; size_t size; req = rpcrdma_create_req(r_xprt); @@ -46,30 +44,19 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, return PTR_ERR(req); req->rl_backchannel = true; - size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); - rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); + rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, + DMA_TO_DEVICE, GFP_KERNEL); if (IS_ERR(rb)) goto out_fail; req->rl_rdmabuf = rb; - size += RPCRDMA_INLINE_READ_THRESHOLD(rqst); - rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); + size = r_xprt->rx_data.inline_rsize; + rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); if (IS_ERR(rb)) goto out_fail; - rb->rg_owner = req; req->rl_sendbuf = rb; - /* so that rpcr_to_rdmar works when receiving a request */ - rqst->rq_buffer = (void *)req->rl_sendbuf->rg_base; - - buf = &rqst->rq_snd_buf; - buf->head[0].iov_base = rqst->rq_buffer; - buf->head[0].iov_len = 0; - buf->tail[0].iov_base = NULL; - buf->tail[0].iov_len = 0; - buf->page_len = 0; - buf->len = 0; - buf->buflen = size; - + xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, size); + rpcrdma_set_xprtdata(rqst, req); return 0; out_fail: @@ -219,7 +206,6 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_msg *headerp; - size_t rpclen; headerp = rdmab_to_msg(req->rl_rdmabuf); headerp->rm_xid = rqst->rq_xid; @@ -231,26 +217,9 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) headerp->rm_body.rm_chunks[1] = xdr_zero; headerp->rm_body.rm_chunks[2] = xdr_zero; - rpclen = rqst->rq_svec[0].iov_len; - -#ifdef RPCRDMA_BACKCHANNEL_DEBUG - pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", - __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); - pr_info("RPC: %s: RPC/RDMA: %*ph\n", - __func__, (int)RPCRDMA_HDRLEN_MIN, headerp); - pr_info("RPC: %s: RPC: %*ph\n", - __func__, (int)rpclen, rqst->rq_svec[0].iov_base); -#endif - - req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); - req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; - req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); - - req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); - req->rl_send_iov[1].length = rpclen; - req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); - - req->rl_niovs = 2; + if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN, + &rqst->rq_snd_buf, rpcrdma_noch)) + return -EIO; return 0; } @@ -402,7 +371,7 @@ out_overflow: out_short: pr_warn("RPC/RDMA short backward direction call\n"); - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) xprt_disconnect_done(xprt); else pr_warn("RPC: %s: reposting rep %p\n", diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 21cb3b150b37..1ebb09e1ac4f 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -160,9 +160,8 @@ static int fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { - rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, - RPCRDMA_MAX_DATA_SEGS / - RPCRDMA_MAX_FMR_SGES)); + ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / + RPCRDMA_MAX_FMR_SGES); return 0; } @@ -274,6 +273,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ list_for_each_entry(mw, &req->rl_registered, mw_list) list_add_tail(&mw->fmr.fm_mr->list, &unmap_list); + r_xprt->rx_stats.local_inv_needed++; rc = ib_unmap_fmr(&unmap_list); if (rc) goto out_reset; @@ -331,4 +331,5 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { .ro_init_mr = fmr_op_init_mr, .ro_release_mr = fmr_op_release_mr, .ro_displayname = "fmr", + .ro_send_w_inv_ok = 0, }; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 892b5e1d9b09..210949562786 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -67,6 +67,8 @@ * pending send queue WRs before the transport is reconnected. */ +#include <linux/sunrpc/rpc_rdma.h> + #include "xprt_rdma.h" #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -161,7 +163,7 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) return PTR_ERR(f->fr_mr); } - dprintk("RPC: %s: recovered FRMR %p\n", __func__, r); + dprintk("RPC: %s: recovered FRMR %p\n", __func__, f); f->fr_state = FRMR_IS_INVALID; return 0; } @@ -242,9 +244,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, depth; } - rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1, - RPCRDMA_MAX_DATA_SEGS / - ia->ri_max_frmr_depth)); + ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / + ia->ri_max_frmr_depth); return 0; } @@ -329,7 +330,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe); if (wc->status != IB_WC_SUCCESS) __frwr_sendcompletion_flush(wc, frmr, "localinv"); - complete_all(&frmr->fr_linv_done); + complete(&frmr->fr_linv_done); } /* Post a REG_MR Work Request to register a memory region @@ -396,7 +397,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, goto out_mapmr_err; dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", - __func__, mw, mw->mw_nents, mr->length); + __func__, frmr, mw->mw_nents, mr->length); key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); @@ -449,6 +450,8 @@ __frwr_prepare_linv_wr(struct rpcrdma_mw *mw) struct rpcrdma_frmr *f = &mw->frmr; struct ib_send_wr *invalidate_wr; + dprintk("RPC: %s: invalidating frmr %p\n", __func__, f); + f->fr_state = FRMR_IS_INVALID; invalidate_wr = &f->fr_invwr; @@ -472,6 +475,7 @@ static void frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) { struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr; + struct rpcrdma_rep *rep = req->rl_reply; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mw *mw, *tmp; struct rpcrdma_frmr *f; @@ -487,6 +491,12 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) f = NULL; invalidate_wrs = pos = prev = NULL; list_for_each_entry(mw, &req->rl_registered, mw_list) { + if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) && + (mw->mw_handle == rep->rr_inv_rkey)) { + mw->frmr.fr_state = FRMR_IS_INVALID; + continue; + } + pos = __frwr_prepare_linv_wr(mw); if (!invalidate_wrs) @@ -496,6 +506,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) prev = pos; f = &mw->frmr; } + if (!f) + goto unmap; /* Strong send queue ordering guarantees that when the * last WR in the chain completes, all WRs in the chain @@ -510,6 +522,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * replaces the QP. The RPC reply handler won't call us * unless ri_id->qp is a valid pointer. */ + r_xprt->rx_stats.local_inv_needed++; rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr); if (rc) goto reset_mrs; @@ -521,6 +534,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) */ unmap: list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) { + dprintk("RPC: %s: unmapping frmr %p\n", + __func__, &mw->frmr); list_del_init(&mw->mw_list); ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); @@ -576,4 +591,5 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { .ro_init_mr = frwr_op_init_mr, .ro_release_mr = frwr_op_release_mr, .ro_displayname = "frwr", + .ro_send_w_inv_ok = RPCRDMA_CMP_F_SND_W_INV_OK, }; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index a47f170b20ef..d987c2d3dd6e 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -53,14 +53,6 @@ # define RPCDBG_FACILITY RPCDBG_TRANS #endif -enum rpcrdma_chunktype { - rpcrdma_noch = 0, - rpcrdma_readch, - rpcrdma_areadch, - rpcrdma_writech, - rpcrdma_replych -}; - static const char transfertypes[][12] = { "inline", /* no chunks */ "read list", /* some argument via rdma read */ @@ -118,10 +110,12 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) return size; } -void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *ia, - struct rpcrdma_create_data_internal *cdata, - unsigned int maxsegs) +void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + unsigned int maxsegs = ia->ri_max_segs; + ia->ri_max_inline_write = cdata->inline_wsize - rpcrdma_max_call_header_size(maxsegs); ia->ri_max_inline_read = cdata->inline_rsize - @@ -155,42 +149,6 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; } -static int -rpcrdma_tail_pullup(struct xdr_buf *buf) -{ - size_t tlen = buf->tail[0].iov_len; - size_t skip = tlen & 3; - - /* Do not include the tail if it is only an XDR pad */ - if (tlen < 4) - return 0; - - /* xdr_write_pages() adds a pad at the beginning of the tail - * if the content in "buf->pages" is unaligned. Force the - * tail's actual content to land at the next XDR position - * after the head instead. - */ - if (skip) { - unsigned char *src, *dst; - unsigned int count; - - src = buf->tail[0].iov_base; - dst = buf->head[0].iov_base; - dst += buf->head[0].iov_len; - - src += skip; - tlen -= skip; - - dprintk("RPC: %s: skip=%zu, memmove(%p, %p, %zu)\n", - __func__, skip, dst, src, tlen); - - for (count = tlen; count; count--) - *dst++ = *src++; - } - - return tlen; -} - /* Split "vec" on page boundaries into segments. FMR registers pages, * not a byte range. Other modes coalesce these segments into a single * MR when they can. @@ -229,7 +187,8 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n) static int rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, - enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg) + enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, + bool reminv_expected) { int len, n, p, page_base; struct page **ppages; @@ -271,6 +230,13 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, if (type == rpcrdma_readch) return n; + /* When encoding the Write list, some servers need to see an extra + * segment for odd-length Write chunks. The upper layer provides + * space in the tail iovec for this purpose. + */ + if (type == rpcrdma_writech && reminv_expected) + return n; + if (xdrbuf->tail[0].iov_len) { /* the rpcrdma protocol allows us to omit any trailing * xdr pad bytes, saving the server an RDMA operation. */ @@ -327,7 +293,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, if (rtype == rpcrdma_areadch) pos = 0; seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg); + nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false); if (nsegs < 0) return ERR_PTR(nsegs); @@ -391,7 +357,8 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, seg = req->rl_segments; nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, rqst->rq_rcv_buf.head[0].iov_len, - wtype, seg); + wtype, seg, + r_xprt->rx_ia.ri_reminv_expected); if (nsegs < 0) return ERR_PTR(nsegs); @@ -456,7 +423,8 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg); + nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg, + r_xprt->rx_ia.ri_reminv_expected); if (nsegs < 0) return ERR_PTR(nsegs); @@ -491,74 +459,184 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, return iptr; } -/* - * Copy write data inline. - * This function is used for "small" requests. Data which is passed - * to RPC via iovecs (or page list) is copied directly into the - * pre-registered memory buffer for this request. For small amounts - * of data, this is efficient. The cutoff value is tunable. +/* Prepare the RPC-over-RDMA header SGE. */ -static void rpcrdma_inline_pullup(struct rpc_rqst *rqst) +static bool +rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, + u32 len) { - int i, npages, curlen; - int copy_len; - unsigned char *srcp, *destp; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); - int page_base; - struct page **ppages; + struct rpcrdma_regbuf *rb = req->rl_rdmabuf; + struct ib_sge *sge = &req->rl_send_sge[0]; + + if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) { + if (!__rpcrdma_dma_map_regbuf(ia, rb)) + return false; + sge->addr = rdmab_addr(rb); + sge->lkey = rdmab_lkey(rb); + } + sge->length = len; + + ib_dma_sync_single_for_device(ia->ri_device, sge->addr, + sge->length, DMA_TO_DEVICE); + req->rl_send_wr.num_sge++; + return true; +} - destp = rqst->rq_svec[0].iov_base; - curlen = rqst->rq_svec[0].iov_len; - destp += curlen; +/* Prepare the Send SGEs. The head and tail iovec, and each entry + * in the page list, gets its own SGE. + */ +static bool +rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, + struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) +{ + unsigned int sge_no, page_base, len, remaining; + struct rpcrdma_regbuf *rb = req->rl_sendbuf; + struct ib_device *device = ia->ri_device; + struct ib_sge *sge = req->rl_send_sge; + u32 lkey = ia->ri_pd->local_dma_lkey; + struct page *page, **ppages; + + /* The head iovec is straightforward, as it is already + * DMA-mapped. Sync the content that has changed. + */ + if (!rpcrdma_dma_map_regbuf(ia, rb)) + return false; + sge_no = 1; + sge[sge_no].addr = rdmab_addr(rb); + sge[sge_no].length = xdr->head[0].iov_len; + sge[sge_no].lkey = rdmab_lkey(rb); + ib_dma_sync_single_for_device(device, sge[sge_no].addr, + sge[sge_no].length, DMA_TO_DEVICE); + + /* If there is a Read chunk, the page list is being handled + * via explicit RDMA, and thus is skipped here. However, the + * tail iovec may include an XDR pad for the page list, as + * well as additional content, and may not reside in the + * same page as the head iovec. + */ + if (rtype == rpcrdma_readch) { + len = xdr->tail[0].iov_len; - dprintk("RPC: %s: destp 0x%p len %d hdrlen %d\n", - __func__, destp, rqst->rq_slen, curlen); + /* Do not include the tail if it is only an XDR pad */ + if (len < 4) + goto out; - copy_len = rqst->rq_snd_buf.page_len; + page = virt_to_page(xdr->tail[0].iov_base); + page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; - if (rqst->rq_snd_buf.tail[0].iov_len) { - curlen = rqst->rq_snd_buf.tail[0].iov_len; - if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) { - memmove(destp + copy_len, - rqst->rq_snd_buf.tail[0].iov_base, curlen); - r_xprt->rx_stats.pullup_copy_count += curlen; + /* If the content in the page list is an odd length, + * xdr_write_pages() has added a pad at the beginning + * of the tail iovec. Force the tail's non-pad content + * to land at the next XDR position in the Send message. + */ + page_base += len & 3; + len -= len & 3; + goto map_tail; + } + + /* If there is a page list present, temporarily DMA map + * and prepare an SGE for each page to be sent. + */ + if (xdr->page_len) { + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + page_base = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + sge_no++; + if (sge_no > RPCRDMA_MAX_SEND_SGES - 2) + goto out_mapping_overflow; + + len = min_t(u32, PAGE_SIZE - page_base, remaining); + sge[sge_no].addr = ib_dma_map_page(device, *ppages, + page_base, len, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(device, sge[sge_no].addr)) + goto out_mapping_err; + sge[sge_no].length = len; + sge[sge_no].lkey = lkey; + + req->rl_mapped_sges++; + ppages++; + remaining -= len; + page_base = 0; } - dprintk("RPC: %s: tail destp 0x%p len %d\n", - __func__, destp + copy_len, curlen); - rqst->rq_svec[0].iov_len += curlen; } - r_xprt->rx_stats.pullup_copy_count += copy_len; - page_base = rqst->rq_snd_buf.page_base; - ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT); - page_base &= ~PAGE_MASK; - npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT; - for (i = 0; copy_len && i < npages; i++) { - curlen = PAGE_SIZE - page_base; - if (curlen > copy_len) - curlen = copy_len; - dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", - __func__, i, destp, copy_len, curlen); - srcp = kmap_atomic(ppages[i]); - memcpy(destp, srcp+page_base, curlen); - kunmap_atomic(srcp); - rqst->rq_svec[0].iov_len += curlen; - destp += curlen; - copy_len -= curlen; - page_base = 0; + /* The tail iovec is not always constructed in the same + * page where the head iovec resides (see, for example, + * gss_wrap_req_priv). To neatly accommodate that case, + * DMA map it separately. + */ + if (xdr->tail[0].iov_len) { + page = virt_to_page(xdr->tail[0].iov_base); + page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; + len = xdr->tail[0].iov_len; + +map_tail: + sge_no++; + sge[sge_no].addr = ib_dma_map_page(device, page, + page_base, len, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(device, sge[sge_no].addr)) + goto out_mapping_err; + sge[sge_no].length = len; + sge[sge_no].lkey = lkey; + req->rl_mapped_sges++; } - /* header now contains entire send message */ + +out: + req->rl_send_wr.num_sge = sge_no + 1; + return true; + +out_mapping_overflow: + pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); + return false; + +out_mapping_err: + pr_err("rpcrdma: Send mapping error\n"); + return false; +} + +bool +rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, + u32 hdrlen, struct xdr_buf *xdr, + enum rpcrdma_chunktype rtype) +{ + req->rl_send_wr.num_sge = 0; + req->rl_mapped_sges = 0; + + if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen)) + goto out_map; + + if (rtype != rpcrdma_areadch) + if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype)) + goto out_map; + + return true; + +out_map: + pr_err("rpcrdma: failed to DMA map a Send buffer\n"); + return false; +} + +void +rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +{ + struct ib_device *device = ia->ri_device; + struct ib_sge *sge; + int count; + + sge = &req->rl_send_sge[2]; + for (count = req->rl_mapped_sges; count--; sge++) + ib_dma_unmap_page(device, sge->addr, sge->length, + DMA_TO_DEVICE); + req->rl_mapped_sges = 0; } /* * Marshal a request: the primary job of this routine is to choose * the transfer modes. See comments below. * - * Prepares up to two IOVs per Call message: - * - * [0] -- RPC RDMA header - * [1] -- the RPC header/data - * * Returns zero on success, otherwise a negative errno. */ @@ -626,12 +704,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) */ if (rpcrdma_args_inline(r_xprt, rqst)) { rtype = rpcrdma_noch; - rpcrdma_inline_pullup(rqst); - rpclen = rqst->rq_svec[0].iov_len; + rpclen = rqst->rq_snd_buf.len; } else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) { rtype = rpcrdma_readch; - rpclen = rqst->rq_svec[0].iov_len; - rpclen += rpcrdma_tail_pullup(&rqst->rq_snd_buf); + rpclen = rqst->rq_snd_buf.head[0].iov_len + + rqst->rq_snd_buf.tail[0].iov_len; } else { r_xprt->rx_stats.nomsg_call_count++; headerp->rm_type = htonl(RDMA_NOMSG); @@ -673,34 +750,18 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) goto out_unmap; hdrlen = (unsigned char *)iptr - (unsigned char *)headerp; - if (hdrlen + rpclen > RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) - goto out_overflow; - dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n", rqst->rq_task->tk_pid, __func__, transfertypes[rtype], transfertypes[wtype], hdrlen, rpclen); - req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); - req->rl_send_iov[0].length = hdrlen; - req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); - - req->rl_niovs = 1; - if (rtype == rpcrdma_areadch) - return 0; - - req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); - req->rl_send_iov[1].length = rpclen; - req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); - - req->rl_niovs = 2; + if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen, + &rqst->rq_snd_buf, rtype)) { + iptr = ERR_PTR(-EIO); + goto out_unmap; + } return 0; -out_overflow: - pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s/%s\n", - hdrlen, rpclen, transfertypes[rtype], transfertypes[wtype]); - iptr = ERR_PTR(-EIO); - out_unmap: r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); return PTR_ERR(iptr); @@ -916,8 +977,10 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) * allowed to timeout, to discover the errors at that time. */ void -rpcrdma_reply_handler(struct rpcrdma_rep *rep) +rpcrdma_reply_handler(struct work_struct *work) { + struct rpcrdma_rep *rep = + container_of(work, struct rpcrdma_rep, rr_work); struct rpcrdma_msg *headerp; struct rpcrdma_req *req; struct rpc_rqst *rqst; @@ -1132,6 +1195,6 @@ out_duplicate: repost: r_xprt->rx_stats.bad_reply_count++; - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) rpcrdma_recv_buffer_put(rep); } diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index a2a7519b0f23..2d8545c34095 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -129,7 +129,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, ret = -EIO; goto out_unmap; } - atomic_inc(&rdma->sc_dma_used); + svc_rdma_count_mappings(rdma, ctxt); memset(&send_wr, 0, sizeof(send_wr)); ctxt->cqe.done = svc_rdma_wc_send; @@ -159,33 +159,34 @@ out_unmap: /* Server-side transport endpoint wants a whole page for its send * buffer. The client RPC code constructs the RPC header in this * buffer before it invokes ->send_request. - * - * Returns NULL if there was a temporary allocation failure. */ -static void * -xprt_rdma_bc_allocate(struct rpc_task *task, size_t size) +static int +xprt_rdma_bc_allocate(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; + size_t size = rqst->rq_callsize; struct svcxprt_rdma *rdma; struct page *page; rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); - /* Prevent an infinite loop: try to make this case work */ - if (size > PAGE_SIZE) + if (size > PAGE_SIZE) { WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n", size); + return -EINVAL; + } page = alloc_page(RPCRDMA_DEF_GFP); if (!page) - return NULL; + return -ENOMEM; - return page_address(page); + rqst->rq_buffer = page_address(page); + return 0; } static void -xprt_rdma_bc_free(void *buffer) +xprt_rdma_bc_free(struct rpc_task *task) { /* No-op: ctxt and page have already been freed. */ } diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 2c25606f2561..ad1df979b3f0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -159,7 +159,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, ctxt->sge[pno].addr); if (ret) goto err; - atomic_inc(&xprt->sc_dma_used); + svc_rdma_count_mappings(xprt, ctxt); ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey; ctxt->sge[pno].length = len; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 54d533300620..f5a91edcd233 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -225,6 +225,48 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp, return rp_ary; } +/* RPC-over-RDMA Version One private extension: Remote Invalidation. + * Responder's choice: requester signals it can handle Send With + * Invalidate, and responder chooses one rkey to invalidate. + * + * Find a candidate rkey to invalidate when sending a reply. Picks the + * first rkey it finds in the chunks lists. + * + * Returns zero if RPC's chunk lists are empty. + */ +static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp, + struct rpcrdma_write_array *wr_ary, + struct rpcrdma_write_array *rp_ary) +{ + struct rpcrdma_read_chunk *rd_ary; + struct rpcrdma_segment *arg_ch; + u32 inv_rkey; + + inv_rkey = 0; + + rd_ary = svc_rdma_get_read_chunk(rdma_argp); + if (rd_ary) { + inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle); + goto out; + } + + if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) { + arg_ch = &wr_ary->wc_array[0].wc_target; + inv_rkey = be32_to_cpu(arg_ch->rs_handle); + goto out; + } + + if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) { + arg_ch = &rp_ary->wc_array[0].wc_target; + inv_rkey = be32_to_cpu(arg_ch->rs_handle); + goto out; + } + +out: + dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey); + return inv_rkey; +} + /* Assumptions: * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE */ @@ -280,7 +322,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge[sge_no].addr)) goto err; - atomic_inc(&xprt->sc_dma_used); + svc_rdma_count_mappings(xprt, ctxt); sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; ctxt->count++; sge_off = 0; @@ -464,7 +506,8 @@ static int send_reply(struct svcxprt_rdma *rdma, struct page *page, struct rpcrdma_msg *rdma_resp, struct svc_rdma_req_map *vec, - int byte_count) + int byte_count, + u32 inv_rkey) { struct svc_rdma_op_ctxt *ctxt; struct ib_send_wr send_wr; @@ -489,7 +532,7 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[0].length, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) goto err; - atomic_inc(&rdma->sc_dma_used); + svc_rdma_count_mappings(rdma, ctxt); ctxt->direction = DMA_TO_DEVICE; @@ -505,7 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma, if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[sge_no].addr)) goto err; - atomic_inc(&rdma->sc_dma_used); + svc_rdma_count_mappings(rdma, ctxt); ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; ctxt->sge[sge_no].length = sge_bytes; } @@ -523,23 +566,9 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; ctxt->count++; rqstp->rq_respages[page_no] = NULL; - /* - * If there are more pages than SGE, terminate SGE - * list so that svc_rdma_unmap_dma doesn't attempt to - * unmap garbage. - */ - if (page_no+1 >= sge_no) - ctxt->sge[page_no+1].length = 0; } rqstp->rq_next_page = rqstp->rq_respages + 1; - /* The loop above bumps sc_dma_used for each sge. The - * xdr_buf.tail gets a separate sge, but resides in the - * same page as xdr_buf.head. Don't count it twice. - */ - if (sge_no > ctxt->count) - atomic_dec(&rdma->sc_dma_used); - if (sge_no > rdma->sc_max_sge) { pr_err("svcrdma: Too many sges (%d)\n", sge_no); goto err; @@ -549,7 +578,11 @@ static int send_reply(struct svcxprt_rdma *rdma, send_wr.wr_cqe = &ctxt->cqe; send_wr.sg_list = ctxt->sge; send_wr.num_sge = sge_no; - send_wr.opcode = IB_WR_SEND; + if (inv_rkey) { + send_wr.opcode = IB_WR_SEND_WITH_INV; + send_wr.ex.invalidate_rkey = inv_rkey; + } else + send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; ret = svc_rdma_send(rdma, &send_wr); @@ -581,6 +614,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) int inline_bytes; struct page *res_page; struct svc_rdma_req_map *vec; + u32 inv_rkey; dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); @@ -591,6 +625,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) wr_ary = svc_rdma_get_write_array(rdma_argp); rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary); + inv_rkey = 0; + if (rdma->sc_snd_w_inv) + inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary); + /* Build an req vec for the XDR */ vec = svc_rdma_get_req_map(rdma); ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL); @@ -633,9 +671,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) goto err1; ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec, - inline_bytes); + inline_bytes, inv_rkey); if (ret < 0) - goto err1; + goto err0; svc_rdma_put_req_map(rdma, vec); dprintk("svcrdma: send_reply returns %d\n", ret); @@ -692,7 +730,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, svc_rdma_put_context(ctxt, 1); return; } - atomic_inc(&xprt->sc_dma_used); + svc_rdma_count_mappings(xprt, ctxt); /* Prepare SEND WR */ memset(&err_wr, 0, sizeof(err_wr)); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index dd9440137834..6864fb967038 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -198,6 +198,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) out: ctxt->count = 0; + ctxt->mapped_sges = 0; ctxt->frmr = NULL; return ctxt; @@ -221,22 +222,27 @@ out_empty: void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) { struct svcxprt_rdma *xprt = ctxt->xprt; - int i; - for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { + struct ib_device *device = xprt->sc_cm_id->device; + u32 lkey = xprt->sc_pd->local_dma_lkey; + unsigned int i, count; + + for (count = 0, i = 0; i < ctxt->mapped_sges; i++) { /* * Unmap the DMA addr in the SGE if the lkey matches * the local_dma_lkey, otherwise, ignore it since it is * an FRMR lkey and will be unmapped later when the * last WR that uses it completes. */ - if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) { - atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_page(xprt->sc_cm_id->device, + if (ctxt->sge[i].lkey == lkey) { + count++; + ib_dma_unmap_page(device, ctxt->sge[i].addr, ctxt->sge[i].length, ctxt->direction); } } + ctxt->mapped_sges = 0; + atomic_sub(count, &xprt->sc_dma_used); } void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) @@ -600,7 +606,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) goto err_put_ctxt; - atomic_inc(&xprt->sc_dma_used); + svc_rdma_count_mappings(xprt, ctxt); ctxt->sge[sge_no].addr = pa; ctxt->sge[sge_no].length = PAGE_SIZE; ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; @@ -642,6 +648,26 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags) return ret; } +static void +svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt, + struct rdma_conn_param *param) +{ + const struct rpcrdma_connect_private *pmsg = param->private_data; + + if (pmsg && + pmsg->cp_magic == rpcrdma_cmp_magic && + pmsg->cp_version == RPCRDMA_CMP_VERSION) { + newxprt->sc_snd_w_inv = pmsg->cp_flags & + RPCRDMA_CMP_F_SND_W_INV_OK; + + dprintk("svcrdma: client send_size %u, recv_size %u " + "remote inv %ssupported\n", + rpcrdma_decode_buffer_size(pmsg->cp_send_size), + rpcrdma_decode_buffer_size(pmsg->cp_recv_size), + newxprt->sc_snd_w_inv ? "" : "un"); + } +} + /* * This function handles the CONNECT_REQUEST event on a listening * endpoint. It is passed the cma_id for the _new_ connection. The context in @@ -653,7 +679,8 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags) * will call the recvfrom method on the listen xprt which will accept the new * connection. */ -static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird) +static void handle_connect_req(struct rdma_cm_id *new_cma_id, + struct rdma_conn_param *param) { struct svcxprt_rdma *listen_xprt = new_cma_id->context; struct svcxprt_rdma *newxprt; @@ -669,9 +696,10 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird) new_cma_id->context = newxprt; dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", newxprt, newxprt->sc_cm_id, listen_xprt); + svc_rdma_parse_connect_private(newxprt, param); /* Save client advertised inbound read limit for use later in accept. */ - newxprt->sc_ord = client_ird; + newxprt->sc_ord = param->initiator_depth; /* Set the local and remote addresses in the transport */ sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; @@ -706,8 +734,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " "event = %s (%d)\n", cma_id, cma_id->context, rdma_event_msg(event->event), event->event); - handle_connect_req(cma_id, - event->param.conn.initiator_depth); + handle_connect_req(cma_id, &event->param.conn); break; case RDMA_CM_EVENT_ESTABLISHED: @@ -941,6 +968,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct svcxprt_rdma *listen_rdma; struct svcxprt_rdma *newxprt = NULL; struct rdma_conn_param conn_param; + struct rpcrdma_connect_private pmsg; struct ib_qp_init_attr qp_attr; struct ib_device *dev; unsigned int i; @@ -993,7 +1021,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord); newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord); - newxprt->sc_pd = ib_alloc_pd(dev); + newxprt->sc_pd = ib_alloc_pd(dev, 0); if (IS_ERR(newxprt->sc_pd)) { dprintk("svcrdma: error creating PD for connect request\n"); goto errout; @@ -1070,7 +1098,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dev->attrs.max_fast_reg_page_list_len; newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; newxprt->sc_reader = rdma_read_chunk_frmr; - } + } else + newxprt->sc_snd_w_inv = false; /* * Determine if a DMA MR is required and if so, what privs are required @@ -1094,11 +1123,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Swap out the handler */ newxprt->sc_cm_id->event_handler = rdma_cma_handler; + /* Construct RDMA-CM private message */ + pmsg.cp_magic = rpcrdma_cmp_magic; + pmsg.cp_version = RPCRDMA_CMP_VERSION; + pmsg.cp_flags = 0; + pmsg.cp_send_size = pmsg.cp_recv_size = + rpcrdma_encode_buffer_size(newxprt->sc_max_req_size); + /* Accept Connection */ set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = 0; conn_param.initiator_depth = newxprt->sc_ord; + conn_param.private_data = &pmsg; + conn_param.private_data_len = sizeof(pmsg); ret = rdma_accept(newxprt->sc_cm_id, &conn_param); if (ret) { dprintk("svcrdma: failed to accept new connection, ret=%d\n", diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 81f0e879f019..ed5e285fd2ea 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -97,7 +97,7 @@ static struct ctl_table xr_tunables_table[] = { .data = &xprt_rdma_max_inline_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_inline_size, .extra2 = &max_inline_size, }, @@ -106,7 +106,7 @@ static struct ctl_table xr_tunables_table[] = { .data = &xprt_rdma_max_inline_write, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_inline_size, .extra2 = &max_inline_size, }, @@ -477,115 +477,152 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) } } -/* - * The RDMA allocate/free functions need the task structure as a place - * to hide the struct rpcrdma_req, which is necessary for the actual send/recv - * sequence. +/* Allocate a fixed-size buffer in which to construct and send the + * RPC-over-RDMA header for this request. + */ +static bool +rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, + gfp_t flags) +{ + size_t size = RPCRDMA_HDRBUF_SIZE; + struct rpcrdma_regbuf *rb; + + if (req->rl_rdmabuf) + return true; + + rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); + if (IS_ERR(rb)) + return false; + + r_xprt->rx_stats.hardway_register_count += size; + req->rl_rdmabuf = rb; + return true; +} + +static bool +rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, + size_t size, gfp_t flags) +{ + struct rpcrdma_regbuf *rb; + + if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size) + return true; + + rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); + if (IS_ERR(rb)) + return false; + + rpcrdma_free_regbuf(req->rl_sendbuf); + r_xprt->rx_stats.hardway_register_count += size; + req->rl_sendbuf = rb; + return true; +} + +/* The rq_rcv_buf is used only if a Reply chunk is necessary. + * The decision to use a Reply chunk is made later in + * rpcrdma_marshal_req. This buffer is registered at that time. * - * The RPC layer allocates both send and receive buffers in the same call - * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer). - * We may register rq_rcv_buf when using reply chunks. + * Otherwise, the associated RPC Reply arrives in a separate + * Receive buffer, arbitrarily chosen by the HCA. The buffer + * allocated here for the RPC Reply is not utilized in that + * case. See rpcrdma_inline_fixup. + * + * A regbuf is used here to remember the buffer size. */ -static void * -xprt_rdma_allocate(struct rpc_task *task, size_t size) +static bool +rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, + size_t size, gfp_t flags) { - struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_regbuf *rb; + + if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size) + return true; + + rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags); + if (IS_ERR(rb)) + return false; + + rpcrdma_free_regbuf(req->rl_recvbuf); + r_xprt->rx_stats.hardway_register_count += size; + req->rl_recvbuf = rb; + return true; +} + +/** + * xprt_rdma_allocate - allocate transport resources for an RPC + * @task: RPC task + * + * Return values: + * 0: Success; rq_buffer points to RPC buffer to use + * ENOMEM: Out of memory, call again later + * EIO: A permanent error occurred, do not retry + * + * The RDMA allocate/free functions need the task structure as a place + * to hide the struct rpcrdma_req, which is necessary for the actual + * send/recv sequence. + * + * xprt_rdma_allocate provides buffers that are already mapped for + * DMA, and a local DMA lkey is provided for each. + */ +static int +xprt_rdma_allocate(struct rpc_task *task) +{ + struct rpc_rqst *rqst = task->tk_rqstp; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req; - size_t min_size; gfp_t flags; req = rpcrdma_buffer_get(&r_xprt->rx_buf); if (req == NULL) - return NULL; + return -ENOMEM; flags = RPCRDMA_DEF_GFP; if (RPC_IS_SWAPPER(task)) flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; - if (req->rl_rdmabuf == NULL) - goto out_rdmabuf; - if (req->rl_sendbuf == NULL) - goto out_sendbuf; - if (size > req->rl_sendbuf->rg_size) - goto out_sendbuf; - -out: - dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); - req->rl_connect_cookie = 0; /* our reserved value */ - req->rl_task = task; - return req->rl_sendbuf->rg_base; - -out_rdmabuf: - min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp); - rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags); - if (IS_ERR(rb)) + if (!rpcrdma_get_rdmabuf(r_xprt, req, flags)) goto out_fail; - req->rl_rdmabuf = rb; - -out_sendbuf: - /* XDR encoding and RPC/RDMA marshaling of this request has not - * yet occurred. Thus a lower bound is needed to prevent buffer - * overrun during marshaling. - * - * RPC/RDMA marshaling may choose to send payload bearing ops - * inline, if the result is smaller than the inline threshold. - * The value of the "size" argument accounts for header - * requirements but not for the payload in these cases. - * - * Likewise, allocate enough space to receive a reply up to the - * size of the inline threshold. - * - * It's unlikely that both the send header and the received - * reply will be large, but slush is provided here to allow - * flexibility when marshaling. - */ - min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp); - min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp); - if (size < min_size) - size = min_size; - - rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags); - if (IS_ERR(rb)) + if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags)) + goto out_fail; + if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) goto out_fail; - rb->rg_owner = req; - r_xprt->rx_stats.hardway_register_count += size; - rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf); - req->rl_sendbuf = rb; - goto out; + dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n", + task->tk_pid, __func__, rqst->rq_callsize, + rqst->rq_rcvsize, req); + + req->rl_connect_cookie = 0; /* our reserved value */ + rpcrdma_set_xprtdata(rqst, req); + rqst->rq_buffer = req->rl_sendbuf->rg_base; + rqst->rq_rbuffer = req->rl_recvbuf->rg_base; + return 0; out_fail: rpcrdma_buffer_put(req); - return NULL; + return -ENOMEM; } -/* - * This function returns all RDMA resources to the pool. +/** + * xprt_rdma_free - release resources allocated by xprt_rdma_allocate + * @task: RPC task + * + * Caller guarantees rqst->rq_buffer is non-NULL. */ static void -xprt_rdma_free(void *buffer) +xprt_rdma_free(struct rpc_task *task) { - struct rpcrdma_req *req; - struct rpcrdma_xprt *r_xprt; - struct rpcrdma_regbuf *rb; - - if (buffer == NULL) - return; + struct rpc_rqst *rqst = task->tk_rqstp; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_ia *ia = &r_xprt->rx_ia; - rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]); - req = rb->rg_owner; if (req->rl_backchannel) return; - r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); - dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); - r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, - !RPC_IS_ASYNC(req->rl_task)); - + ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); + rpcrdma_unmap_sges(ia, req); rpcrdma_buffer_put(req); } @@ -685,10 +722,11 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) r_xprt->rx_stats.failed_marshal_count, r_xprt->rx_stats.bad_reply_count, r_xprt->rx_stats.nomsg_call_count); - seq_printf(seq, "%lu %lu %lu\n", + seq_printf(seq, "%lu %lu %lu %lu\n", r_xprt->rx_stats.mrs_recovered, r_xprt->rx_stats.mrs_orphaned, - r_xprt->rx_stats.mrs_allocated); + r_xprt->rx_stats.mrs_allocated, + r_xprt->rx_stats.local_inv_needed); } static int diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 799cce6cbe45..ec74289af7ec 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -129,15 +129,6 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) wc->status, wc->vendor_err); } -static void -rpcrdma_receive_worker(struct work_struct *work) -{ - struct rpcrdma_rep *rep = - container_of(work, struct rpcrdma_rep, rr_work); - - rpcrdma_reply_handler(rep); -} - /* Perform basic sanity checking to avoid using garbage * to update the credit grant value. */ @@ -161,13 +152,13 @@ rpcrdma_update_granted_credits(struct rpcrdma_rep *rep) } /** - * rpcrdma_receive_wc - Invoked by RDMA provider for each polled Receive WC + * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC * @cq: completion queue (ignored) * @wc: completed WR * */ static void -rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc) +rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) { struct ib_cqe *cqe = wc->wr_cqe; struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, @@ -185,6 +176,9 @@ rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc) __func__, rep, wc->byte_len); rep->rr_len = wc->byte_len; + rep->rr_wc_flags = wc->wc_flags; + rep->rr_inv_rkey = wc->ex.invalidate_rkey; + ib_dma_sync_single_for_cpu(rep->rr_device, rdmab_addr(rep->rr_rdmabuf), rep->rr_len, DMA_FROM_DEVICE); @@ -204,6 +198,36 @@ out_fail: goto out_schedule; } +static void +rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, + struct rdma_conn_param *param) +{ + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + const struct rpcrdma_connect_private *pmsg = param->private_data; + unsigned int rsize, wsize; + + /* Default settings for RPC-over-RDMA Version One */ + r_xprt->rx_ia.ri_reminv_expected = false; + rsize = RPCRDMA_V1_DEF_INLINE_SIZE; + wsize = RPCRDMA_V1_DEF_INLINE_SIZE; + + if (pmsg && + pmsg->cp_magic == rpcrdma_cmp_magic && + pmsg->cp_version == RPCRDMA_CMP_VERSION) { + r_xprt->rx_ia.ri_reminv_expected = true; + rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); + wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); + } + + if (rsize < cdata->inline_rsize) + cdata->inline_rsize = rsize; + if (wsize < cdata->inline_wsize) + cdata->inline_wsize = wsize; + pr_info("rpcrdma: max send %u, max recv %u\n", + cdata->inline_wsize, cdata->inline_rsize); + rpcrdma_set_max_header_sizes(r_xprt); +} + static int rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) { @@ -244,6 +268,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) " (%d initiator)\n", __func__, attr->max_dest_rd_atomic, attr->max_rd_atomic); + rpcrdma_update_connect_private(xprt, &event->param.conn); goto connected; case RDMA_CM_EVENT_CONNECT_ERROR: connstate = -ENOTCONN; @@ -387,7 +412,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) } ia->ri_device = ia->ri_id->device; - ia->ri_pd = ib_alloc_pd(ia->ri_device); + ia->ri_pd = ib_alloc_pd(ia->ri_device, 0); if (IS_ERR(ia->ri_pd)) { rc = PTR_ERR(ia->ri_pd); pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); @@ -454,11 +479,12 @@ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { + struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; struct ib_cq *sendcq, *recvcq; unsigned int max_qp_wr; int rc; - if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) { + if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) { dprintk("RPC: %s: insufficient sge's available\n", __func__); return -ENOMEM; @@ -487,7 +513,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ - ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; + ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; @@ -536,9 +562,14 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* Initialize cma parameters */ memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma)); - /* RPC/RDMA does not use private data */ - ep->rep_remote_cma.private_data = NULL; - ep->rep_remote_cma.private_data_len = 0; + /* Prepare RDMA-CM private message */ + pmsg->cp_magic = rpcrdma_cmp_magic; + pmsg->cp_version = RPCRDMA_CMP_VERSION; + pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok; + pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize); + pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize); + ep->rep_remote_cma.private_data = pmsg; + ep->rep_remote_cma.private_data_len = sizeof(*pmsg); /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; @@ -849,6 +880,10 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) req->rl_cqe.done = rpcrdma_wc_send; req->rl_buffer = &r_xprt->rx_buf; INIT_LIST_HEAD(&req->rl_registered); + req->rl_send_wr.next = NULL; + req->rl_send_wr.wr_cqe = &req->rl_cqe; + req->rl_send_wr.sg_list = req->rl_send_sge; + req->rl_send_wr.opcode = IB_WR_SEND; return req; } @@ -865,17 +900,21 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) if (rep == NULL) goto out; - rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize, - GFP_KERNEL); + rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize, + DMA_FROM_DEVICE, GFP_KERNEL); if (IS_ERR(rep->rr_rdmabuf)) { rc = PTR_ERR(rep->rr_rdmabuf); goto out_free; } rep->rr_device = ia->ri_device; - rep->rr_cqe.done = rpcrdma_receive_wc; + rep->rr_cqe.done = rpcrdma_wc_receive; rep->rr_rxprt = r_xprt; - INIT_WORK(&rep->rr_work, rpcrdma_receive_worker); + INIT_WORK(&rep->rr_work, rpcrdma_reply_handler); + rep->rr_recv_wr.next = NULL; + rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; + rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; + rep->rr_recv_wr.num_sge = 1; return rep; out_free: @@ -966,17 +1005,18 @@ rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf) } static void -rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) +rpcrdma_destroy_rep(struct rpcrdma_rep *rep) { - rpcrdma_free_regbuf(ia, rep->rr_rdmabuf); + rpcrdma_free_regbuf(rep->rr_rdmabuf); kfree(rep); } void -rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +rpcrdma_destroy_req(struct rpcrdma_req *req) { - rpcrdma_free_regbuf(ia, req->rl_sendbuf); - rpcrdma_free_regbuf(ia, req->rl_rdmabuf); + rpcrdma_free_regbuf(req->rl_recvbuf); + rpcrdma_free_regbuf(req->rl_sendbuf); + rpcrdma_free_regbuf(req->rl_rdmabuf); kfree(req); } @@ -1009,15 +1049,13 @@ rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf) void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { - struct rpcrdma_ia *ia = rdmab_to_ia(buf); - cancel_delayed_work_sync(&buf->rb_recovery_worker); while (!list_empty(&buf->rb_recv_bufs)) { struct rpcrdma_rep *rep; rep = rpcrdma_buffer_get_rep_locked(buf); - rpcrdma_destroy_rep(ia, rep); + rpcrdma_destroy_rep(rep); } buf->rb_send_count = 0; @@ -1030,7 +1068,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) list_del(&req->rl_all); spin_unlock(&buf->rb_reqslock); - rpcrdma_destroy_req(ia, req); + rpcrdma_destroy_req(req); spin_lock(&buf->rb_reqslock); } spin_unlock(&buf->rb_reqslock); @@ -1129,7 +1167,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) struct rpcrdma_buffer *buffers = req->rl_buffer; struct rpcrdma_rep *rep = req->rl_reply; - req->rl_niovs = 0; + req->rl_send_wr.num_sge = 0; req->rl_reply = NULL; spin_lock(&buffers->rb_lock); @@ -1171,70 +1209,81 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) spin_unlock(&buffers->rb_lock); } -/* - * Wrappers for internal-use kmalloc memory registration, used by buffer code. - */ - /** - * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers - * @ia: controlling rpcrdma_ia + * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers * @size: size of buffer to be allocated, in bytes + * @direction: direction of data movement * @flags: GFP flags * - * Returns pointer to private header of an area of internally - * registered memory, or an ERR_PTR. The registered buffer follows - * the end of the private header. + * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that + * can be persistently DMA-mapped for I/O. * * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for - * receiving the payload of RDMA RECV operations. regbufs are not - * used for RDMA READ/WRITE operations, thus are registered only for - * LOCAL access. + * receiving the payload of RDMA RECV operations. During Long Calls + * or Replies they may be registered externally via ro_map. */ struct rpcrdma_regbuf * -rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags) +rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction, + gfp_t flags) { struct rpcrdma_regbuf *rb; - struct ib_sge *iov; rb = kmalloc(sizeof(*rb) + size, flags); if (rb == NULL) - goto out; + return ERR_PTR(-ENOMEM); - iov = &rb->rg_iov; - iov->addr = ib_dma_map_single(ia->ri_device, - (void *)rb->rg_base, size, - DMA_BIDIRECTIONAL); - if (ib_dma_mapping_error(ia->ri_device, iov->addr)) - goto out_free; + rb->rg_device = NULL; + rb->rg_direction = direction; + rb->rg_iov.length = size; - iov->length = size; - iov->lkey = ia->ri_pd->local_dma_lkey; - rb->rg_size = size; - rb->rg_owner = NULL; return rb; +} -out_free: - kfree(rb); -out: - return ERR_PTR(-ENOMEM); +/** + * __rpcrdma_map_regbuf - DMA-map a regbuf + * @ia: controlling rpcrdma_ia + * @rb: regbuf to be mapped + */ +bool +__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) +{ + if (rb->rg_direction == DMA_NONE) + return false; + + rb->rg_iov.addr = ib_dma_map_single(ia->ri_device, + (void *)rb->rg_base, + rdmab_length(rb), + rb->rg_direction); + if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb))) + return false; + + rb->rg_device = ia->ri_device; + rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; + return true; +} + +static void +rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb) +{ + if (!rpcrdma_regbuf_is_mapped(rb)) + return; + + ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), + rdmab_length(rb), rb->rg_direction); + rb->rg_device = NULL; } /** * rpcrdma_free_regbuf - deregister and free registered buffer - * @ia: controlling rpcrdma_ia * @rb: regbuf to be deregistered and freed */ void -rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) +rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb) { - struct ib_sge *iov; - if (!rb) return; - iov = &rb->rg_iov; - ib_dma_unmap_single(ia->ri_device, - iov->addr, iov->length, DMA_BIDIRECTIONAL); + rpcrdma_dma_unmap_regbuf(rb); kfree(rb); } @@ -1248,39 +1297,28 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_req *req) { - struct ib_device *device = ia->ri_device; - struct ib_send_wr send_wr, *send_wr_fail; - struct rpcrdma_rep *rep = req->rl_reply; - struct ib_sge *iov = req->rl_send_iov; - int i, rc; + struct ib_send_wr *send_wr = &req->rl_send_wr; + struct ib_send_wr *send_wr_fail; + int rc; - if (rep) { - rc = rpcrdma_ep_post_recv(ia, ep, rep); + if (req->rl_reply) { + rc = rpcrdma_ep_post_recv(ia, req->rl_reply); if (rc) return rc; req->rl_reply = NULL; } - send_wr.next = NULL; - send_wr.wr_cqe = &req->rl_cqe; - send_wr.sg_list = iov; - send_wr.num_sge = req->rl_niovs; - send_wr.opcode = IB_WR_SEND; - - for (i = 0; i < send_wr.num_sge; i++) - ib_dma_sync_single_for_device(device, iov[i].addr, - iov[i].length, DMA_TO_DEVICE); dprintk("RPC: %s: posting %d s/g entries\n", - __func__, send_wr.num_sge); + __func__, send_wr->num_sge); if (DECR_CQCOUNT(ep) > 0) - send_wr.send_flags = 0; + send_wr->send_flags = 0; else { /* Provider must take a send completion every now and then */ INIT_CQCOUNT(ep); - send_wr.send_flags = IB_SEND_SIGNALED; + send_wr->send_flags = IB_SEND_SIGNALED; } - rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail); + rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); if (rc) goto out_postsend_err; return 0; @@ -1290,32 +1328,24 @@ out_postsend_err: return -ENOTCONN; } -/* - * (Re)post a receive buffer. - */ int rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, - struct rpcrdma_ep *ep, struct rpcrdma_rep *rep) { - struct ib_recv_wr recv_wr, *recv_wr_fail; + struct ib_recv_wr *recv_wr_fail; int rc; - recv_wr.next = NULL; - recv_wr.wr_cqe = &rep->rr_cqe; - recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; - recv_wr.num_sge = 1; - - ib_dma_sync_single_for_cpu(ia->ri_device, - rdmab_addr(rep->rr_rdmabuf), - rdmab_length(rep->rr_rdmabuf), - DMA_BIDIRECTIONAL); - - rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); + if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) + goto out_map; + rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); if (rc) goto out_postrecv; return 0; +out_map: + pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); + return -EIO; + out_postrecv: pr_err("rpcrdma: ib_post_recv returned %i\n", rc); return -ENOTCONN; @@ -1333,7 +1363,6 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) { struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpcrdma_rep *rep; int rc; @@ -1344,7 +1373,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) rep = rpcrdma_buffer_get_rep_locked(buffers); spin_unlock(&buffers->rb_lock); - rc = rpcrdma_ep_post_recv(ia, ep, rep); + rc = rpcrdma_ep_post_recv(ia, rep); if (rc) goto out_rc; } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index a71b0f5897d8..0d35b761c883 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -70,9 +70,11 @@ struct rpcrdma_ia { struct ib_pd *ri_pd; struct completion ri_done; int ri_async_rc; + unsigned int ri_max_segs; unsigned int ri_max_frmr_depth; unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; + bool ri_reminv_expected; struct ib_qp_attr ri_qp_attr; struct ib_qp_init_attr ri_qp_init_attr; }; @@ -87,6 +89,7 @@ struct rpcrdma_ep { int rep_connected; struct ib_qp_init_attr rep_attr; wait_queue_head_t rep_connect_wait; + struct rpcrdma_connect_private rep_cm_private; struct rdma_conn_param rep_remote_cma; struct sockaddr_storage rep_remote_addr; struct delayed_work rep_connect_worker; @@ -112,9 +115,9 @@ struct rpcrdma_ep { */ struct rpcrdma_regbuf { - size_t rg_size; - struct rpcrdma_req *rg_owner; struct ib_sge rg_iov; + struct ib_device *rg_device; + enum dma_data_direction rg_direction; __be32 rg_base[0] __attribute__ ((aligned(256))); }; @@ -162,7 +165,10 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) * The smallest inline threshold is 1024 bytes, ensuring that * at least 750 bytes are available for RPC messages. */ -#define RPCRDMA_MAX_HDR_SEGS (8) +enum { + RPCRDMA_MAX_HDR_SEGS = 8, + RPCRDMA_HDRBUF_SIZE = 256, +}; /* * struct rpcrdma_rep -- this structure encapsulates state required to recv @@ -182,10 +188,13 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) struct rpcrdma_rep { struct ib_cqe rr_cqe; unsigned int rr_len; + int rr_wc_flags; + u32 rr_inv_rkey; struct ib_device *rr_device; struct rpcrdma_xprt *rr_rxprt; struct work_struct rr_work; struct list_head rr_list; + struct ib_recv_wr rr_recv_wr; struct rpcrdma_regbuf *rr_rdmabuf; }; @@ -276,19 +285,30 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ char *mr_offset; /* kva if no page, else offset */ }; -#define RPCRDMA_MAX_IOVS (2) +/* Reserve enough Send SGEs to send a maximum size inline request: + * - RPC-over-RDMA header + * - xdr_buf head iovec + * - RPCRDMA_MAX_INLINE bytes, possibly unaligned, in pages + * - xdr_buf tail iovec + */ +enum { + RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1, + RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1, + RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, +}; struct rpcrdma_buffer; struct rpcrdma_req { struct list_head rl_free; - unsigned int rl_niovs; + unsigned int rl_mapped_sges; unsigned int rl_connect_cookie; - struct rpc_task *rl_task; struct rpcrdma_buffer *rl_buffer; - struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ - struct ib_sge rl_send_iov[RPCRDMA_MAX_IOVS]; - struct rpcrdma_regbuf *rl_rdmabuf; - struct rpcrdma_regbuf *rl_sendbuf; + struct rpcrdma_rep *rl_reply; + struct ib_send_wr rl_send_wr; + struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES]; + struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ + struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ + struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */ struct ib_cqe rl_cqe; struct list_head rl_all; @@ -298,14 +318,16 @@ struct rpcrdma_req { struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; }; +static inline void +rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) +{ + rqst->rq_xprtdata = req; +} + static inline struct rpcrdma_req * rpcr_to_rdmar(struct rpc_rqst *rqst) { - void *buffer = rqst->rq_buffer; - struct rpcrdma_regbuf *rb; - - rb = container_of(buffer, struct rpcrdma_regbuf, rg_base); - return rb->rg_owner; + return rqst->rq_xprtdata; } /* @@ -356,15 +378,6 @@ struct rpcrdma_create_data_internal { unsigned int padding; /* non-rdma write header padding */ }; -#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \ - (rpcx_to_rdmad(rq->rq_xprt).inline_rsize) - -#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\ - (rpcx_to_rdmad(rq->rq_xprt).inline_wsize) - -#define RPCRDMA_INLINE_PAD_VALUE(rq)\ - rpcx_to_rdmad(rq->rq_xprt).padding - /* * Statistics for RPCRDMA */ @@ -386,6 +399,7 @@ struct rpcrdma_stats { unsigned long mrs_recovered; unsigned long mrs_orphaned; unsigned long mrs_allocated; + unsigned long local_inv_needed; }; /* @@ -409,6 +423,7 @@ struct rpcrdma_memreg_ops { struct rpcrdma_mw *); void (*ro_release_mr)(struct rpcrdma_mw *); const char *ro_displayname; + const int ro_send_w_inv_ok; }; extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; @@ -461,15 +476,14 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, struct rpcrdma_req *); -int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, - struct rpcrdma_rep *); +int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); /* * Buffer calls - xprtrdma/verbs.c */ struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *); -void rpcrdma_destroy_req(struct rpcrdma_ia *, struct rpcrdma_req *); +void rpcrdma_destroy_req(struct rpcrdma_req *); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); @@ -482,10 +496,24 @@ void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *); -struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, - size_t, gfp_t); -void rpcrdma_free_regbuf(struct rpcrdma_ia *, - struct rpcrdma_regbuf *); +struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, + gfp_t); +bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); +void rpcrdma_free_regbuf(struct rpcrdma_regbuf *); + +static inline bool +rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb) +{ + return rb->rg_device != NULL; +} + +static inline bool +rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) +{ + if (likely(rpcrdma_regbuf_is_mapped(rb))) + return true; + return __rpcrdma_dma_map_regbuf(ia, rb); +} int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); @@ -507,15 +535,25 @@ rpcrdma_data_dir(bool writing) */ void rpcrdma_connect_worker(struct work_struct *); void rpcrdma_conn_func(struct rpcrdma_ep *); -void rpcrdma_reply_handler(struct rpcrdma_rep *); +void rpcrdma_reply_handler(struct work_struct *); /* * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c */ + +enum rpcrdma_chunktype { + rpcrdma_noch = 0, + rpcrdma_readch, + rpcrdma_areadch, + rpcrdma_writech, + rpcrdma_replych +}; + +bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *, + u32, struct xdr_buf *, enum rpcrdma_chunktype); +void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *); int rpcrdma_marshal_req(struct rpc_rqst *); -void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *, - struct rpcrdma_create_data_internal *, - unsigned int); +void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); /* RPC/RDMA module init - xprtrdma/transport.c */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bf168838a029..1758665d609c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -473,7 +473,16 @@ static int xs_nospace(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); /* Race breaker in case memory is freed before above code is called */ - sk->sk_write_space(sk); + if (ret == -EAGAIN) { + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); + rcu_read_unlock(); + + sk->sk_write_space(sk); + } return ret; } @@ -1074,7 +1083,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) skb = skb_recv_datagram(sk, 0, 1, &err); if (skb != NULL) { xs_udp_data_read_skb(&transport->xprt, sk, skb); - skb_free_datagram_locked(sk, skb); + consume_skb(skb); continue; } if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) @@ -2533,35 +2542,38 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) * we allocate pages instead doing a kmalloc like rpc_malloc is because we want * to use the server side send routines. */ -static void *bc_malloc(struct rpc_task *task, size_t size) +static int bc_malloc(struct rpc_task *task) { + struct rpc_rqst *rqst = task->tk_rqstp; + size_t size = rqst->rq_callsize; struct page *page; struct rpc_buffer *buf; - WARN_ON_ONCE(size > PAGE_SIZE - sizeof(struct rpc_buffer)); - if (size > PAGE_SIZE - sizeof(struct rpc_buffer)) - return NULL; + if (size > PAGE_SIZE - sizeof(struct rpc_buffer)) { + WARN_ONCE(1, "xprtsock: large bc buffer request (size %zu)\n", + size); + return -EINVAL; + } page = alloc_page(GFP_KERNEL); if (!page) - return NULL; + return -ENOMEM; buf = page_address(page); buf->len = PAGE_SIZE; - return buf->data; + rqst->rq_buffer = buf->data; + return 0; } /* * Free the space allocated in the bc_alloc routine */ -static void bc_free(void *buffer) +static void bc_free(struct rpc_task *task) { + void *buffer = task->tk_rqstp->rq_buffer; struct rpc_buffer *buf; - if (!buffer) - return; - buf = container_of(buffer, struct rpc_buffer, data); free_page((unsigned long)buf); } diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 5bc1a3d57401..919981324171 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -27,9 +27,9 @@ #endif static struct ctl_table_set * -net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces) +net_ctl_header_lookup(struct ctl_table_root *root) { - return &namespaces->net_ns->sysctls; + return ¤t->nsproxy->net_ns->sysctls; } static int is_seen(struct ctl_table_set *set) @@ -44,7 +44,7 @@ static int net_ctl_permissions(struct ctl_table_header *head, struct net *net = container_of(head->set, struct net, sysctls); /* Allow network administrator to have same access as root. */ - if (ns_capable(net->user_ns, CAP_NET_ADMIN)) { + if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN)) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 3200059d14b2..26ca8dd64ded 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -135,15 +135,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { /* Users of the legacy API (tipc-config) can't handle that we add operations, * so we have a separate genl handling for the new API. */ -struct genl_family tipc_genl_family = { - .id = GENL_ID_GENERATE, - .name = TIPC_GENL_V2_NAME, - .version = TIPC_GENL_V2_VERSION, - .hdrsize = 0, - .maxattr = TIPC_NLA_MAX, - .netnsok = true, -}; - static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_BEARER_DISABLE, @@ -258,23 +249,33 @@ static const struct genl_ops tipc_genl_v2_ops[] = { #endif }; +struct genl_family tipc_genl_family __ro_after_init = { + .name = TIPC_GENL_V2_NAME, + .version = TIPC_GENL_V2_VERSION, + .hdrsize = 0, + .maxattr = TIPC_NLA_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = tipc_genl_v2_ops, + .n_ops = ARRAY_SIZE(tipc_genl_v2_ops), +}; + int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) { u32 maxattr = tipc_genl_family.maxattr; - *attr = tipc_genl_family.attrbuf; + *attr = genl_family_attrbuf(&tipc_genl_family); if (!*attr) return -EOPNOTSUPP; return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy); } -int tipc_netlink_start(void) +int __init tipc_netlink_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_family, - tipc_genl_v2_ops); + res = genl_register_family(&tipc_genl_family); if (res) { pr_err("Failed to register netlink interface\n"); return res; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1fd464764765..e1ae8a8a2b8e 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1215,15 +1215,6 @@ send: return err; } -static struct genl_family tipc_genl_compat_family = { - .id = GENL_ID_GENERATE, - .name = TIPC_GENL_NAME, - .version = TIPC_GENL_VERSION, - .hdrsize = TIPC_GENL_HDRLEN, - .maxattr = 0, - .netnsok = true, -}; - static struct genl_ops tipc_genl_compat_ops[] = { { .cmd = TIPC_GENL_CMD, @@ -1231,12 +1222,22 @@ static struct genl_ops tipc_genl_compat_ops[] = { }, }; -int tipc_netlink_compat_start(void) +static struct genl_family tipc_genl_compat_family __ro_after_init = { + .name = TIPC_GENL_NAME, + .version = TIPC_GENL_VERSION, + .hdrsize = TIPC_GENL_HDRLEN, + .maxattr = 0, + .netnsok = true, + .module = THIS_MODULE, + .ops = tipc_genl_compat_ops, + .n_ops = ARRAY_SIZE(tipc_genl_compat_ops), +}; + +int __init tipc_netlink_compat_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_compat_family, - tipc_genl_compat_ops); + res = genl_register_family(&tipc_genl_compat_family); if (res) { pr_err("Failed to register legacy compat interface\n"); return res; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index d80cd3f7503f..78cab9c5a445 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -407,6 +407,7 @@ static int __tipc_nl_add_udp_addr(struct sk_buff *skb, if (ntohs(addr->proto) == ETH_P_IP) { struct sockaddr_in ip4; + memset(&ip4, 0, sizeof(ip4)); ip4.sin_family = AF_INET; ip4.sin_port = addr->port; ip4.sin_addr.s_addr = addr->ipv4.s_addr; @@ -417,6 +418,7 @@ static int __tipc_nl_add_udp_addr(struct sk_buff *skb, } else if (ntohs(addr->proto) == ETH_P_IPV6) { struct sockaddr_in6 ip6; + memset(&ip6, 0, sizeof(ip6)); ip6.sin6_family = AF_INET6; ip6.sin6_port = addr->port; memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr)); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8309687a56b0..145082e2ba36 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2475,28 +2475,13 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, return unix_stream_read_generic(&state); } -static ssize_t skb_unix_socket_splice(struct sock *sk, - struct pipe_inode_info *pipe, - struct splice_pipe_desc *spd) -{ - int ret; - struct unix_sock *u = unix_sk(sk); - - mutex_unlock(&u->iolock); - ret = splice_to_pipe(pipe, spd); - mutex_lock(&u->iolock); - - return ret; -} - static int unix_stream_splice_actor(struct sk_buff *skb, int skip, int chunk, struct unix_stream_read_state *state) { return skb_splice_bits(skb, state->socket->sk, UNIXCB(skb).consumed + skip, - state->pipe, chunk, state->splice_flags, - skb_unix_socket_splice); + state->pipe, chunk, state->splice_flags); } static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 17dbbe64cd73..8a398b3fb532 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -465,6 +465,8 @@ void vsock_pending_work(struct work_struct *work) if (vsock_is_pending(sk)) { vsock_remove_pending(listener, sk); + + listener->sk_ack_backlog--; } else if (!vsk->rejected) { /* We are not on the pending list and accept() did not reject * us, so we must have been accepted by our user process. We @@ -475,8 +477,6 @@ void vsock_pending_work(struct work_struct *work) goto out; } - listener->sk_ack_backlog--; - /* We need to remove ourself from the global connected sockets list so * incoming packets can't find this socket, and to reduce the reference * count. @@ -2010,5 +2010,5 @@ EXPORT_SYMBOL_GPL(vsock_core_get_transport); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); -MODULE_VERSION("1.0.1.0-k"); +MODULE_VERSION("1.0.2.0-k"); MODULE_LICENSE("GPL v2"); diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 3f816e2971ee..5db731512014 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -572,16 +572,20 @@ struct d_level D_LEVEL[] = { size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); -struct genl_family wimax_gnl_family = { - .id = GENL_ID_GENERATE, +static const struct genl_multicast_group wimax_gnl_mcgrps[] = { + { .name = "msg", }, +}; + +struct genl_family wimax_gnl_family __ro_after_init = { .name = "WiMAX", .version = WIMAX_GNL_VERSION, .hdrsize = 0, .maxattr = WIMAX_GNL_ATTR_MAX, -}; - -static const struct genl_multicast_group wimax_gnl_mcgrps[] = { - { .name = "msg", }, + .module = THIS_MODULE, + .ops = wimax_gnl_ops, + .n_ops = ARRAY_SIZE(wimax_gnl_ops), + .mcgrps = wimax_gnl_mcgrps, + .n_mcgrps = ARRAY_SIZE(wimax_gnl_mcgrps), }; @@ -596,11 +600,7 @@ int __init wimax_subsys_init(void) d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params, "wimax.debug"); - snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name), - "WiMAX"); - result = genl_register_family_with_ops_groups(&wimax_gnl_family, - wimax_gnl_ops, - wimax_gnl_mcgrps); + result = genl_register_family(&wimax_gnl_family); if (unlikely(result < 0)) { pr_err("cannot register generic netlink family: %d\n", result); goto error_register_family; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 0f506220a3bd..5497d022fada 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -372,6 +372,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -946,6 +947,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: /* these interface types don't really have a channel */ return; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/wireless/core.c b/net/wireless/core.c index 4911cd997b9a..8201e6d7449e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -225,6 +225,23 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, } } +void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + ASSERT_RTNL(); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) + return; + + if (!wdev->nan_started) + return; + + rdev_stop_nan(rdev, wdev); + wdev->nan_started = false; + + rdev->opencount--; +} + void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -242,6 +259,9 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) case NL80211_IFTYPE_P2P_DEVICE: cfg80211_stop_p2p_device(rdev, wdev); break; + case NL80211_IFTYPE_NAN: + cfg80211_stop_nan(rdev, wdev); + break; default: break; } @@ -537,6 +557,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c->limits[j].max > 1)) return -EINVAL; + /* Only a single NAN can be allowed */ + if (WARN_ON(types & BIT(NL80211_IFTYPE_NAN) && + c->limits[j].max > 1)) + return -EINVAL; + cnt += c->limits[j].max; /* * Don't advertise an unsupported type @@ -579,6 +604,11 @@ int wiphy_register(struct wiphy *wiphy) !rdev->ops->tdls_cancel_channel_switch))) return -EINVAL; + if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) && + (!rdev->ops->start_nan || !rdev->ops->stop_nan || + !rdev->ops->add_nan_func || !rdev->ops->del_nan_func))) + return -EINVAL; + /* * if a wiphy has unsupported modes for regulatory channel enforcement, * opt-out of enforcement checking @@ -589,6 +619,7 @@ int wiphy_register(struct wiphy *wiphy) BIT(NL80211_IFTYPE_P2P_GO) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_P2P_DEVICE) | + BIT(NL80211_IFTYPE_NAN) | BIT(NL80211_IFTYPE_AP_VLAN) | BIT(NL80211_IFTYPE_MONITOR))) wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF; @@ -916,6 +947,9 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) cfg80211_mlme_purge_registrations(wdev); cfg80211_stop_p2p_device(rdev, wdev); break; + case NL80211_IFTYPE_NAN: + cfg80211_stop_nan(rdev, wdev); + break; default: WARN_ON_ONCE(1); break; @@ -979,6 +1013,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, /* must be handled by mac80211/driver, has no APIs */ break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: /* cannot happen, has no netdev */ break; case NL80211_IFTYPE_AP_VLAN: diff --git a/net/wireless/core.h b/net/wireless/core.h index 5555e3c13ae9..08d2e948c9ad 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -249,8 +249,8 @@ struct cfg80211_event { }; struct cfg80211_cached_keys { - struct key_params params[4]; - u8 data[4][WLAN_KEY_LEN_WEP104]; + struct key_params params[CFG80211_MAX_WEP_KEYS]; + u8 data[CFG80211_MAX_WEP_KEYS][WLAN_KEY_LEN_WEP104]; int def; }; @@ -488,6 +488,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index eafdfa5798ae..364f900a3dc4 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -43,7 +43,8 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - cfg80211_upload_connect_keys(wdev); + if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) + cfg80211_upload_connect_keys(wdev); nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid, GFP_KERNEL); @@ -296,7 +297,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 4; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) ck->params[i].key = ck->data[i]; } err = __cfg80211_join_ibss(rdev, wdev->netdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index d6abb0704db5..cbb48e26a871 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -634,6 +634,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, * fall through, P2P device only supports * public action frames */ + case NL80211_IFTYPE_NAN: default: err = -EOPNOTSUPP; break; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fd111e2b559d..271707dacfea 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -32,22 +32,8 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, struct cfg80211_crypto_settings *settings, int cipher_limit); -static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); -static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - /* the netlink family */ -static struct genl_family nl80211_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ - .name = NL80211_GENL_NAME, /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ - .maxattr = NL80211_ATTR_MAX, - .netnsok = true, - .pre_doit = nl80211_pre_doit, - .post_doit = nl80211_post_doit, -}; +static struct genl_family nl80211_fam; /* multicast groups */ enum nl80211_multicast_groups { @@ -56,6 +42,7 @@ enum nl80211_multicast_groups { NL80211_MCGRP_REGULATORY, NL80211_MCGRP_MLME, NL80211_MCGRP_VENDOR, + NL80211_MCGRP_NAN, NL80211_MCGRP_TESTMODE /* keep last - ifdef! */ }; @@ -65,6 +52,7 @@ static const struct genl_multicast_group nl80211_mcgrps[] = { [NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG }, [NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME }, [NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR }, + [NL80211_MCGRP_NAN] = { .name = NL80211_MULTICAST_GROUP_NAN }, #ifdef CONFIG_NL80211_TESTMODE [NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE } #endif @@ -409,6 +397,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = VHT_MUMIMO_GROUPS_DATA_LEN }, [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN }, + [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -502,6 +493,39 @@ nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = { }, }; +/* policy for NAN function attributes */ +static const struct nla_policy +nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = { + [NL80211_NAN_FUNC_TYPE] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_SERVICE_ID] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SERVICE_ID_LEN }, + [NL80211_NAN_FUNC_PUBLISH_TYPE] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_PUBLISH_BCAST] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_FOLLOW_UP_DEST] = { .len = ETH_ALEN }, + [NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 }, + [NL80211_NAN_FUNC_SERVICE_INFO] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN }, + [NL80211_NAN_FUNC_SRF] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_RX_MATCH_FILTER] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_TX_MATCH_FILTER] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_INSTANCE_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_TERM_REASON] = { .type = NLA_U8 }, +}; + +/* policy for Service Response Filter attributes */ +static const struct nla_policy +nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = { + [NL80211_NAN_SRF_INCLUDE] = { .type = NLA_FLAG }, + [NL80211_NAN_SRF_BF] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SRF_MAX_LEN }, + [NL80211_NAN_SRF_BF_IDX] = { .type = NLA_U8 }, + [NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -513,13 +537,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); + genl_family_attrbuf(&nl80211_fam), + nl80211_fam.maxattr, nl80211_policy); if (err) goto out_unlock; - *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + *wdev = __cfg80211_wdev_from_attrs( + sock_net(skb->sk), + genl_family_attrbuf(&nl80211_fam)); if (IS_ERR(*wdev)) { err = PTR_ERR(*wdev); goto out_unlock; @@ -934,6 +959,7 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: @@ -1842,7 +1868,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl80211_dump_wiphy_state *state) { - struct nlattr **tb = nl80211_fam.attrbuf; + struct nlattr **tb = genl_family_attrbuf(&nl80211_fam); int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, nl80211_fam.maxattr, nl80211_policy); /* ignore parse errors for backward compatibility */ @@ -2819,7 +2845,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.interface_modes & (1 << type))) return -EOPNOTSUPP; - if ((type == NL80211_IFTYPE_P2P_DEVICE || + if ((type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN || rdev->wiphy.features & NL80211_FEATURE_MAC_ON_CREATE) && info->attrs[NL80211_ATTR_MAC]) { nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC], @@ -2875,9 +2901,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) wdev->mesh_id_up_len); wdev_unlock(wdev); break; + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: /* - * P2P Device doesn't have a netdev, so doesn't go + * P2P Device and NAN do not have a netdev, so don't go * through the netdev notifier and must be added here */ mutex_init(&wdev->mtx); @@ -3340,6 +3367,291 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) return err; } +static u32 rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len) +{ + u8 i; + u32 mask = 0; + + for (i = 0; i < rates_len; i++) { + int rate = (rates[i] & 0x7f) * 5; + int ridx; + + for (ridx = 0; ridx < sband->n_bitrates; ridx++) { + struct ieee80211_rate *srate = + &sband->bitrates[ridx]; + if (rate == srate->bitrate) { + mask |= 1 << ridx; + break; + } + } + if (ridx == sband->n_bitrates) + return 0; /* rate not found */ + } + + return mask; +} + +static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len, + u8 mcs[IEEE80211_HT_MCS_MASK_LEN]) +{ + u8 i; + + memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN); + + for (i = 0; i < rates_len; i++) { + int ridx, rbit; + + ridx = rates[i] / 8; + rbit = BIT(rates[i] % 8); + + /* check validity */ + if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN)) + return false; + + /* check availability */ + if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) + mcs[ridx] |= rbit; + else + return false; + } + + return true; +} + +static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) +{ + u16 mcs_mask = 0; + + switch (vht_mcs_map) { + case IEEE80211_VHT_MCS_NOT_SUPPORTED: + break; + case IEEE80211_VHT_MCS_SUPPORT_0_7: + mcs_mask = 0x00FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_8: + mcs_mask = 0x01FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_9: + mcs_mask = 0x03FF; + break; + default: + break; + } + + return mcs_mask; +} + +static void vht_build_mcs_mask(u16 vht_mcs_map, + u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) +{ + u8 nss; + + for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { + vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); + vht_mcs_map >>= 2; + } +} + +static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, + struct nl80211_txrate_vht *txrate, + u16 mcs[NL80211_VHT_NSS_MAX]) +{ + u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; + u8 i; + + if (!sband->vht_cap.vht_supported) + return false; + + memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); + + /* Build vht_mcs_mask from VHT capabilities */ + vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { + if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) + mcs[i] = txrate->mcs[i]; + else + return false; + } + + return true; +} + +static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { + [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, + [NL80211_TXRATE_HT] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_HT_RATES }, + [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)}, + [NL80211_TXRATE_GI] = { .type = NLA_U8 }, +}; + +static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, + struct cfg80211_bitrate_mask *mask) +{ + struct nlattr *tb[NL80211_TXRATE_MAX + 1]; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + int rem, i; + struct nlattr *tx_rates; + struct ieee80211_supported_band *sband; + u16 vht_tx_mcs_map; + + memset(mask, 0, sizeof(*mask)); + /* Default to all rates enabled */ + for (i = 0; i < NUM_NL80211_BANDS; i++) { + sband = rdev->wiphy.bands[i]; + + if (!sband) + continue; + + mask->control[i].legacy = (1 << sband->n_bitrates) - 1; + memcpy(mask->control[i].ht_mcs, + sband->ht_cap.mcs.rx_mask, + sizeof(mask->control[i].ht_mcs)); + + if (!sband->vht_cap.vht_supported) + continue; + + vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs); + } + + /* if no rates are given set it back to the defaults */ + if (!info->attrs[NL80211_ATTR_TX_RATES]) + goto out; + + /* The nested attribute uses enum nl80211_band as the index. This maps + * directly to the enum nl80211_band values used in cfg80211. + */ + BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); + nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) { + enum nl80211_band band = nla_type(tx_rates); + int err; + + if (band < 0 || band >= NUM_NL80211_BANDS) + return -EINVAL; + sband = rdev->wiphy.bands[band]; + if (sband == NULL) + return -EINVAL; + err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), + nla_len(tx_rates), nl80211_txattr_policy); + if (err) + return err; + if (tb[NL80211_TXRATE_LEGACY]) { + mask->control[band].legacy = rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_LEGACY]), + nla_len(tb[NL80211_TXRATE_LEGACY])); + if ((mask->control[band].legacy == 0) && + nla_len(tb[NL80211_TXRATE_LEGACY])) + return -EINVAL; + } + if (tb[NL80211_TXRATE_HT]) { + if (!ht_rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_HT]), + nla_len(tb[NL80211_TXRATE_HT]), + mask->control[band].ht_mcs)) + return -EINVAL; + } + if (tb[NL80211_TXRATE_VHT]) { + if (!vht_set_mcs_mask( + sband, + nla_data(tb[NL80211_TXRATE_VHT]), + mask->control[band].vht_mcs)) + return -EINVAL; + } + if (tb[NL80211_TXRATE_GI]) { + mask->control[band].gi = + nla_get_u8(tb[NL80211_TXRATE_GI]); + if (mask->control[band].gi > NL80211_TXRATE_FORCE_LGI) + return -EINVAL; + } + + if (mask->control[band].legacy == 0) { + /* don't allow empty legacy rates if HT or VHT + * are not even supported. + */ + if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || + rdev->wiphy.bands[band]->vht_cap.vht_supported)) + return -EINVAL; + + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) + if (mask->control[band].ht_mcs[i]) + goto out; + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) + if (mask->control[band].vht_mcs[i]) + goto out; + + /* legacy and mcs rates may not be both empty */ + return -EINVAL; + } + } + +out: + return 0; +} + +static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, + enum nl80211_band band, + struct cfg80211_bitrate_mask *beacon_rate) +{ + u32 count_ht, count_vht, i; + u32 rate = beacon_rate->control[band].legacy; + + /* Allow only one rate */ + if (hweight32(rate) > 1) + return -EINVAL; + + count_ht = 0; + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { + if (hweight8(beacon_rate->control[band].ht_mcs[i]) > 1) { + return -EINVAL; + } else if (beacon_rate->control[band].ht_mcs[i]) { + count_ht++; + if (count_ht > 1) + return -EINVAL; + } + if (count_ht && rate) + return -EINVAL; + } + + count_vht = 0; + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { + if (hweight16(beacon_rate->control[band].vht_mcs[i]) > 1) { + return -EINVAL; + } else if (beacon_rate->control[band].vht_mcs[i]) { + count_vht++; + if (count_vht > 1) + return -EINVAL; + } + if (count_vht && rate) + return -EINVAL; + } + + if ((count_ht && count_vht) || (!rate && !count_ht && !count_vht)) + return -EINVAL; + + if (rate && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) + return -EINVAL; + if (count_ht && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_HT)) + return -EINVAL; + if (count_vht && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_VHT)) + return -EINVAL; + + return 0; +} + static int nl80211_parse_beacon(struct nlattr *attrs[], struct cfg80211_beacon_data *bcn) { @@ -3569,6 +3881,17 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->iftype)) return -EINVAL; + if (info->attrs[NL80211_ATTR_TX_RATES]) { + err = nl80211_parse_tx_bitrate_mask(info, ¶ms.beacon_rate); + if (err) + return err; + + err = validate_beacon_tx_rate(rdev, params.chandef.chan->band, + ¶ms.beacon_rate); + if (err) + return err; + } + if (info->attrs[NL80211_ATTR_SMPS_MODE]) { params.smps_mode = nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); @@ -6138,6 +6461,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) wiphy = &rdev->wiphy; + if (wdev->iftype == NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + if (!rdev->ops->scan) return -EOPNOTSUPP; @@ -7304,6 +7630,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); struct survey_info survey; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; @@ -7316,7 +7643,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) return res; /* prepare_wdev_dump parsed the attributes */ - radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; + radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; if (!wdev->netdev) { res = -EINVAL; @@ -8139,14 +8466,14 @@ static int nl80211_testmode_dump(struct sk_buff *skb, */ phy_idx = cb->args[0] - 1; } else { + struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); + attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) goto out_err; - rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(rdev)) { err = PTR_ERR(rdev); goto out_err; @@ -8154,9 +8481,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb, phy_idx = rdev->wiphy_idx; rdev = NULL; - if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]) - cb->args[1] = - (long)nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]; + if (attrbuf[NL80211_ATTR_TESTDATA]) + cb->args[1] = (long)attrbuf[NL80211_ATTR_TESTDATA]; } if (cb->args[1]) { @@ -8641,238 +8967,21 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, return rdev_cancel_remain_on_channel(rdev, wdev, cookie); } -static u32 rateset_to_mask(struct ieee80211_supported_band *sband, - u8 *rates, u8 rates_len) -{ - u8 i; - u32 mask = 0; - - for (i = 0; i < rates_len; i++) { - int rate = (rates[i] & 0x7f) * 5; - int ridx; - - for (ridx = 0; ridx < sband->n_bitrates; ridx++) { - struct ieee80211_rate *srate = - &sband->bitrates[ridx]; - if (rate == srate->bitrate) { - mask |= 1 << ridx; - break; - } - } - if (ridx == sband->n_bitrates) - return 0; /* rate not found */ - } - - return mask; -} - -static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, - u8 *rates, u8 rates_len, - u8 mcs[IEEE80211_HT_MCS_MASK_LEN]) -{ - u8 i; - - memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN); - - for (i = 0; i < rates_len; i++) { - int ridx, rbit; - - ridx = rates[i] / 8; - rbit = BIT(rates[i] % 8); - - /* check validity */ - if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN)) - return false; - - /* check availability */ - if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) - mcs[ridx] |= rbit; - else - return false; - } - - return true; -} - -static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) -{ - u16 mcs_mask = 0; - - switch (vht_mcs_map) { - case IEEE80211_VHT_MCS_NOT_SUPPORTED: - break; - case IEEE80211_VHT_MCS_SUPPORT_0_7: - mcs_mask = 0x00FF; - break; - case IEEE80211_VHT_MCS_SUPPORT_0_8: - mcs_mask = 0x01FF; - break; - case IEEE80211_VHT_MCS_SUPPORT_0_9: - mcs_mask = 0x03FF; - break; - default: - break; - } - - return mcs_mask; -} - -static void vht_build_mcs_mask(u16 vht_mcs_map, - u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) -{ - u8 nss; - - for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { - vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); - vht_mcs_map >>= 2; - } -} - -static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, - struct nl80211_txrate_vht *txrate, - u16 mcs[NL80211_VHT_NSS_MAX]) -{ - u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); - u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; - u8 i; - - if (!sband->vht_cap.vht_supported) - return false; - - memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); - - /* Build vht_mcs_mask from VHT capabilities */ - vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); - - for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { - if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) - mcs[i] = txrate->mcs[i]; - else - return false; - } - - return true; -} - -static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { - [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_RATES }, - [NL80211_TXRATE_HT] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_HT_RATES }, - [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)}, - [NL80211_TXRATE_GI] = { .type = NLA_U8 }, -}; - static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *tb[NL80211_TXRATE_MAX + 1]; - struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_bitrate_mask mask; - int rem, i; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct nlattr *tx_rates; - struct ieee80211_supported_band *sband; - u16 vht_tx_mcs_map; + int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; - memset(&mask, 0, sizeof(mask)); - /* Default to all rates enabled */ - for (i = 0; i < NUM_NL80211_BANDS; i++) { - sband = rdev->wiphy.bands[i]; - - if (!sband) - continue; - - mask.control[i].legacy = (1 << sband->n_bitrates) - 1; - memcpy(mask.control[i].ht_mcs, - sband->ht_cap.mcs.rx_mask, - sizeof(mask.control[i].ht_mcs)); - - if (!sband->vht_cap.vht_supported) - continue; - - vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); - vht_build_mcs_mask(vht_tx_mcs_map, mask.control[i].vht_mcs); - } - - /* if no rates are given set it back to the defaults */ - if (!info->attrs[NL80211_ATTR_TX_RATES]) - goto out; - - /* - * The nested attribute uses enum nl80211_band as the index. This maps - * directly to the enum nl80211_band values used in cfg80211. - */ - BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); - nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) { - enum nl80211_band band = nla_type(tx_rates); - int err; - - if (band < 0 || band >= NUM_NL80211_BANDS) - return -EINVAL; - sband = rdev->wiphy.bands[band]; - if (sband == NULL) - return -EINVAL; - err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), - nla_len(tx_rates), nl80211_txattr_policy); - if (err) - return err; - if (tb[NL80211_TXRATE_LEGACY]) { - mask.control[band].legacy = rateset_to_mask( - sband, - nla_data(tb[NL80211_TXRATE_LEGACY]), - nla_len(tb[NL80211_TXRATE_LEGACY])); - if ((mask.control[band].legacy == 0) && - nla_len(tb[NL80211_TXRATE_LEGACY])) - return -EINVAL; - } - if (tb[NL80211_TXRATE_HT]) { - if (!ht_rateset_to_mask( - sband, - nla_data(tb[NL80211_TXRATE_HT]), - nla_len(tb[NL80211_TXRATE_HT]), - mask.control[band].ht_mcs)) - return -EINVAL; - } - if (tb[NL80211_TXRATE_VHT]) { - if (!vht_set_mcs_mask( - sband, - nla_data(tb[NL80211_TXRATE_VHT]), - mask.control[band].vht_mcs)) - return -EINVAL; - } - if (tb[NL80211_TXRATE_GI]) { - mask.control[band].gi = - nla_get_u8(tb[NL80211_TXRATE_GI]); - if (mask.control[band].gi > NL80211_TXRATE_FORCE_LGI) - return -EINVAL; - } - - if (mask.control[band].legacy == 0) { - /* don't allow empty legacy rates if HT or VHT - * are not even supported. - */ - if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || - rdev->wiphy.bands[band]->vht_cap.vht_supported)) - return -EINVAL; - - for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) - if (mask.control[band].ht_mcs[i]) - goto out; - - for (i = 0; i < NL80211_VHT_NSS_MAX; i++) - if (mask.control[band].vht_mcs[i]) - goto out; - - /* legacy and mcs rates may not be both empty */ - return -EINVAL; - } - } + err = nl80211_parse_tx_bitrate_mask(info, &mask); + if (err) + return err; -out: return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); } @@ -8898,6 +9007,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -8943,6 +9053,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -9059,6 +9170,7 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -9340,6 +9452,17 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) return err; } + if (info->attrs[NL80211_ATTR_TX_RATES]) { + err = nl80211_parse_tx_bitrate_mask(info, &setup.beacon_rate); + if (err) + return err; + + err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band, + &setup.beacon_rate); + if (err) + return err; + } + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); } @@ -10414,6 +10537,549 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) return 0; } +static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct cfg80211_nan_conf conf = {}; + int err; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (wdev->nan_started) + return -EEXIST; + + if (rfkill_blocked(rdev->rfkill)) + return -ERFKILL; + + if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_NAN_DUAL]) + return -EINVAL; + + conf.master_pref = + nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); + if (!conf.master_pref) + return -EINVAL; + + conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); + + err = rdev_start_nan(rdev, wdev, &conf); + if (err) + return err; + + wdev->nan_started = true; + rdev->opencount++; + + return 0; +} + +static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + cfg80211_stop_nan(rdev, wdev); + + return 0; +} + +static int validate_nan_filter(struct nlattr *filter_attr) +{ + struct nlattr *attr; + int len = 0, n_entries = 0, rem; + + nla_for_each_nested(attr, filter_attr, rem) { + len += nla_len(attr); + n_entries++; + } + + if (len >= U8_MAX) + return -EINVAL; + + return n_entries; +} + +static int handle_nan_filter(struct nlattr *attr_filter, + struct cfg80211_nan_func *func, + bool tx) +{ + struct nlattr *attr; + int n_entries, rem, i; + struct cfg80211_nan_func_filter *filter; + + n_entries = validate_nan_filter(attr_filter); + if (n_entries < 0) + return n_entries; + + BUILD_BUG_ON(sizeof(*func->rx_filters) != sizeof(*func->tx_filters)); + + filter = kcalloc(n_entries, sizeof(*func->rx_filters), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + i = 0; + nla_for_each_nested(attr, attr_filter, rem) { + filter[i].filter = kmemdup(nla_data(attr), nla_len(attr), + GFP_KERNEL); + filter[i].len = nla_len(attr); + i++; + } + if (tx) { + func->num_tx_filters = n_entries; + func->tx_filters = filter; + } else { + func->num_rx_filters = n_entries; + func->rx_filters = filter; + } + + return 0; +} + +static int nl80211_nan_add_func(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct nlattr *tb[NUM_NL80211_NAN_FUNC_ATTR], *func_attr; + struct cfg80211_nan_func *func; + struct sk_buff *msg = NULL; + void *hdr = NULL; + int err = 0; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (!info->attrs[NL80211_ATTR_NAN_FUNC]) + return -EINVAL; + + if (wdev->owner_nlportid && + wdev->owner_nlportid != info->snd_portid) + return -ENOTCONN; + + err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX, + nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]), + nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]), + nl80211_nan_func_policy); + if (err) + return err; + + func = kzalloc(sizeof(*func), GFP_KERNEL); + if (!func) + return -ENOMEM; + + func->cookie = wdev->wiphy->cookie_counter++; + + if (!tb[NL80211_NAN_FUNC_TYPE] || + nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]) > NL80211_NAN_FUNC_MAX_TYPE) { + err = -EINVAL; + goto out; + } + + + func->type = nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]); + + if (!tb[NL80211_NAN_FUNC_SERVICE_ID]) { + err = -EINVAL; + goto out; + } + + memcpy(func->service_id, nla_data(tb[NL80211_NAN_FUNC_SERVICE_ID]), + sizeof(func->service_id)); + + func->close_range = + nla_get_flag(tb[NL80211_NAN_FUNC_CLOSE_RANGE]); + + if (tb[NL80211_NAN_FUNC_SERVICE_INFO]) { + func->serv_spec_info_len = + nla_len(tb[NL80211_NAN_FUNC_SERVICE_INFO]); + func->serv_spec_info = + kmemdup(nla_data(tb[NL80211_NAN_FUNC_SERVICE_INFO]), + func->serv_spec_info_len, + GFP_KERNEL); + if (!func->serv_spec_info) { + err = -ENOMEM; + goto out; + } + } + + if (tb[NL80211_NAN_FUNC_TTL]) + func->ttl = nla_get_u32(tb[NL80211_NAN_FUNC_TTL]); + + switch (func->type) { + case NL80211_NAN_FUNC_PUBLISH: + if (!tb[NL80211_NAN_FUNC_PUBLISH_TYPE]) { + err = -EINVAL; + goto out; + } + + func->publish_type = + nla_get_u8(tb[NL80211_NAN_FUNC_PUBLISH_TYPE]); + func->publish_bcast = + nla_get_flag(tb[NL80211_NAN_FUNC_PUBLISH_BCAST]); + + if ((!(func->publish_type & NL80211_NAN_SOLICITED_PUBLISH)) && + func->publish_bcast) { + err = -EINVAL; + goto out; + } + break; + case NL80211_NAN_FUNC_SUBSCRIBE: + func->subscribe_active = + nla_get_flag(tb[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE]); + break; + case NL80211_NAN_FUNC_FOLLOW_UP: + if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || + !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) { + err = -EINVAL; + goto out; + } + + func->followup_id = + nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_ID]); + func->followup_reqid = + nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]); + memcpy(func->followup_dest.addr, + nla_data(tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]), + sizeof(func->followup_dest.addr)); + if (func->ttl) { + err = -EINVAL; + goto out; + } + break; + default: + err = -EINVAL; + goto out; + } + + if (tb[NL80211_NAN_FUNC_SRF]) { + struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR]; + + err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX, + nla_data(tb[NL80211_NAN_FUNC_SRF]), + nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL); + if (err) + goto out; + + func->srf_include = + nla_get_flag(srf_tb[NL80211_NAN_SRF_INCLUDE]); + + if (srf_tb[NL80211_NAN_SRF_BF]) { + if (srf_tb[NL80211_NAN_SRF_MAC_ADDRS] || + !srf_tb[NL80211_NAN_SRF_BF_IDX]) { + err = -EINVAL; + goto out; + } + + func->srf_bf_len = + nla_len(srf_tb[NL80211_NAN_SRF_BF]); + func->srf_bf = + kmemdup(nla_data(srf_tb[NL80211_NAN_SRF_BF]), + func->srf_bf_len, GFP_KERNEL); + if (!func->srf_bf) { + err = -ENOMEM; + goto out; + } + + func->srf_bf_idx = + nla_get_u8(srf_tb[NL80211_NAN_SRF_BF_IDX]); + } else { + struct nlattr *attr, *mac_attr = + srf_tb[NL80211_NAN_SRF_MAC_ADDRS]; + int n_entries, rem, i = 0; + + if (!mac_attr) { + err = -EINVAL; + goto out; + } + + n_entries = validate_acl_mac_addrs(mac_attr); + if (n_entries <= 0) { + err = -EINVAL; + goto out; + } + + func->srf_num_macs = n_entries; + func->srf_macs = + kzalloc(sizeof(*func->srf_macs) * n_entries, + GFP_KERNEL); + if (!func->srf_macs) { + err = -ENOMEM; + goto out; + } + + nla_for_each_nested(attr, mac_attr, rem) + memcpy(func->srf_macs[i++].addr, nla_data(attr), + sizeof(*func->srf_macs)); + } + } + + if (tb[NL80211_NAN_FUNC_TX_MATCH_FILTER]) { + err = handle_nan_filter(tb[NL80211_NAN_FUNC_TX_MATCH_FILTER], + func, true); + if (err) + goto out; + } + + if (tb[NL80211_NAN_FUNC_RX_MATCH_FILTER]) { + err = handle_nan_filter(tb[NL80211_NAN_FUNC_RX_MATCH_FILTER], + func, false); + if (err) + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, + NL80211_CMD_ADD_NAN_FUNCTION); + /* This can't really happen - we just allocated 4KB */ + if (WARN_ON(!hdr)) { + err = -ENOMEM; + goto out; + } + + err = rdev_add_nan_func(rdev, wdev, func); +out: + if (err < 0) { + cfg80211_free_nan_func(func); + nlmsg_free(msg); + return err; + } + + /* propagate the instance id and cookie to userspace */ + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, func->cookie, + NL80211_ATTR_PAD)) + goto nla_put_failure; + + func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + if (!func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, + func->instance_id)) + goto nla_put_failure; + + nla_nest_end(msg, func_attr); + + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} + +static int nl80211_nan_del_func(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + u64 cookie; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (!info->attrs[NL80211_ATTR_COOKIE]) + return -EINVAL; + + if (wdev->owner_nlportid && + wdev->owner_nlportid != info->snd_portid) + return -ENOTCONN; + + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + + rdev_del_nan_func(rdev, wdev, cookie); + + return 0; +} + +static int nl80211_nan_change_config(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct cfg80211_nan_conf conf = {}; + u32 changed = 0; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) { + conf.master_pref = + nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); + if (conf.master_pref <= 1 || conf.master_pref == 255) + return -EINVAL; + + changed |= CFG80211_NAN_CONF_CHANGED_PREF; + } + + if (info->attrs[NL80211_ATTR_NAN_DUAL]) { + conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); + changed |= CFG80211_NAN_CONF_CHANGED_DUAL; + } + + if (!changed) + return -EINVAL; + + return rdev_nan_change_conf(rdev, wdev, &conf, changed); +} + +void cfg80211_nan_match(struct wireless_dev *wdev, + struct cfg80211_nan_match_params *match, gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct nlattr *match_attr, *local_func_attr, *peer_func_attr; + struct sk_buff *msg; + void *hdr; + + if (WARN_ON(!match->inst_id || !match->peer_inst_id || !match->addr)) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_MATCH); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, match->cookie, + NL80211_ATTR_PAD) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr)) + goto nla_put_failure; + + match_attr = nla_nest_start(msg, NL80211_ATTR_NAN_MATCH); + if (!match_attr) + goto nla_put_failure; + + local_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_LOCAL); + if (!local_func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->inst_id)) + goto nla_put_failure; + + nla_nest_end(msg, local_func_attr); + + peer_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_PEER); + if (!peer_func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_TYPE, match->type) || + nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->peer_inst_id)) + goto nla_put_failure; + + if (match->info && match->info_len && + nla_put(msg, NL80211_NAN_FUNC_SERVICE_INFO, match->info_len, + match->info)) + goto nla_put_failure; + + nla_nest_end(msg, peer_func_attr); + nla_nest_end(msg, match_attr); + genlmsg_end(msg, hdr); + + if (!wdev->owner_nlportid) + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), + msg, 0, NL80211_MCGRP_NAN, gfp); + else + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, + wdev->owner_nlportid); + + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_match); + +void cfg80211_nan_func_terminated(struct wireless_dev *wdev, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + u64 cookie, gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + struct nlattr *func_attr; + void *hdr; + + if (WARN_ON(!inst_id)) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_NAN_FUNCTION); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) + goto nla_put_failure; + + func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + if (!func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, inst_id) || + nla_put_u8(msg, NL80211_NAN_FUNC_TERM_REASON, reason)) + goto nla_put_failure; + + nla_nest_end(msg, func_attr); + genlmsg_end(msg, hdr); + + if (!wdev->owner_nlportid) + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), + msg, 0, NL80211_MCGRP_NAN, gfp); + else + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, + wdev->owner_nlportid); + + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_func_terminated); + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { @@ -10598,6 +11264,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, struct cfg80211_registered_device **rdev, struct wireless_dev **wdev) { + struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); u32 vid, subcmd; unsigned int i; int vcmd_idx = -1; @@ -10633,31 +11300,28 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, } err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); + attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) goto out_unlock; - if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] || - !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { + if (!attrbuf[NL80211_ATTR_VENDOR_ID] || + !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { err = -EINVAL; goto out_unlock; } - *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*wdev)) *wdev = NULL; - *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*rdev)) { err = PTR_ERR(*rdev); goto out_unlock; } - vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]); - subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); + vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]); + subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) { const struct wiphy_vendor_command *vcmd; @@ -10681,9 +11345,9 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, goto out_unlock; } - if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) { - data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); - data_len = nla_len(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); + if (attrbuf[NL80211_ATTR_VENDOR_DATA]) { + data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]); + data_len = nla_len(attrbuf[NL80211_ATTR_VENDOR_DATA]); } /* 0 is the first index - add 1 to parse only once */ @@ -11115,7 +11779,14 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, dev_hold(dev); } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { - if (!wdev->p2p_started) { + if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE && + !wdev->p2p_started) { + if (rtnl) + rtnl_unlock(); + return -ENETDOWN; + } + if (wdev->iftype == NL80211_IFTYPE_NAN && + !wdev->nan_started) { if (rtnl) rtnl_unlock(); return -ENETDOWN; @@ -11749,6 +12420,46 @@ static const struct genl_ops nl80211_ops[] = { NL80211_FLAG_NEED_RTNL, }, { + .cmd = NL80211_CMD_START_NAN, + .doit = nl80211_start_nan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_STOP_NAN, + .doit = nl80211_stop_nan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_ADD_NAN_FUNCTION, + .doit = nl80211_nan_add_func, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_DEL_NAN_FUNCTION, + .doit = nl80211_nan_del_func, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_CHANGE_NAN_CONFIG, + .doit = nl80211_nan_change_config, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { .cmd = NL80211_CMD_SET_MCAST_RATE, .doit = nl80211_set_mcast_rate, .policy = nl80211_policy, @@ -11875,6 +12586,21 @@ static const struct genl_ops nl80211_ops[] = { }, }; +static struct genl_family nl80211_fam __ro_after_init = { + .name = NL80211_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ + .maxattr = NL80211_ATTR_MAX, + .netnsok = true, + .pre_doit = nl80211_pre_doit, + .post_doit = nl80211_post_doit, + .module = THIS_MODULE, + .ops = nl80211_ops, + .n_ops = ARRAY_SIZE(nl80211_ops), + .mcgrps = nl80211_mcgrps, + .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps), +}; + /* notification functions */ void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, @@ -13837,12 +14563,11 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev) /* initialisation/exit functions */ -int nl80211_init(void) +int __init nl80211_init(void) { int err; - err = genl_register_family_with_ops_groups(&nl80211_fam, nl80211_ops, - nl80211_mcgrps); + err = genl_register_family(&nl80211_fam); if (err) return err; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 85ff30bee2b9..11cf83c8ad4f 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -887,6 +887,64 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, trace_rdev_return_void(&rdev->wiphy); } +static inline int rdev_start_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf) +{ + int ret; + + trace_rdev_start_nan(&rdev->wiphy, wdev, conf); + ret = rdev->ops->start_nan(&rdev->wiphy, wdev, conf); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void rdev_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + trace_rdev_stop_nan(&rdev->wiphy, wdev); + rdev->ops->stop_nan(&rdev->wiphy, wdev); + trace_rdev_return_void(&rdev->wiphy); +} + +static inline int +rdev_add_nan_func(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_func *nan_func) +{ + int ret; + + trace_rdev_add_nan_func(&rdev->wiphy, wdev, nan_func); + ret = rdev->ops->add_nan_func(&rdev->wiphy, wdev, nan_func); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void rdev_del_nan_func(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, u64 cookie) +{ + trace_rdev_del_nan_func(&rdev->wiphy, wdev, cookie); + rdev->ops->del_nan_func(&rdev->wiphy, wdev, cookie); + trace_rdev_return_void(&rdev->wiphy); +} + +static inline int +rdev_nan_change_conf(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, u32 changes) +{ + int ret; + + trace_rdev_nan_change_conf(&rdev->wiphy, wdev, conf, changes); + if (rdev->ops->nan_change_conf) + ret = rdev->ops->nan_change_conf(&rdev->wiphy, wdev, conf, + changes); + else + ret = -ENOTSUPP; + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index c08a3b57dca1..a77db333927e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -726,7 +726,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->current_bss = bss_from_pub(bss); - cfg80211_upload_connect_keys(wdev); + if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) + cfg80211_upload_connect_keys(wdev); rcu_read_lock(); country_ie = ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY); @@ -1043,6 +1044,9 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, connect->crypto.ciphers_pairwise[0] = cipher; } } + + connect->crypto.wep_keys = connkeys->params; + connect->crypto.wep_tx_key = connkeys->def; } else { if (WARN_ON(connkeys)) return -EINVAL; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 72b5255cefe2..a3d0a91b1e09 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1889,6 +1889,96 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_start_nan, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf), + TP_ARGS(wiphy, wdev, conf), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, master_pref) + __field(u8, dual); + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", master preference: %u, dual: %d", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, + __entry->dual) +); + +TRACE_EVENT(rdev_nan_change_conf, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, u32 changes), + TP_ARGS(wiphy, wdev, conf, changes), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, master_pref) + __field(u8, dual); + __field(u32, changes); + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + __entry->changes = changes; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", master preference: %u, dual: %d, changes: %x", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, + __entry->dual, __entry->changes) +); + +DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev) +); + +TRACE_EVENT(rdev_add_nan_func, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + const struct cfg80211_nan_func *func), + TP_ARGS(wiphy, wdev, func), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, func_type) + __field(u64, cookie) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->func_type = func->type; + __entry->cookie = func->cookie + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type=%u, cookie=%llu", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->func_type, + __entry->cookie) +); + +TRACE_EVENT(rdev_del_nan_func, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + u64 cookie), + TP_ARGS(wiphy, wdev, cookie), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u64, cookie) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->cookie = cookie; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie=%llu", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) +); + TRACE_EVENT(rdev_set_mac_acl, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_acl_data *params), diff --git a/net/wireless/util.c b/net/wireless/util.c index 9e6e2aaa7766..8edce22d1b93 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -912,7 +912,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) if (!wdev->connect_keys) return; - for (i = 0; i < 4; i++) { + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) { if (!wdev->connect_keys->params[i].cipher) continue; if (rdev_add_key(rdev, dev, i, false, NULL, @@ -1008,8 +1008,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (otype == NL80211_IFTYPE_AP_VLAN) return -EOPNOTSUPP; - /* cannot change into P2P device type */ - if (ntype == NL80211_IFTYPE_P2P_DEVICE) + /* cannot change into P2P device or NAN */ + if (ntype == NL80211_IFTYPE_P2P_DEVICE || + ntype == NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!rdev->ops->change_virtual_intf || @@ -1088,6 +1089,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, /* not happening */ break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: WARN_ON(1); break; } @@ -1760,6 +1762,28 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_get_station); +void cfg80211_free_nan_func(struct cfg80211_nan_func *f) +{ + int i; + + if (!f) + return; + + kfree(f->serv_spec_info); + kfree(f->srf_bf); + kfree(f->srf_macs); + for (i = 0; i < f->num_rx_filters; i++) + kfree(f->rx_filters[i].filter); + + for (i = 0; i < f->num_tx_filters; i++) + kfree(f->tx_filters[i].filter); + + kfree(f->rx_filters); + kfree(f->tx_filters); + kfree(f); +} +EXPORT_SYMBOL(cfg80211_free_nan_func); + /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ const unsigned char rfc1042_header[] __aligned(2) = diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 7b97d43b27e1..a220156cf217 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -406,12 +406,16 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (pairwise && !addr) return -EINVAL; + /* + * In many cases we won't actually need this, but it's better + * to do it first in case the allocation fails. Don't use wext. + */ if (!wdev->wext.keys) { wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), GFP_KERNEL); if (!wdev->wext.keys) return -ENOMEM; - for (i = 0; i < 4; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) wdev->wext.keys->params[i].key = wdev->wext.keys->data[i]; } @@ -493,7 +497,13 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (err) return err; - if (!addr) { + /* + * We only need to store WEP keys, since they're the only keys that + * can be be set before a connection is established and persist after + * disconnecting. + */ + if (!addr && (params->cipher == WLAN_CIPHER_SUITE_WEP40 || + params->cipher == WLAN_CIPHER_SUITE_WEP104)) { wdev->wext.keys->params[idx] = *params; memcpy(wdev->wext.keys->data[idx], params->key, params->key_len); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 88f1f6931ab8..995163830a61 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -46,7 +46,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 4; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) ck->params[i].key = ck->data[i]; } diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 9c4fbd8935f4..ba2b539879bc 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -50,12 +50,18 @@ static const struct snmp_mib xfrm_mib_list[] = { static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { + unsigned long buff[LINUX_MIB_XFRMMAX]; struct net *net = seq->private; int i; + + memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); + + snmp_get_cpu_field_batch(buff, xfrm_mib_list, + net->mib.xfrm_statistics); for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field(net->mib.xfrm_statistics, - xfrm_mib_list[i].entry)); + buff[i]); + return 0; } |