summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/fddi.c11
-rw-r--r--net/802/hippi.c14
-rw-r--r--net/8021q/vlan.c7
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/9p/trans_rdma.c2
-rw-r--r--net/atm/br2684.c6
-rw-r--r--net/atm/lec.c11
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/netlink.c26
-rw-r--r--net/bluetooth/bnep/netdev.c3
-rw-r--r--net/bluetooth/hci_request.c66
-rw-r--r--net/bluetooth/hci_request.h23
-rw-r--r--net/bluetooth/mgmt.c21
-rw-r--r--net/bridge/br.c6
-rw-r--r--net/bridge/br_device.c3
-rw-r--r--net/bridge/br_fdb.c10
-rw-r--r--net/bridge/br_sysfs_if.c1
-rw-r--r--net/ceph/Makefile1
-rw-r--r--net/ceph/auth.c13
-rw-r--r--net/ceph/auth_none.c2
-rw-r--r--net/ceph/ceph_common.c13
-rw-r--r--net/ceph/ceph_strings.c1
-rw-r--r--net/ceph/cls_lock_client.c325
-rw-r--r--net/ceph/crush/mapper.c17
-rw-r--r--net/ceph/mon_client.c82
-rw-r--r--net/ceph/osd_client.c169
-rw-r--r--net/core/datagram.c36
-rw-r--r--net/core/dev.c509
-rw-r--r--net/core/devlink.c30
-rw-r--r--net/core/drop_monitor.c21
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/lwtunnel.c13
-rw-r--r--net/core/net-sysfs.c14
-rw-r--r--net/core/net_namespace.c37
-rw-r--r--net/core/pktgen.c21
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/core/skbuff.c65
-rw-r--r--net/core/sock.c70
-rw-r--r--net/dsa/slave.c16
-rw-r--r--net/ethernet/eth.c5
-rw-r--r--net/hsr/hsr_device.c1
-rw-r--r--net/hsr/hsr_netlink.c23
-rw-r--r--net/ieee802154/netlink.c24
-rw-r--r--net/ieee802154/nl802154.c44
-rw-r--r--net/ipv4/Kconfig8
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/fou.c23
-rw-r--r--net/ipv4/inet_diag.c25
-rw-r--r--net/ipv4/ip_tunnel.c6
-rw-r--r--net/ipv4/ipmr.c3
-rw-r--r--net/ipv4/ping.c15
-rw-r--r--net/ipv4/proc.c102
-rw-r--r--net/ipv4/raw.c21
-rw-r--r--net/ipv4/raw_diag.c261
-rw-r--r--net/ipv4/route.c6
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_input.c3
-rw-r--r--net/ipv4/tcp_ipv4.c9
-rw-r--r--net/ipv4/tcp_metrics.c23
-rw-r--r--net/ipv4/tcp_output.c12
-rw-r--r--net/ipv4/udp.c149
-rw-r--r--net/ipv6/addrconf.c98
-rw-r--r--net/ipv6/ila/ila_lwt.c92
-rw-r--r--net/ipv6/ila/ila_xlat.c27
-rw-r--r--net/ipv6/ip6_gre.c1
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/ip6_vti.c21
-rw-r--r--net/ipv6/ip6mr.c5
-rw-r--r--net/ipv6/proc.c30
-rw-r--r--net/ipv6/raw.c7
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/ipv6/sit.c14
-rw-r--r--net/ipv6/tcp_ipv6.c20
-rw-r--r--net/ipv6/udp.c34
-rw-r--r--net/irda/irlan/irlan_eth.c4
-rw-r--r--net/irda/irnetlink.c22
-rw-r--r--net/iucv/af_iucv.c34
-rw-r--r--net/kcm/kcmsock.c16
-rw-r--r--net/l2tp/l2tp_eth.c2
-rw-r--r--net/l2tp/l2tp_netlink.c28
-rw-r--r--net/mac80211/cfg.c208
-rw-r--r--net/mac80211/chan.c6
-rw-r--r--net/mac80211/debugfs.c8
-rw-r--r--net/mac80211/debugfs_netdev.c12
-rw-r--r--net/mac80211/driver-ops.c15
-rw-r--r--net/mac80211/driver-ops.h83
-rw-r--r--net/mac80211/ieee80211_i.h26
-rw-r--r--net/mac80211/iface.c43
-rw-r--r--net/mac80211/main.c8
-rw-r--r--net/mac80211/mesh_sync.c12
-rw-r--r--net/mac80211/offchannel.c4
-rw-r--r--net/mac80211/rx.c7
-rw-r--r--net/mac80211/sta_info.c10
-rw-r--r--net/mac80211/trace.h159
-rw-r--r--net/mac80211/tx.c420
-rw-r--r--net/mac80211/util.c61
-rw-r--r--net/mpls/internal.h10
-rw-r--r--net/mpls/mpls_iptunnel.c5
-rw-r--r--net/ncsi/internal.h22
-rw-r--r--net/ncsi/ncsi-aen.c37
-rw-r--r--net/ncsi/ncsi-cmd.c2
-rw-r--r--net/ncsi/ncsi-manage.c198
-rw-r--r--net/ncsi/ncsi-rsp.c4
-rw-r--r--net/netfilter/core.c119
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c23
-rw-r--r--net/netfilter/nf_log.c6
-rw-r--r--net/netfilter/nft_limit.c4
-rw-r--r--net/netfilter/xt_hashlimit.c15
-rw-r--r--net/netlabel/netlabel_calipso.c21
-rw-r--r--net/netlabel/netlabel_cipso_v4.c22
-rw-r--r--net/netlabel/netlabel_mgmt.c21
-rw-r--r--net/netlabel/netlabel_unlabeled.c21
-rw-r--r--net/netlink/af_netlink.c7
-rw-r--r--net/netlink/genetlink.c320
-rw-r--r--net/nfc/netlink.c34
-rw-r--r--net/openvswitch/actions.c48
-rw-r--r--net/openvswitch/datapath.c20
-rw-r--r--net/openvswitch/flow.c13
-rw-r--r--net/openvswitch/vport-internal_dev.c12
-rw-r--r--net/openvswitch/vport-netdev.c1
-rw-r--r--net/openvswitch/vport.c20
-rw-r--r--net/openvswitch/vport.h1
-rw-r--r--net/packet/af_packet.c1
-rw-r--r--net/phonet/pep-gprs.c12
-rw-r--r--net/rds/connection.c15
-rw-r--r--net/rds/ib.c2
-rw-r--r--net/rds/rds.h4
-rw-r--r--net/rds/threads.c3
-rw-r--r--net/rxrpc/af_rxrpc.c21
-rw-r--r--net/rxrpc/ar-internal.h34
-rw-r--r--net/rxrpc/call_accept.c10
-rw-r--r--net/rxrpc/call_event.c154
-rw-r--r--net/rxrpc/call_object.c22
-rw-r--r--net/rxrpc/conn_client.c38
-rw-r--r--net/rxrpc/conn_event.c3
-rw-r--r--net/rxrpc/input.c86
-rw-r--r--net/rxrpc/local_event.c3
-rw-r--r--net/rxrpc/local_object.c3
-rw-r--r--net/rxrpc/misc.c22
-rw-r--r--net/rxrpc/output.c193
-rw-r--r--net/rxrpc/recvmsg.c20
-rw-r--r--net/rxrpc/rxkad.c15
-rw-r--r--net/rxrpc/security.c8
-rw-r--r--net/rxrpc/sendmsg.c22
-rw-r--r--net/rxrpc/skbuff.c11
-rw-r--r--net/rxrpc/sysctl.c8
-rw-r--r--net/sched/act_api.c19
-rw-r--r--net/sched/act_ife.c7
-rw-r--r--net/sched/act_mirred.c84
-rw-r--r--net/sched/act_skbedit.c21
-rw-r--r--net/sched/act_vlan.c9
-rw-r--r--net/sched/cls_api.c18
-rw-r--r--net/sched/em_meta.c9
-rw-r--r--net/sched/sch_htb.c2
-rw-r--r--net/sched/sch_qfq.c3
-rw-r--r--net/sched/sch_sfb.c3
-rw-r--r--net/sched/sch_teql.c5
-rw-r--r--net/sctp/chunk.c31
-rw-r--r--net/sctp/output.c6
-rw-r--r--net/sctp/outqueue.c16
-rw-r--r--net/sctp/proc.c10
-rw-r--r--net/sctp/sctp_diag.c58
-rw-r--r--net/sctp/sm_make_chunk.c15
-rw-r--r--net/sctp/socket.c10
-rw-r--r--net/socket.c61
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/sunrpc/auth.c2
-rw-r--r--net/sunrpc/auth_generic.c13
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c7
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/auth_unix.c13
-rw-r--r--net/sunrpc/backchannel_rqst.c8
-rw-r--r--net/sunrpc/cache.c5
-rw-r--r--net/sunrpc/clnt.c132
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/sched.c35
-rw-r--r--net/sunrpc/svc.c17
-rw-r--r--net/sunrpc/svcauth_unix.c6
-rw-r--r--net/sunrpc/svcsock.c20
-rw-r--r--net/sunrpc/xdr.c11
-rw-r--r--net/sunrpc/xprt.c2
-rw-r--r--net/sunrpc/xprtmultipath.c24
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c53
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c7
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c28
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c323
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c21
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c82
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c62
-rw-r--r--net/sunrpc/xprtrdma/transport.c202
-rw-r--r--net/sunrpc/xprtrdma/verbs.c239
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h108
-rw-r--r--net/sunrpc/xprtsock.c36
-rw-r--r--net/sysctl_net.c6
-rw-r--r--net/tipc/netlink.c27
-rw-r--r--net/tipc/netlink_compat.c25
-rw-r--r--net/tipc/udp_media.c2
-rw-r--r--net/unix/af_unix.c17
-rw-r--r--net/vmw_vsock/af_vsock.c6
-rw-r--r--net/wimax/stack.c22
-rw-r--r--net/wireless/chan.c2
-rw-r--r--net/wireless/core.c35
-rw-r--r--net/wireless/core.h7
-rw-r--r--net/wireless/ibss.c5
-rw-r--r--net/wireless/mlme.c1
-rw-r--r--net/wireless/nl80211.c1263
-rw-r--r--net/wireless/rdev-ops.h58
-rw-r--r--net/wireless/sme.c6
-rw-r--r--net/wireless/trace.h90
-rw-r--r--net/wireless/util.c30
-rw-r--r--net/wireless/wext-compat.c14
-rw-r--r--net/wireless/wext-sme.c2
-rw-r--r--net/xfrm/xfrm_proc.c10
215 files changed, 6276 insertions, 2912 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 7d3a0af954e8..6356623fc238 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -141,15 +141,6 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
EXPORT_SYMBOL(fddi_type_trans);
-int fddi_change_mtu(struct net_device *dev, int new_mtu)
-{
- if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
-}
-EXPORT_SYMBOL(fddi_change_mtu);
-
static const struct header_ops fddi_header_ops = {
.create = fddi_header,
};
@@ -161,6 +152,8 @@ static void fddi_setup(struct net_device *dev)
dev->type = ARPHRD_FDDI;
dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */
dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */
+ dev->min_mtu = FDDI_K_SNAP_HLEN;
+ dev->max_mtu = FDDI_K_SNAP_DLEN;
dev->addr_len = FDDI_K_ALEN;
dev->tx_queue_len = 100; /* Long queues on FDDI */
dev->flags = IFF_BROADCAST | IFF_MULTICAST;
diff --git a/net/802/hippi.c b/net/802/hippi.c
index ade1a52cdcff..5e4427beab2b 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -116,18 +116,6 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
EXPORT_SYMBOL(hippi_type_trans);
-int hippi_change_mtu(struct net_device *dev, int new_mtu)
-{
- /*
- * HIPPI's got these nice large MTUs.
- */
- if ((new_mtu < 68) || (new_mtu > 65280))
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
-}
-EXPORT_SYMBOL(hippi_change_mtu);
-
/*
* For HIPPI we will actually use the lower 4 bytes of the hardware
* address as the I-FIELD rather than the actual hardware address.
@@ -174,6 +162,8 @@ static void hippi_setup(struct net_device *dev)
dev->type = ARPHRD_HIPPI;
dev->hard_header_len = HIPPI_HLEN;
dev->mtu = 65280;
+ dev->min_mtu = 68;
+ dev->max_mtu = 65280;
dev->addr_len = HIPPI_ALEN;
dev->tx_queue_len = 25 /* 5 */;
memset(dev->broadcast, 0xFF, HIPPI_ALEN);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8de138d3306b..9accde339601 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -515,8 +515,8 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
return -EFAULT;
/* Null terminate this sucker, just in case. */
- args.device1[23] = 0;
- args.u.device2[23] = 0;
+ args.device1[sizeof(args.device1) - 1] = 0;
+ args.u.device2[sizeof(args.u.device2) - 1] = 0;
rtnl_lock();
@@ -571,8 +571,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
err = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
- if ((args.u.name_type >= 0) &&
- (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
+ if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) {
struct vlan_net *vn;
vn = net_generic(net, vlan_net_id);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index fbfacd51aa34..10da6c588bf8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -826,5 +826,8 @@ void vlan_setup(struct net_device *dev)
dev->destructor = vlan_dev_free;
dev->ethtool_ops = &vlan_ethtool_ops;
+ dev->min_mtu = 0;
+ dev->max_mtu = ETH_MAX_MTU;
+
eth_zero_addr(dev->broadcast);
}
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 1852e383afd6..553ed4ecb6a0 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -680,7 +680,7 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
goto error;
/* Create the Protection Domain */
- rdma->pd = ib_alloc_pd(rdma->cm_id->device);
+ rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0);
if (IS_ERR(rdma->pd))
goto error;
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index aa0047c5c467..fca84e111c89 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -620,14 +620,12 @@ error:
static const struct net_device_ops br2684_netdev_ops = {
.ndo_start_xmit = br2684_start_xmit,
.ndo_set_mac_address = br2684_mac_addr,
- .ndo_change_mtu = eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
};
static const struct net_device_ops br2684_netdev_ops_routed = {
.ndo_start_xmit = br2684_start_xmit,
.ndo_set_mac_address = br2684_mac_addr,
- .ndo_change_mtu = eth_change_mtu
};
static void br2684_setup(struct net_device *netdev)
@@ -651,7 +649,9 @@ static void br2684_setup_routed(struct net_device *netdev)
netdev->hard_header_len = sizeof(llc_oui_ipv4); /* worst case */
netdev->netdev_ops = &br2684_netdev_ops_routed;
netdev->addr_len = 0;
- netdev->mtu = 1500;
+ netdev->mtu = ETH_DATA_LEN;
+ netdev->min_mtu = 0;
+ netdev->max_mtu = ETH_MAX_MTU;
netdev->type = ARPHRD_PPP;
netdev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
netdev->tx_queue_len = 100;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 5d2693826afb..779b3fa6052d 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -544,15 +544,6 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
return 0;
}
-/* shamelessly stolen from drivers/net/net_init.c */
-static int lec_change_mtu(struct net_device *dev, int new_mtu)
-{
- if ((new_mtu < 68) || (new_mtu > 18190))
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
-}
-
static void lec_set_multicast_list(struct net_device *dev)
{
/*
@@ -565,7 +556,6 @@ static const struct net_device_ops lec_netdev_ops = {
.ndo_open = lec_open,
.ndo_stop = lec_close,
.ndo_start_xmit = lec_start_xmit,
- .ndo_change_mtu = lec_change_mtu,
.ndo_tx_timeout = lec_tx_timeout,
.ndo_set_rx_mode = lec_set_multicast_list,
};
@@ -742,6 +732,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
if (!dev_lec[i])
return -ENOMEM;
dev_lec[i]->netdev_ops = &lec_netdev_ops;
+ dev_lec[i]->max_mtu = 18190;
snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i);
if (register_netdev(dev_lec[i])) {
free_netdev(dev_lec[i]);
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index c68ff3dcb926..e49121ee55f6 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -20,8 +20,6 @@
#include "main.h"
-#include <linux/kconfig.h>
-
struct net_device;
#define BATADV_DEBUGFS_SUBDIR "batman_adv"
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 64cb6acbe0a6..005012ba9b48 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -48,14 +48,7 @@
#include "tp_meter.h"
#include "translation-table.h"
-struct genl_family batadv_netlink_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = BATADV_NL_NAME,
- .version = 1,
- .maxattr = BATADV_ATTR_MAX,
- .netnsok = true,
-};
+struct genl_family batadv_netlink_family;
/* multicast groups */
enum batadv_netlink_multicast_groups {
@@ -610,6 +603,19 @@ static struct genl_ops batadv_netlink_ops[] = {
};
+struct genl_family batadv_netlink_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = BATADV_NL_NAME,
+ .version = 1,
+ .maxattr = BATADV_ATTR_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = batadv_netlink_ops,
+ .n_ops = ARRAY_SIZE(batadv_netlink_ops),
+ .mcgrps = batadv_netlink_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps),
+};
+
/**
* batadv_netlink_register - register batadv genl netlink family
*/
@@ -617,9 +623,7 @@ void __init batadv_netlink_register(void)
{
int ret;
- ret = genl_register_family_with_ops_groups(&batadv_netlink_family,
- batadv_netlink_ops,
- batadv_netlink_mcgrps);
+ ret = genl_register_family(&batadv_netlink_family);
if (ret)
pr_warn("unable to register netlink family");
}
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index f4fcb4a9d5c1..2b875edf77e1 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -211,7 +211,6 @@ static const struct net_device_ops bnep_netdev_ops = {
.ndo_set_rx_mode = bnep_net_set_mc_list,
.ndo_set_mac_address = bnep_net_set_mac_addr,
.ndo_tx_timeout = bnep_net_timeout,
- .ndo_change_mtu = eth_change_mtu,
};
@@ -222,6 +221,8 @@ void bnep_net_setup(struct net_device *dev)
dev->addr_len = ETH_ALEN;
ether_setup(dev);
+ dev->min_mtu = 0;
+ dev->max_mtu = ETH_MAX_MTU;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->netdev_ops = &bnep_netdev_ops;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index c8135680c43e..e2288421fe6b 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -21,8 +21,6 @@
SOFTWARE IS DISCLAIMED.
*/
-#include <asm/unaligned.h>
-
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/mgmt.h>
@@ -973,33 +971,58 @@ void __hci_req_enable_advertising(struct hci_request *req)
static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
{
- size_t name_len;
+ size_t complete_len;
+ size_t short_len;
int max_len;
max_len = HCI_MAX_AD_LENGTH - ad_len - 2;
- name_len = strlen(hdev->dev_name);
- if (name_len > 0 && max_len > 0) {
-
- if (name_len > max_len) {
- name_len = max_len;
- ptr[1] = EIR_NAME_SHORT;
- } else
- ptr[1] = EIR_NAME_COMPLETE;
-
- ptr[0] = name_len + 1;
+ complete_len = strlen(hdev->dev_name);
+ short_len = strlen(hdev->short_name);
+
+ /* no space left for name */
+ if (max_len < 1)
+ return ad_len;
+
+ /* no name set */
+ if (!complete_len)
+ return ad_len;
+
+ /* complete name fits and is eq to max short name len or smaller */
+ if (complete_len <= max_len &&
+ complete_len <= HCI_MAX_SHORT_NAME_LENGTH) {
+ return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE,
+ hdev->dev_name, complete_len);
+ }
- memcpy(ptr + 2, hdev->dev_name, name_len);
+ /* short name set and fits */
+ if (short_len && short_len <= max_len) {
+ return eir_append_data(ptr, ad_len, EIR_NAME_SHORT,
+ hdev->short_name, short_len);
+ }
- ad_len += (name_len + 2);
- ptr += (name_len + 2);
+ /* no short name set so shorten complete name */
+ if (!short_len) {
+ return eir_append_data(ptr, ad_len, EIR_NAME_SHORT,
+ hdev->dev_name, max_len);
}
return ad_len;
}
+static u8 append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
+{
+ return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance);
+}
+
static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
{
- return append_local_name(hdev, ptr, 0);
+ u8 scan_rsp_len = 0;
+
+ if (hdev->appearance) {
+ scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len);
+ }
+
+ return append_local_name(hdev, ptr, scan_rsp_len);
}
static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
@@ -1016,18 +1039,13 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
instance_flags = adv_instance->flags;
if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) {
- ptr[0] = 3;
- ptr[1] = EIR_APPEARANCE;
- put_unaligned_le16(hdev->appearance, ptr + 2);
- scan_rsp_len += 4;
- ptr += 4;
+ scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len);
}
- memcpy(ptr, adv_instance->scan_rsp_data,
+ memcpy(&ptr[scan_rsp_len], adv_instance->scan_rsp_data,
adv_instance->scan_rsp_len);
scan_rsp_len += adv_instance->scan_rsp_len;
- ptr += adv_instance->scan_rsp_len;
if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME)
scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len);
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index ac1e11006f38..6b06629245a8 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -20,6 +20,8 @@
SOFTWARE IS DISCLAIMED.
*/
+#include <asm/unaligned.h>
+
#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock)
#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock)
@@ -103,3 +105,24 @@ static inline void hci_update_background_scan(struct hci_dev *hdev)
void hci_request_setup(struct hci_dev *hdev);
void hci_request_cancel_all(struct hci_dev *hdev);
+
+static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type,
+ u8 *data, u8 data_len)
+{
+ eir[eir_len++] = sizeof(type) + data_len;
+ eir[eir_len++] = type;
+ memcpy(&eir[eir_len], data, data_len);
+ eir_len += data_len;
+
+ return eir_len;
+}
+
+static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data)
+{
+ eir[eir_len++] = sizeof(type) + sizeof(data);
+ eir[eir_len++] = type;
+ put_unaligned_le16(data, &eir[eir_len]);
+ eir_len += sizeof(data);
+
+ return eir_len;
+}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 19b8a5e9420d..736038085feb 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -867,27 +867,6 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev,
sizeof(rp));
}
-static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data,
- u8 data_len)
-{
- eir[eir_len++] = sizeof(type) + data_len;
- eir[eir_len++] = type;
- memcpy(&eir[eir_len], data, data_len);
- eir_len += data_len;
-
- return eir_len;
-}
-
-static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data)
-{
- eir[eir_len++] = sizeof(type) + sizeof(data);
- eir[eir_len++] = type;
- put_unaligned_le16(data, &eir[eir_len]);
- eir_len += sizeof(data);
-
- return eir_len;
-}
-
static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir)
{
u16 eir_len = 0;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 3addc05b9a16..889e5640455f 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -227,9 +227,11 @@ static int __init br_init(void)
br_fdb_test_addr_hook = br_fdb_test_addr;
#endif
- pr_info("bridge: automatic filtering via arp/ip/ip6tables has been "
- "deprecated. Update your scripts to load br_netfilter if you "
+#if IS_MODULE(CONFIG_BRIDGE_NETFILTER)
+ pr_info("bridge: filtering via arp/ip/ip6tables is no longer available "
+ "by default. Update your scripts to load br_netfilter if you "
"need this.\n");
+#endif
return 0;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 89a687f3c0a3..c08e02b67818 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -185,7 +185,7 @@ static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev,
static int br_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_bridge *br = netdev_priv(dev);
- if (new_mtu < 68 || new_mtu > br_min_mtu(br))
+ if (new_mtu > br_min_mtu(br))
return -EINVAL;
dev->mtu = new_mtu;
@@ -410,6 +410,7 @@ void br_dev_setup(struct net_device *dev)
br->bridge_hello_time = br->hello_time = 2 * HZ;
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
br->ageing_time = BR_DEFAULT_AGEING_TIME;
+ dev->max_mtu = ETH_MAX_MTU;
br_netfilter_rtable_init(br);
br_stp_timer_init(br);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6b43c8c88f19..e4a4176171c9 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -535,9 +535,8 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
*/
if (fdb->is_local)
return 0;
- br_warn(br, "adding interface %s with same address "
- "as a received packet\n",
- source ? source->dev->name : br->dev->name);
+ br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
+ source ? source->dev->name : br->dev->name, addr, vid);
fdb_delete(br, fdb);
}
@@ -583,9 +582,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
/* attempt to update an entry for a local interface */
if (unlikely(fdb->is_local)) {
if (net_ratelimit())
- br_warn(br, "received packet on %s with "
- "own address as source address\n",
- source->dev->name);
+ br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n",
+ source->dev->name, addr, vid);
} else {
/* fastpath: update of existing entry */
if (unlikely(source != fdb->dst)) {
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index e657258e1f2c..8bd569695e76 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -217,6 +217,7 @@ static const struct brport_attribute *brport_attrs[] = {
#endif
&brport_attr_proxyarp,
&brport_attr_proxyarp_wifi,
+ &brport_attr_multicast_flood,
NULL
};
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 84cbed630c4b..6a5180903e7b 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_CEPH_LIB) += libceph.o
libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
mon_client.o \
+ cls_lock_client.o \
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
debugfs.o \
auth.o auth_none.o \
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 2bc5965fdd1e..c822b3ae1bd3 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -82,7 +82,10 @@ void ceph_auth_reset(struct ceph_auth_client *ac)
mutex_unlock(&ac->mutex);
}
-int ceph_entity_name_encode(const char *name, void **p, void *end)
+/*
+ * EntityName, not to be confused with entity_name_t
+ */
+int ceph_auth_entity_name_encode(const char *name, void **p, void *end)
{
int len = strlen(name);
@@ -111,7 +114,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
monhdr->session_mon = cpu_to_le16(-1);
monhdr->session_mon_tid = 0;
- ceph_encode_32(&p, 0); /* no protocol, yet */
+ ceph_encode_32(&p, CEPH_AUTH_UNKNOWN); /* no protocol, yet */
lenp = p;
p += sizeof(u32);
@@ -124,7 +127,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
for (i = 0; i < num; i++)
ceph_encode_32(&p, supported_protocols[i]);
- ret = ceph_entity_name_encode(ac->name, &p, end);
+ ret = ceph_auth_entity_name_encode(ac->name, &p, end);
if (ret < 0)
goto out;
ceph_decode_need(&p, end, sizeof(u64), bad);
@@ -259,9 +262,7 @@ int ceph_build_auth(struct ceph_auth_client *ac,
int ret = 0;
mutex_lock(&ac->mutex);
- if (!ac->protocol)
- ret = ceph_auth_build_hello(ac, msg_buf, msg_len);
- else if (ac->ops->should_authenticate(ac))
+ if (ac->ops->should_authenticate(ac))
ret = ceph_build_auth_request(ac, msg_buf, msg_len);
mutex_unlock(&ac->mutex);
return ret;
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 5f836f02ae36..df45e467c81f 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -46,7 +46,7 @@ static int ceph_auth_none_build_authorizer(struct ceph_auth_client *ac,
int ret;
ceph_encode_8_safe(&p, end, 1, e_range);
- ret = ceph_entity_name_encode(ac->name, &p, end);
+ ret = ceph_auth_entity_name_encode(ac->name, &p, end);
if (ret < 0)
return ret;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index bddfcf6f09c2..464e88599b9d 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -566,11 +566,17 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
}
EXPORT_SYMBOL(ceph_print_client_options);
-u64 ceph_client_id(struct ceph_client *client)
+struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client)
+{
+ return &client->msgr.inst.addr;
+}
+EXPORT_SYMBOL(ceph_client_addr);
+
+u64 ceph_client_gid(struct ceph_client *client)
{
return client->monc.auth->global_id;
}
-EXPORT_SYMBOL(ceph_client_id);
+EXPORT_SYMBOL(ceph_client_gid);
/*
* create a fresh client instance
@@ -685,7 +691,8 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
return client->auth_err;
}
- pr_info("client%llu fsid %pU\n", ceph_client_id(client), &client->fsid);
+ pr_info("client%llu fsid %pU\n", ceph_client_gid(client),
+ &client->fsid);
ceph_debugfs_client_init(client);
return 0;
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c
index 3773a4fa11e3..19b7d8aa915c 100644
--- a/net/ceph/ceph_strings.c
+++ b/net/ceph/ceph_strings.c
@@ -15,6 +15,7 @@ const char *ceph_entity_type_name(int type)
default: return "unknown";
}
}
+EXPORT_SYMBOL(ceph_entity_type_name);
const char *ceph_osd_op_name(int op)
{
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
new file mode 100644
index 000000000000..50f040fdb2a9
--- /dev/null
+++ b/net/ceph/cls_lock_client.c
@@ -0,0 +1,325 @@
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/cls_lock_client.h>
+#include <linux/ceph/decode.h>
+
+/**
+ * ceph_cls_lock - grab rados lock for object
+ * @oid, @oloc: object to lock
+ * @lock_name: the name of the lock
+ * @type: lock type (CEPH_CLS_LOCK_EXCLUSIVE or CEPH_CLS_LOCK_SHARED)
+ * @cookie: user-defined identifier for this instance of the lock
+ * @tag: user-defined tag
+ * @desc: user-defined lock description
+ * @flags: lock flags
+ *
+ * All operations on the same lock should use the same tag.
+ */
+int ceph_cls_lock(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ char *lock_name, u8 type, char *cookie,
+ char *tag, char *desc, u8 flags)
+{
+ int lock_op_buf_size;
+ int name_len = strlen(lock_name);
+ int cookie_len = strlen(cookie);
+ int tag_len = strlen(tag);
+ int desc_len = strlen(desc);
+ void *p, *end;
+ struct page *lock_op_page;
+ struct timespec mtime;
+ int ret;
+
+ lock_op_buf_size = name_len + sizeof(__le32) +
+ cookie_len + sizeof(__le32) +
+ tag_len + sizeof(__le32) +
+ desc_len + sizeof(__le32) +
+ sizeof(struct ceph_timespec) +
+ /* flag and type */
+ sizeof(u8) + sizeof(u8) +
+ CEPH_ENCODING_START_BLK_LEN;
+ if (lock_op_buf_size > PAGE_SIZE)
+ return -E2BIG;
+
+ lock_op_page = alloc_page(GFP_NOIO);
+ if (!lock_op_page)
+ return -ENOMEM;
+
+ p = page_address(lock_op_page);
+ end = p + lock_op_buf_size;
+
+ /* encode cls_lock_lock_op struct */
+ ceph_start_encoding(&p, 1, 1,
+ lock_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_string(&p, end, lock_name, name_len);
+ ceph_encode_8(&p, type);
+ ceph_encode_string(&p, end, cookie, cookie_len);
+ ceph_encode_string(&p, end, tag, tag_len);
+ ceph_encode_string(&p, end, desc, desc_len);
+ /* only support infinite duration */
+ memset(&mtime, 0, sizeof(mtime));
+ ceph_encode_timespec(p, &mtime);
+ p += sizeof(struct ceph_timespec);
+ ceph_encode_8(&p, flags);
+
+ dout("%s lock_name %s type %d cookie %s tag %s desc %s flags 0x%x\n",
+ __func__, lock_name, type, cookie, tag, desc, flags);
+ ret = ceph_osdc_call(osdc, oid, oloc, "lock", "lock",
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ lock_op_page, lock_op_buf_size, NULL, NULL);
+
+ dout("%s: status %d\n", __func__, ret);
+ __free_page(lock_op_page);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_cls_lock);
+
+/**
+ * ceph_cls_unlock - release rados lock for object
+ * @oid, @oloc: object to lock
+ * @lock_name: the name of the lock
+ * @cookie: user-defined identifier for this instance of the lock
+ */
+int ceph_cls_unlock(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ char *lock_name, char *cookie)
+{
+ int unlock_op_buf_size;
+ int name_len = strlen(lock_name);
+ int cookie_len = strlen(cookie);
+ void *p, *end;
+ struct page *unlock_op_page;
+ int ret;
+
+ unlock_op_buf_size = name_len + sizeof(__le32) +
+ cookie_len + sizeof(__le32) +
+ CEPH_ENCODING_START_BLK_LEN;
+ if (unlock_op_buf_size > PAGE_SIZE)
+ return -E2BIG;
+
+ unlock_op_page = alloc_page(GFP_NOIO);
+ if (!unlock_op_page)
+ return -ENOMEM;
+
+ p = page_address(unlock_op_page);
+ end = p + unlock_op_buf_size;
+
+ /* encode cls_lock_unlock_op struct */
+ ceph_start_encoding(&p, 1, 1,
+ unlock_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_string(&p, end, lock_name, name_len);
+ ceph_encode_string(&p, end, cookie, cookie_len);
+
+ dout("%s lock_name %s cookie %s\n", __func__, lock_name, cookie);
+ ret = ceph_osdc_call(osdc, oid, oloc, "lock", "unlock",
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ unlock_op_page, unlock_op_buf_size, NULL, NULL);
+
+ dout("%s: status %d\n", __func__, ret);
+ __free_page(unlock_op_page);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_cls_unlock);
+
+/**
+ * ceph_cls_break_lock - release rados lock for object for specified client
+ * @oid, @oloc: object to lock
+ * @lock_name: the name of the lock
+ * @cookie: user-defined identifier for this instance of the lock
+ * @locker: current lock owner
+ */
+int ceph_cls_break_lock(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ char *lock_name, char *cookie,
+ struct ceph_entity_name *locker)
+{
+ int break_op_buf_size;
+ int name_len = strlen(lock_name);
+ int cookie_len = strlen(cookie);
+ struct page *break_op_page;
+ void *p, *end;
+ int ret;
+
+ break_op_buf_size = name_len + sizeof(__le32) +
+ cookie_len + sizeof(__le32) +
+ sizeof(u8) + sizeof(__le64) +
+ CEPH_ENCODING_START_BLK_LEN;
+ if (break_op_buf_size > PAGE_SIZE)
+ return -E2BIG;
+
+ break_op_page = alloc_page(GFP_NOIO);
+ if (!break_op_page)
+ return -ENOMEM;
+
+ p = page_address(break_op_page);
+ end = p + break_op_buf_size;
+
+ /* encode cls_lock_break_op struct */
+ ceph_start_encoding(&p, 1, 1,
+ break_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_string(&p, end, lock_name, name_len);
+ ceph_encode_copy(&p, locker, sizeof(*locker));
+ ceph_encode_string(&p, end, cookie, cookie_len);
+
+ dout("%s lock_name %s cookie %s locker %s%llu\n", __func__, lock_name,
+ cookie, ENTITY_NAME(*locker));
+ ret = ceph_osdc_call(osdc, oid, oloc, "lock", "break_lock",
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ break_op_page, break_op_buf_size, NULL, NULL);
+
+ dout("%s: status %d\n", __func__, ret);
+ __free_page(break_op_page);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_cls_break_lock);
+
+void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers)
+{
+ int i;
+
+ for (i = 0; i < num_lockers; i++)
+ kfree(lockers[i].id.cookie);
+ kfree(lockers);
+}
+EXPORT_SYMBOL(ceph_free_lockers);
+
+static int decode_locker(void **p, void *end, struct ceph_locker *locker)
+{
+ u8 struct_v;
+ u32 len;
+ char *s;
+ int ret;
+
+ ret = ceph_start_decoding(p, end, 1, "locker_id_t", &struct_v, &len);
+ if (ret)
+ return ret;
+
+ ceph_decode_copy(p, &locker->id.name, sizeof(locker->id.name));
+ s = ceph_extract_encoded_string(p, end, NULL, GFP_NOIO);
+ if (IS_ERR(s))
+ return PTR_ERR(s);
+
+ locker->id.cookie = s;
+
+ ret = ceph_start_decoding(p, end, 1, "locker_info_t", &struct_v, &len);
+ if (ret)
+ return ret;
+
+ *p += sizeof(struct ceph_timespec); /* skip expiration */
+ ceph_decode_copy(p, &locker->info.addr, sizeof(locker->info.addr));
+ ceph_decode_addr(&locker->info.addr);
+ len = ceph_decode_32(p);
+ *p += len; /* skip description */
+
+ dout("%s %s%llu cookie %s addr %s\n", __func__,
+ ENTITY_NAME(locker->id.name), locker->id.cookie,
+ ceph_pr_addr(&locker->info.addr.in_addr));
+ return 0;
+}
+
+static int decode_lockers(void **p, void *end, u8 *type, char **tag,
+ struct ceph_locker **lockers, u32 *num_lockers)
+{
+ u8 struct_v;
+ u32 struct_len;
+ char *s;
+ int i;
+ int ret;
+
+ ret = ceph_start_decoding(p, end, 1, "cls_lock_get_info_reply",
+ &struct_v, &struct_len);
+ if (ret)
+ return ret;
+
+ *num_lockers = ceph_decode_32(p);
+ *lockers = kcalloc(*num_lockers, sizeof(**lockers), GFP_NOIO);
+ if (!*lockers)
+ return -ENOMEM;
+
+ for (i = 0; i < *num_lockers; i++) {
+ ret = decode_locker(p, end, *lockers + i);
+ if (ret)
+ goto err_free_lockers;
+ }
+
+ *type = ceph_decode_8(p);
+ s = ceph_extract_encoded_string(p, end, NULL, GFP_NOIO);
+ if (IS_ERR(s)) {
+ ret = PTR_ERR(s);
+ goto err_free_lockers;
+ }
+
+ *tag = s;
+ return 0;
+
+err_free_lockers:
+ ceph_free_lockers(*lockers, *num_lockers);
+ return ret;
+}
+
+/*
+ * On success, the caller is responsible for:
+ *
+ * kfree(tag);
+ * ceph_free_lockers(lockers, num_lockers);
+ */
+int ceph_cls_lock_info(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ char *lock_name, u8 *type, char **tag,
+ struct ceph_locker **lockers, u32 *num_lockers)
+{
+ int get_info_op_buf_size;
+ int name_len = strlen(lock_name);
+ struct page *get_info_op_page, *reply_page;
+ size_t reply_len;
+ void *p, *end;
+ int ret;
+
+ get_info_op_buf_size = name_len + sizeof(__le32) +
+ CEPH_ENCODING_START_BLK_LEN;
+ if (get_info_op_buf_size > PAGE_SIZE)
+ return -E2BIG;
+
+ get_info_op_page = alloc_page(GFP_NOIO);
+ if (!get_info_op_page)
+ return -ENOMEM;
+
+ reply_page = alloc_page(GFP_NOIO);
+ if (!reply_page) {
+ __free_page(get_info_op_page);
+ return -ENOMEM;
+ }
+
+ p = page_address(get_info_op_page);
+ end = p + get_info_op_buf_size;
+
+ /* encode cls_lock_get_info_op struct */
+ ceph_start_encoding(&p, 1, 1,
+ get_info_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
+ ceph_encode_string(&p, end, lock_name, name_len);
+
+ dout("%s lock_name %s\n", __func__, lock_name);
+ ret = ceph_osdc_call(osdc, oid, oloc, "lock", "get_info",
+ CEPH_OSD_FLAG_READ, get_info_op_page,
+ get_info_op_buf_size, reply_page, &reply_len);
+
+ dout("%s: status %d\n", __func__, ret);
+ if (ret >= 0) {
+ p = page_address(reply_page);
+ end = p + reply_len;
+
+ ret = decode_lockers(&p, end, type, tag, lockers, num_lockers);
+ }
+
+ __free_page(get_info_op_page);
+ __free_page(reply_page);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_cls_lock_info);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 5fcfb98f309e..a421e905331a 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -245,7 +245,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
/* compute 2^44*log2(input+1) */
static __u64 crush_ln(unsigned int xin)
{
- unsigned int x = xin, x1;
+ unsigned int x = xin;
int iexpon, index1, index2;
__u64 RH, LH, LL, xl64, result;
@@ -253,9 +253,15 @@ static __u64 crush_ln(unsigned int xin)
/* normalize input */
iexpon = 15;
- while (!(x & 0x18000)) {
- x <<= 1;
- iexpon--;
+
+ /*
+ * figure out number of bits we need to shift and
+ * do it in one step instead of iteratively
+ */
+ if (!(x & 0x18000)) {
+ int bits = __builtin_clz(x & 0x1FFFF) - 16;
+ x <<= bits;
+ iexpon = 15 - bits;
}
index1 = (x >> 8) << 1;
@@ -267,12 +273,11 @@ static __u64 crush_ln(unsigned int xin)
/* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */
xl64 = (__s64)x * RH;
xl64 >>= 48;
- x1 = xl64;
result = iexpon;
result <<= (12 + 32);
- index2 = x1 & 0xff;
+ index2 = xl64 & 0xff;
/* LL ~ 2^48*log2(1.0+index2/2^15) */
LL = __LL_tbl[index2];
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index ef34a02719d7..a8effc8b7280 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -835,6 +835,83 @@ int ceph_monc_get_version_async(struct ceph_mon_client *monc, const char *what,
}
EXPORT_SYMBOL(ceph_monc_get_version_async);
+static void handle_command_ack(struct ceph_mon_client *monc,
+ struct ceph_msg *msg)
+{
+ struct ceph_mon_generic_request *req;
+ void *p = msg->front.iov_base;
+ void *const end = p + msg->front_alloc_len;
+ u64 tid = le64_to_cpu(msg->hdr.tid);
+
+ dout("%s msg %p tid %llu\n", __func__, msg, tid);
+
+ ceph_decode_need(&p, end, sizeof(struct ceph_mon_request_header) +
+ sizeof(u32), bad);
+ p += sizeof(struct ceph_mon_request_header);
+
+ mutex_lock(&monc->mutex);
+ req = lookup_generic_request(&monc->generic_request_tree, tid);
+ if (!req) {
+ mutex_unlock(&monc->mutex);
+ return;
+ }
+
+ req->result = ceph_decode_32(&p);
+ __finish_generic_request(req);
+ mutex_unlock(&monc->mutex);
+
+ complete_generic_request(req);
+ return;
+
+bad:
+ pr_err("corrupt mon_command ack, tid %llu\n", tid);
+ ceph_msg_dump(msg);
+}
+
+int ceph_monc_blacklist_add(struct ceph_mon_client *monc,
+ struct ceph_entity_addr *client_addr)
+{
+ struct ceph_mon_generic_request *req;
+ struct ceph_mon_command *h;
+ int ret = -ENOMEM;
+ int len;
+
+ req = alloc_generic_request(monc, GFP_NOIO);
+ if (!req)
+ goto out;
+
+ req->request = ceph_msg_new(CEPH_MSG_MON_COMMAND, 256, GFP_NOIO, true);
+ if (!req->request)
+ goto out;
+
+ req->reply = ceph_msg_new(CEPH_MSG_MON_COMMAND_ACK, 512, GFP_NOIO,
+ true);
+ if (!req->reply)
+ goto out;
+
+ mutex_lock(&monc->mutex);
+ register_generic_request(req);
+ h = req->request->front.iov_base;
+ h->monhdr.have_version = 0;
+ h->monhdr.session_mon = cpu_to_le16(-1);
+ h->monhdr.session_mon_tid = 0;
+ h->fsid = monc->monmap->fsid;
+ h->num_strs = cpu_to_le32(1);
+ len = sprintf(h->str, "{ \"prefix\": \"osd blacklist\", \
+ \"blacklistop\": \"add\", \
+ \"addr\": \"%pISpc/%u\" }",
+ &client_addr->in_addr, le32_to_cpu(client_addr->nonce));
+ h->str_len = cpu_to_le32(len);
+ send_generic_request(monc, req);
+ mutex_unlock(&monc->mutex);
+
+ ret = wait_generic_request(req);
+out:
+ put_generic_request(req);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_monc_blacklist_add);
+
/*
* Resend pending generic requests.
*/
@@ -1139,6 +1216,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
handle_get_version_reply(monc, msg);
break;
+ case CEPH_MSG_MON_COMMAND_ACK:
+ handle_command_ack(monc, msg);
+ break;
+
case CEPH_MSG_MON_MAP:
ceph_monc_handle_map(monc, msg);
break;
@@ -1178,6 +1259,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
m = ceph_msg_get(monc->m_subscribe_ack);
break;
case CEPH_MSG_STATFS_REPLY:
+ case CEPH_MSG_MON_COMMAND_ACK:
return get_generic_reply(con, hdr, skip);
case CEPH_MSG_AUTH_REPLY:
m = ceph_msg_get(monc->m_auth_reply);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a97e7b506612..d9bf7a1d0a58 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -338,6 +338,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
ceph_osd_data_release(&op->notify.request_data);
ceph_osd_data_release(&op->notify.response_data);
break;
+ case CEPH_OSD_OP_LIST_WATCHERS:
+ ceph_osd_data_release(&op->list_watchers.response_data);
+ break;
default:
break;
}
@@ -863,6 +866,8 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
case CEPH_OSD_OP_NOTIFY:
dst->notify.cookie = cpu_to_le64(src->notify.cookie);
break;
+ case CEPH_OSD_OP_LIST_WATCHERS:
+ break;
case CEPH_OSD_OP_SETALLOCHINT:
dst->alloc_hint.expected_object_size =
cpu_to_le64(src->alloc_hint.expected_object_size);
@@ -1445,6 +1450,10 @@ static void setup_request_data(struct ceph_osd_request *req,
ceph_osdc_msg_data_add(req->r_reply,
&op->extent.osd_data);
break;
+ case CEPH_OSD_OP_LIST_WATCHERS:
+ ceph_osdc_msg_data_add(req->r_reply,
+ &op->list_watchers.response_data);
+ break;
/* both */
case CEPH_OSD_OP_CALL:
@@ -3891,12 +3900,121 @@ int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
return ret;
}
+static int decode_watcher(void **p, void *end, struct ceph_watch_item *item)
+{
+ u8 struct_v;
+ u32 struct_len;
+ int ret;
+
+ ret = ceph_start_decoding(p, end, 2, "watch_item_t",
+ &struct_v, &struct_len);
+ if (ret)
+ return ret;
+
+ ceph_decode_copy(p, &item->name, sizeof(item->name));
+ item->cookie = ceph_decode_64(p);
+ *p += 4; /* skip timeout_seconds */
+ if (struct_v >= 2) {
+ ceph_decode_copy(p, &item->addr, sizeof(item->addr));
+ ceph_decode_addr(&item->addr);
+ }
+
+ dout("%s %s%llu cookie %llu addr %s\n", __func__,
+ ENTITY_NAME(item->name), item->cookie,
+ ceph_pr_addr(&item->addr.in_addr));
+ return 0;
+}
+
+static int decode_watchers(void **p, void *end,
+ struct ceph_watch_item **watchers,
+ u32 *num_watchers)
+{
+ u8 struct_v;
+ u32 struct_len;
+ int i;
+ int ret;
+
+ ret = ceph_start_decoding(p, end, 1, "obj_list_watch_response_t",
+ &struct_v, &struct_len);
+ if (ret)
+ return ret;
+
+ *num_watchers = ceph_decode_32(p);
+ *watchers = kcalloc(*num_watchers, sizeof(**watchers), GFP_NOIO);
+ if (!*watchers)
+ return -ENOMEM;
+
+ for (i = 0; i < *num_watchers; i++) {
+ ret = decode_watcher(p, end, *watchers + i);
+ if (ret) {
+ kfree(*watchers);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * On success, the caller is responsible for:
+ *
+ * kfree(watchers);
+ */
+int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ struct ceph_watch_item **watchers,
+ u32 *num_watchers)
+{
+ struct ceph_osd_request *req;
+ struct page **pages;
+ int ret;
+
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
+ if (!req)
+ return -ENOMEM;
+
+ ceph_oid_copy(&req->r_base_oid, oid);
+ ceph_oloc_copy(&req->r_base_oloc, oloc);
+ req->r_flags = CEPH_OSD_FLAG_READ;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ if (ret)
+ goto out_put_req;
+
+ pages = ceph_alloc_page_vector(1, GFP_NOIO);
+ if (IS_ERR(pages)) {
+ ret = PTR_ERR(pages);
+ goto out_put_req;
+ }
+
+ osd_req_op_init(req, 0, CEPH_OSD_OP_LIST_WATCHERS, 0);
+ ceph_osd_data_pages_init(osd_req_op_data(req, 0, list_watchers,
+ response_data),
+ pages, PAGE_SIZE, 0, false, true);
+
+ ceph_osdc_start_request(osdc, req, false);
+ ret = ceph_osdc_wait_request(osdc, req);
+ if (ret >= 0) {
+ void *p = page_address(pages[0]);
+ void *const end = p + req->r_ops[0].outdata_len;
+
+ ret = decode_watchers(&p, end, watchers, num_watchers);
+ }
+
+out_put_req:
+ ceph_osdc_put_request(req);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_osdc_list_watchers);
+
/*
* Call all pending notify callbacks - for use after a watch is
* unregistered, to make sure no more callbacks for it will be invoked
*/
void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
{
+ dout("%s osdc %p\n", __func__, osdc);
flush_workqueue(osdc->notify_wq);
}
EXPORT_SYMBOL(ceph_osdc_flush_notifies);
@@ -3910,6 +4028,57 @@ void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc)
EXPORT_SYMBOL(ceph_osdc_maybe_request_map);
/*
+ * Execute an OSD class method on an object.
+ *
+ * @flags: CEPH_OSD_FLAG_*
+ * @resp_len: out param for reply length
+ */
+int ceph_osdc_call(struct ceph_osd_client *osdc,
+ struct ceph_object_id *oid,
+ struct ceph_object_locator *oloc,
+ const char *class, const char *method,
+ unsigned int flags,
+ struct page *req_page, size_t req_len,
+ struct page *resp_page, size_t *resp_len)
+{
+ struct ceph_osd_request *req;
+ int ret;
+
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
+ if (!req)
+ return -ENOMEM;
+
+ ceph_oid_copy(&req->r_base_oid, oid);
+ ceph_oloc_copy(&req->r_base_oloc, oloc);
+ req->r_flags = flags;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ if (ret)
+ goto out_put_req;
+
+ osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method);
+ if (req_page)
+ osd_req_op_cls_request_data_pages(req, 0, &req_page, req_len,
+ 0, false, false);
+ if (resp_page)
+ osd_req_op_cls_response_data_pages(req, 0, &resp_page,
+ PAGE_SIZE, 0, false, false);
+
+ ceph_osdc_start_request(osdc, req, false);
+ ret = ceph_osdc_wait_request(osdc, req);
+ if (ret >= 0) {
+ ret = req->r_ops[0].rval;
+ if (resp_page)
+ *resp_len = req->r_ops[0].outdata_len;
+ }
+
+out_put_req:
+ ceph_osdc_put_request(req);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_osdc_call);
+
+/*
* init, shutdown
*/
int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b7de71f8d5d3..bfb973aebb5b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -323,6 +323,27 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
}
EXPORT_SYMBOL(__skb_free_datagram_locked);
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
+ unsigned int flags)
+{
+ int err = 0;
+
+ if (flags & MSG_PEEK) {
+ err = -ENOENT;
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ if (skb == skb_peek(&sk->sk_receive_queue)) {
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ atomic_dec(&skb->users);
+ err = 0;
+ }
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+ }
+
+ atomic_inc(&sk->sk_drops);
+ return err;
+}
+EXPORT_SYMBOL(__sk_queue_drop_skb);
+
/**
* skb_kill_datagram - Free a datagram skbuff forcibly
* @sk: socket
@@ -346,23 +367,10 @@ EXPORT_SYMBOL(__skb_free_datagram_locked);
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
- int err = 0;
-
- if (flags & MSG_PEEK) {
- err = -ENOENT;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (skb == skb_peek(&sk->sk_receive_queue)) {
- __skb_unlink(skb, &sk->sk_receive_queue);
- atomic_dec(&skb->users);
- err = 0;
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- }
+ int err = __sk_queue_drop_skb(sk, skb, flags);
kfree_skb(skb);
- atomic_inc(&sk->sk_drops);
sk_mem_reclaim_partial(sk);
-
return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index c0c291f721d6..f55fb4536016 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -139,7 +139,6 @@
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
-#include <linux/sctp.h>
#include <linux/crash_dump.h>
#include "net-sysfs.h"
@@ -2492,141 +2491,6 @@ out:
}
EXPORT_SYMBOL(skb_checksum_help);
-/* skb_csum_offload_check - Driver helper function to determine if a device
- * with limited checksum offload capabilities is able to offload the checksum
- * for a given packet.
- *
- * Arguments:
- * skb - sk_buff for the packet in question
- * spec - contains the description of what device can offload
- * csum_encapped - returns true if the checksum being offloaded is
- * encpasulated. That is it is checksum for the transport header
- * in the inner headers.
- * checksum_help - when set indicates that helper function should
- * call skb_checksum_help if offload checks fail
- *
- * Returns:
- * true: Packet has passed the checksum checks and should be offloadable to
- * the device (a driver may still need to check for additional
- * restrictions of its device)
- * false: Checksum is not offloadable. If checksum_help was set then
- * skb_checksum_help was called to resolve checksum for non-GSO
- * packets and when IP protocol is not SCTP
- */
-bool __skb_csum_offload_chk(struct sk_buff *skb,
- const struct skb_csum_offl_spec *spec,
- bool *csum_encapped,
- bool csum_help)
-{
- struct iphdr *iph;
- struct ipv6hdr *ipv6;
- void *nhdr;
- int protocol;
- u8 ip_proto;
-
- if (skb->protocol == htons(ETH_P_8021Q) ||
- skb->protocol == htons(ETH_P_8021AD)) {
- if (!spec->vlan_okay)
- goto need_help;
- }
-
- /* We check whether the checksum refers to a transport layer checksum in
- * the outermost header or an encapsulated transport layer checksum that
- * corresponds to the inner headers of the skb. If the checksum is for
- * something else in the packet we need help.
- */
- if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
- /* Non-encapsulated checksum */
- protocol = eproto_to_ipproto(vlan_get_protocol(skb));
- nhdr = skb_network_header(skb);
- *csum_encapped = false;
- if (spec->no_not_encapped)
- goto need_help;
- } else if (skb->encapsulation && spec->encap_okay &&
- skb_checksum_start_offset(skb) ==
- skb_inner_transport_offset(skb)) {
- /* Encapsulated checksum */
- *csum_encapped = true;
- switch (skb->inner_protocol_type) {
- case ENCAP_TYPE_ETHER:
- protocol = eproto_to_ipproto(skb->inner_protocol);
- break;
- case ENCAP_TYPE_IPPROTO:
- protocol = skb->inner_protocol;
- break;
- }
- nhdr = skb_inner_network_header(skb);
- } else {
- goto need_help;
- }
-
- switch (protocol) {
- case IPPROTO_IP:
- if (!spec->ipv4_okay)
- goto need_help;
- iph = nhdr;
- ip_proto = iph->protocol;
- if (iph->ihl != 5 && !spec->ip_options_okay)
- goto need_help;
- break;
- case IPPROTO_IPV6:
- if (!spec->ipv6_okay)
- goto need_help;
- if (spec->no_encapped_ipv6 && *csum_encapped)
- goto need_help;
- ipv6 = nhdr;
- nhdr += sizeof(*ipv6);
- ip_proto = ipv6->nexthdr;
- break;
- default:
- goto need_help;
- }
-
-ip_proto_again:
- switch (ip_proto) {
- case IPPROTO_TCP:
- if (!spec->tcp_okay ||
- skb->csum_offset != offsetof(struct tcphdr, check))
- goto need_help;
- break;
- case IPPROTO_UDP:
- if (!spec->udp_okay ||
- skb->csum_offset != offsetof(struct udphdr, check))
- goto need_help;
- break;
- case IPPROTO_SCTP:
- if (!spec->sctp_okay ||
- skb->csum_offset != offsetof(struct sctphdr, checksum))
- goto cant_help;
- break;
- case NEXTHDR_HOP:
- case NEXTHDR_ROUTING:
- case NEXTHDR_DEST: {
- u8 *opthdr = nhdr;
-
- if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
- goto need_help;
-
- ip_proto = opthdr[0];
- nhdr += (opthdr[1] + 1) << 3;
-
- goto ip_proto_again;
- }
- default:
- goto need_help;
- }
-
- /* Passed the tests for offloading checksum */
- return true;
-
-need_help:
- if (csum_help && !skb_shinfo(skb)->gso_size)
- skb_checksum_help(skb);
-cant_help:
- return false;
-}
-EXPORT_SYMBOL(__skb_csum_offload_chk);
-
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
__be16 type = skb->protocol;
@@ -5273,6 +5137,13 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
return NULL;
}
+static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
+{
+ struct net_device *dev = data;
+
+ return upper_dev == dev;
+}
+
/**
* netdev_has_upper_dev - Check if device is linked to an upper device
* @dev: device
@@ -5287,11 +5158,30 @@ bool netdev_has_upper_dev(struct net_device *dev,
{
ASSERT_RTNL();
- return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
+ return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+ upper_dev);
}
EXPORT_SYMBOL(netdev_has_upper_dev);
/**
+ * netdev_has_upper_dev_all - Check if device is linked to an upper device
+ * @dev: device
+ * @upper_dev: upper device to check
+ *
+ * Find out if a device is linked to specified upper device and return true
+ * in case it is. Note that this checks the entire upper device chain.
+ * The caller must hold rcu lock.
+ */
+
+bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
+ struct net_device *upper_dev)
+{
+ return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+ upper_dev);
+}
+EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
+
+/**
* netdev_has_any_upper_dev - Check if device is linked to some device
* @dev: device
*
@@ -5302,7 +5192,7 @@ static bool netdev_has_any_upper_dev(struct net_device *dev)
{
ASSERT_RTNL();
- return !list_empty(&dev->all_adj_list.upper);
+ return !list_empty(&dev->adj_list.upper);
}
/**
@@ -5329,6 +5219,20 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_master_upper_dev_get);
+/**
+ * netdev_has_any_lower_dev - Check if device is linked to some device
+ * @dev: device
+ *
+ * Find out if a device is linked to a lower device and return true in case
+ * it is. The caller must hold the RTNL lock.
+ */
+static bool netdev_has_any_lower_dev(struct net_device *dev)
+{
+ ASSERT_RTNL();
+
+ return !list_empty(&dev->adj_list.lower);
+}
+
void *netdev_adjacent_get_private(struct list_head *adj_list)
{
struct netdev_adjacent *adj;
@@ -5365,16 +5269,8 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
-/**
- * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next device from the dev's upper list, starting from iter
- * position. The caller must hold RCU read lock.
- */
-struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
- struct list_head **iter)
+static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *upper;
@@ -5382,14 +5278,41 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
- if (&upper->list == &dev->all_adj_list.upper)
+ if (&upper->list == &dev->adj_list.upper)
return NULL;
*iter = &upper->list;
return upper->dev;
}
-EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
+
+int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{
+ struct net_device *udev;
+ struct list_head *iter;
+ int ret;
+
+ for (iter = &dev->adj_list.upper,
+ udev = netdev_next_upper_dev_rcu(dev, &iter);
+ udev;
+ udev = netdev_next_upper_dev_rcu(dev, &iter)) {
+ /* first is the upper device itself */
+ ret = fn(udev, data);
+ if (ret)
+ return ret;
+
+ /* then look at all of its upper devices */
+ ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
/**
* netdev_lower_get_next_private - Get the next ->private from the
@@ -5472,51 +5395,90 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
}
EXPORT_SYMBOL(netdev_lower_get_next);
-/**
- * netdev_all_lower_get_next - Get the next device from all lower neighbour list
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next netdev_adjacent from the dev's all lower neighbour
- * list, starting from iter position. The caller must hold RTNL lock or
- * its own locking that guarantees that the neighbour all lower
- * list will remain unchanged.
- */
-struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
+static struct net_device *netdev_next_lower_dev(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *lower;
lower = list_entry(*iter, struct netdev_adjacent, list);
- if (&lower->list == &dev->all_adj_list.lower)
+ if (&lower->list == &dev->adj_list.lower)
return NULL;
*iter = lower->list.next;
return lower->dev;
}
-EXPORT_SYMBOL(netdev_all_lower_get_next);
-/**
- * netdev_all_lower_get_next_rcu - Get the next device from all
- * lower neighbour list, RCU variant
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next netdev_adjacent from the dev's all lower neighbour
- * list, starting from iter position. The caller must hold RCU read lock.
- */
-struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
- struct list_head **iter)
+int netdev_walk_all_lower_dev(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{
+ struct net_device *ldev;
+ struct list_head *iter;
+ int ret;
+
+ for (iter = &dev->adj_list.lower,
+ ldev = netdev_next_lower_dev(dev, &iter);
+ ldev;
+ ldev = netdev_next_lower_dev(dev, &iter)) {
+ /* first is the lower device itself */
+ ret = fn(ldev, data);
+ if (ret)
+ return ret;
+
+ /* then look at all of its lower devices */
+ ret = netdev_walk_all_lower_dev(ldev, fn, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
+
+static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
{
struct netdev_adjacent *lower;
- lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
- struct netdev_adjacent, list);
+ lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+ if (&lower->list == &dev->adj_list.lower)
+ return NULL;
+
+ *iter = &lower->list;
+
+ return lower->dev;
+}
- return lower ? lower->dev : NULL;
+int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{
+ struct net_device *ldev;
+ struct list_head *iter;
+ int ret;
+
+ for (iter = &dev->adj_list.lower,
+ ldev = netdev_next_lower_dev_rcu(dev, &iter);
+ ldev;
+ ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
+ /* first is the lower device itself */
+ ret = fn(ldev, data);
+ if (ret)
+ return ret;
+
+ /* then look at all of its lower devices */
+ ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
-EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
+EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu);
/**
* netdev_lower_get_first_private_rcu - Get the first ->private from the
@@ -5598,7 +5560,10 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj = __netdev_find_adj(adj_dev, dev_list);
if (adj) {
- adj->ref_nr++;
+ adj->ref_nr += 1;
+ pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n",
+ dev->name, adj_dev->name, adj->ref_nr);
+
return 0;
}
@@ -5612,8 +5577,8 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj->private = private;
dev_hold(adj_dev);
- pr_debug("dev_hold for %s, because of link added from %s to %s\n",
- adj_dev->name, dev->name, adj_dev->name);
+ pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
+ dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
@@ -5647,22 +5612,28 @@ free_adj:
static void __netdev_adjacent_dev_remove(struct net_device *dev,
struct net_device *adj_dev,
+ u16 ref_nr,
struct list_head *dev_list)
{
struct netdev_adjacent *adj;
+ pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n",
+ dev->name, adj_dev->name, ref_nr);
+
adj = __netdev_find_adj(adj_dev, dev_list);
if (!adj) {
- pr_err("tried to remove device %s from %s\n",
+ pr_err("Adjacency does not exist for device %s from %s\n",
dev->name, adj_dev->name);
- BUG();
+ WARN_ON(1);
+ return;
}
- if (adj->ref_nr > 1) {
- pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
- adj->ref_nr-1);
- adj->ref_nr--;
+ if (adj->ref_nr > ref_nr) {
+ pr_debug("adjacency: %s to %s ref_nr - %d = %d\n",
+ dev->name, adj_dev->name, ref_nr,
+ adj->ref_nr - ref_nr);
+ adj->ref_nr -= ref_nr;
return;
}
@@ -5673,7 +5644,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
list_del_rcu(&adj->list);
- pr_debug("dev_put for %s, because link removed from %s to %s\n",
+ pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
adj_dev->name, dev->name, adj_dev->name);
dev_put(adj_dev);
kfree_rcu(adj, rcu);
@@ -5687,73 +5658,45 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
{
int ret;
- ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
- master);
+ ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list,
+ private, master);
if (ret)
return ret;
- ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
- false);
+ ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list,
+ private, false);
if (ret) {
- __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
+ __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list);
return ret;
}
return 0;
}
-static int __netdev_adjacent_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
-{
- return __netdev_adjacent_dev_link_lists(dev, upper_dev,
- &dev->all_adj_list.upper,
- &upper_dev->all_adj_list.lower,
- NULL, false);
-}
-
static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
struct net_device *upper_dev,
+ u16 ref_nr,
struct list_head *up_list,
struct list_head *down_list)
{
- __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
- __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
-}
-
-static void __netdev_adjacent_dev_unlink(struct net_device *dev,
- struct net_device *upper_dev)
-{
- __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
- &dev->all_adj_list.upper,
- &upper_dev->all_adj_list.lower);
+ __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
+ __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
}
static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
struct net_device *upper_dev,
void *private, bool master)
{
- int ret = __netdev_adjacent_dev_link(dev, upper_dev);
-
- if (ret)
- return ret;
-
- ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
- &dev->adj_list.upper,
- &upper_dev->adj_list.lower,
- private, master);
- if (ret) {
- __netdev_adjacent_dev_unlink(dev, upper_dev);
- return ret;
- }
-
- return 0;
+ return __netdev_adjacent_dev_link_lists(dev, upper_dev,
+ &dev->adj_list.upper,
+ &upper_dev->adj_list.lower,
+ private, master);
}
static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
struct net_device *upper_dev)
{
- __netdev_adjacent_dev_unlink(dev, upper_dev);
- __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+ __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
&dev->adj_list.upper,
&upper_dev->adj_list.lower);
}
@@ -5763,7 +5706,6 @@ static int __netdev_upper_dev_link(struct net_device *dev,
void *upper_priv, void *upper_info)
{
struct netdev_notifier_changeupper_info changeupper_info;
- struct netdev_adjacent *i, *j, *to_i, *to_j;
int ret = 0;
ASSERT_RTNL();
@@ -5772,10 +5714,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
+ if (netdev_has_upper_dev(upper_dev, dev))
return -EBUSY;
- if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
+ if (netdev_has_upper_dev(dev, upper_dev))
return -EEXIST;
if (master && netdev_master_upper_dev_get(dev))
@@ -5797,80 +5739,15 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
return ret;
- /* Now that we linked these devs, make all the upper_dev's
- * all_adj_list.upper visible to every dev's all_adj_list.lower an
- * versa, and don't forget the devices itself. All of these
- * links are non-neighbours.
- */
- list_for_each_entry(i, &dev->all_adj_list.lower, list) {
- list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
- pr_debug("Interlinking %s with %s, non-neighbour\n",
- i->dev->name, j->dev->name);
- ret = __netdev_adjacent_dev_link(i->dev, j->dev);
- if (ret)
- goto rollback_mesh;
- }
- }
-
- /* add dev to every upper_dev's upper device */
- list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
- pr_debug("linking %s's upper device %s with %s\n",
- upper_dev->name, i->dev->name, dev->name);
- ret = __netdev_adjacent_dev_link(dev, i->dev);
- if (ret)
- goto rollback_upper_mesh;
- }
-
- /* add upper_dev to every dev's lower device */
- list_for_each_entry(i, &dev->all_adj_list.lower, list) {
- pr_debug("linking %s's lower device %s with %s\n", dev->name,
- i->dev->name, upper_dev->name);
- ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
- if (ret)
- goto rollback_lower_mesh;
- }
-
ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
- goto rollback_lower_mesh;
+ goto rollback;
return 0;
-rollback_lower_mesh:
- to_i = i;
- list_for_each_entry(i, &dev->all_adj_list.lower, list) {
- if (i == to_i)
- break;
- __netdev_adjacent_dev_unlink(i->dev, upper_dev);
- }
-
- i = NULL;
-
-rollback_upper_mesh:
- to_i = i;
- list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
- if (i == to_i)
- break;
- __netdev_adjacent_dev_unlink(dev, i->dev);
- }
-
- i = j = NULL;
-
-rollback_mesh:
- to_i = i;
- to_j = j;
- list_for_each_entry(i, &dev->all_adj_list.lower, list) {
- list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
- if (i == to_i && j == to_j)
- break;
- __netdev_adjacent_dev_unlink(i->dev, j->dev);
- }
- if (i == to_i)
- break;
- }
-
+rollback:
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
return ret;
@@ -5927,7 +5804,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
struct netdev_notifier_changeupper_info changeupper_info;
- struct netdev_adjacent *i, *j;
ASSERT_RTNL();
changeupper_info.upper_dev = upper_dev;
@@ -5939,23 +5815,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
- /* Here is the tricky part. We must remove all dev's lower
- * devices from all upper_dev's upper devices and vice
- * versa, to maintain the graph relationship.
- */
- list_for_each_entry(i, &dev->all_adj_list.lower, list)
- list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
- __netdev_adjacent_dev_unlink(i->dev, j->dev);
-
- /* remove also the devices itself from lower/upper device
- * list
- */
- list_for_each_entry(i, &dev->all_adj_list.lower, list)
- __netdev_adjacent_dev_unlink(i->dev, upper_dev);
-
- list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
- __netdev_adjacent_dev_unlink(dev, i->dev);
-
call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
&changeupper_info.info);
}
@@ -6493,9 +6352,18 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
if (new_mtu == dev->mtu)
return 0;
- /* MTU must be positive. */
- if (new_mtu < 0)
+ /* MTU must be positive, and in range */
+ if (new_mtu < 0 || new_mtu < dev->min_mtu) {
+ net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n",
+ dev->name, new_mtu, dev->min_mtu);
return -EINVAL;
+ }
+
+ if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
+ net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n",
+ dev->name, new_mtu, dev->max_mtu);
+ return -EINVAL;
+ }
if (!netif_device_present(dev))
return -ENODEV;
@@ -6770,6 +6638,7 @@ static void rollback_registered_many(struct list_head *head)
/* Notifier chain MUST detach us all upper devices. */
WARN_ON(netdev_has_any_upper_dev(dev));
+ WARN_ON(netdev_has_any_lower_dev(dev));
/* Remove entries from kobject tree */
netdev_unregister_kobject(dev);
@@ -7648,8 +7517,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->link_watch_list);
INIT_LIST_HEAD(&dev->adj_list.upper);
INIT_LIST_HEAD(&dev->adj_list.lower);
- INIT_LIST_HEAD(&dev->all_adj_list.upper);
- INIT_LIST_HEAD(&dev->all_adj_list.lower);
INIT_LIST_HEAD(&dev->ptype_all);
INIT_LIST_HEAD(&dev->ptype_specific);
#ifdef CONFIG_NET_SCHED
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 1b5063088f1a..c14f8b661db9 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -341,15 +341,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
mutex_unlock(&devlink_mutex);
}
-static struct genl_family devlink_nl_family = {
- .id = GENL_ID_GENERATE,
- .name = DEVLINK_GENL_NAME,
- .version = DEVLINK_GENL_VERSION,
- .maxattr = DEVLINK_ATTR_MAX,
- .netnsok = true,
- .pre_doit = devlink_nl_pre_doit,
- .post_doit = devlink_nl_post_doit,
-};
+static struct genl_family devlink_nl_family;
enum devlink_multicast_groups {
DEVLINK_MCGRP_CONFIG,
@@ -608,6 +600,8 @@ static int devlink_port_type_set(struct devlink *devlink,
if (devlink->ops && devlink->ops->port_type_set) {
if (port_type == DEVLINK_PORT_TYPE_NOTSET)
return -EINVAL;
+ if (port_type == devlink_port->type)
+ return 0;
err = devlink->ops->port_type_set(devlink_port, port_type);
if (err)
return err;
@@ -1618,6 +1612,20 @@ static const struct genl_ops devlink_nl_ops[] = {
},
};
+static struct genl_family devlink_nl_family __ro_after_init = {
+ .name = DEVLINK_GENL_NAME,
+ .version = DEVLINK_GENL_VERSION,
+ .maxattr = DEVLINK_ATTR_MAX,
+ .netnsok = true,
+ .pre_doit = devlink_nl_pre_doit,
+ .post_doit = devlink_nl_post_doit,
+ .module = THIS_MODULE,
+ .ops = devlink_nl_ops,
+ .n_ops = ARRAY_SIZE(devlink_nl_ops),
+ .mcgrps = devlink_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
+};
+
/**
* devlink_alloc - Allocate new devlink instance resources
*
@@ -1840,9 +1848,7 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister);
static int __init devlink_module_init(void)
{
- return genl_register_family_with_ops_groups(&devlink_nl_family,
- devlink_nl_ops,
- devlink_nl_mcgrps);
+ return genl_register_family(&devlink_nl_family);
}
static void __exit devlink_module_exit(void)
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 72cfb0c61125..8e0c0635ee97 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -59,12 +59,7 @@ struct dm_hw_stat_delta {
unsigned long last_drop_val;
};
-static struct genl_family net_drop_monitor_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = "NET_DM",
- .version = 2,
-};
+static struct genl_family net_drop_monitor_family;
static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
@@ -351,6 +346,17 @@ static const struct genl_ops dropmon_ops[] = {
},
};
+static struct genl_family net_drop_monitor_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = "NET_DM",
+ .version = 2,
+ .module = THIS_MODULE,
+ .ops = dropmon_ops,
+ .n_ops = ARRAY_SIZE(dropmon_ops),
+ .mcgrps = dropmon_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps),
+};
+
static struct notifier_block dropmon_net_notifier = {
.notifier_call = dropmon_net_event
};
@@ -367,8 +373,7 @@ static int __init init_net_drop_monitor(void)
return -ENOSPC;
}
- rc = genl_register_family_with_ops_groups(&net_drop_monitor_family,
- dropmon_ops, dropmon_mcgrps);
+ rc = genl_register_family(&net_drop_monitor_family);
if (rc) {
pr_err("Could not create drop monitor netlink family\n");
return rc;
diff --git a/net/core/filter.c b/net/core/filter.c
index 00351cdf7d0c..cd9e2ba66b0e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2492,6 +2492,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_raw_smp_processor_id_proto;
+ case BPF_FUNC_get_numa_node_id:
+ return &bpf_get_numa_node_id_proto;
case BPF_FUNC_tail_call:
return &bpf_tail_call_proto;
case BPF_FUNC_ktime_get_ns:
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index e5f84c26ba1a..88fd64250b02 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -130,6 +130,19 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
}
EXPORT_SYMBOL(lwtunnel_build_state);
+void lwtstate_free(struct lwtunnel_state *lws)
+{
+ const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];
+
+ if (ops->destroy_state) {
+ ops->destroy_state(lws);
+ kfree_rcu(lws, rcu);
+ } else {
+ kfree(lws);
+ }
+}
+EXPORT_SYMBOL(lwtstate_free);
+
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
{
const struct lwtunnel_encap_ops *ops;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 6e4f34721080..d4fe28606ff5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -950,10 +950,13 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
}
while (--i >= new_num) {
+ struct kobject *kobj = &dev->_rx[i].kobj;
+
+ if (!list_empty(&dev_net(dev)->exit_list))
+ kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
- sysfs_remove_group(&dev->_rx[i].kobj,
- dev->sysfs_rx_queue_group);
- kobject_put(&dev->_rx[i].kobj);
+ sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
+ kobject_put(kobj);
}
return error;
@@ -1340,6 +1343,8 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
while (--i >= new_num) {
struct netdev_queue *queue = dev->_tx + i;
+ if (!list_empty(&dev_net(dev)->exit_list))
+ queue->kobj.uevent_suppress = 1;
#ifdef CONFIG_BQL
sysfs_remove_group(&queue->kobj, &dql_group);
#endif
@@ -1525,6 +1530,9 @@ void netdev_unregister_kobject(struct net_device *ndev)
{
struct device *dev = &(ndev->dev);
+ if (!list_empty(&dev_net(ndev)->exit_list))
+ dev_set_uevent_suppress(dev, 1);
+
kobject_get(&dev->kobj);
remove_queue_kobjects(ndev);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 42bdda0e616b..b9243b14af17 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -309,6 +309,16 @@ out_undo:
#ifdef CONFIG_NET_NS
+static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
+{
+ return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
+}
+
+static void dec_net_namespaces(struct ucounts *ucounts)
+{
+ dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
+}
+
static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;
@@ -350,19 +360,34 @@ void net_drop_ns(void *p)
struct net *copy_net_ns(unsigned long flags,
struct user_namespace *user_ns, struct net *old_net)
{
+ struct ucounts *ucounts;
struct net *net;
int rv;
if (!(flags & CLONE_NEWNET))
return get_net(old_net);
+ ucounts = inc_net_namespaces(user_ns);
+ if (!ucounts)
+ return ERR_PTR(-ENOSPC);
+
net = net_alloc();
- if (!net)
+ if (!net) {
+ dec_net_namespaces(ucounts);
return ERR_PTR(-ENOMEM);
+ }
get_user_ns(user_ns);
- mutex_lock(&net_mutex);
+ rv = mutex_lock_killable(&net_mutex);
+ if (rv < 0) {
+ net_free(net);
+ dec_net_namespaces(ucounts);
+ put_user_ns(user_ns);
+ return ERR_PTR(rv);
+ }
+
+ net->ucounts = ucounts;
rv = setup_net(net, user_ns);
if (rv == 0) {
rtnl_lock();
@@ -371,6 +396,7 @@ struct net *copy_net_ns(unsigned long flags,
}
mutex_unlock(&net_mutex);
if (rv < 0) {
+ dec_net_namespaces(ucounts);
put_user_ns(user_ns);
net_drop_ns(net);
return ERR_PTR(rv);
@@ -443,6 +469,7 @@ static void cleanup_net(struct work_struct *work)
/* Finally it is safe to free my network namespace structure */
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
+ dec_net_namespaces(net->ucounts);
put_user_ns(net->user_ns);
net_drop_ns(net);
}
@@ -1004,11 +1031,17 @@ static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return 0;
}
+static struct user_namespace *netns_owner(struct ns_common *ns)
+{
+ return to_net_ns(ns)->user_ns;
+}
+
const struct proc_ns_operations netns_operations = {
.name = "net",
.type = CLONE_NEWNET,
.get = netns_get,
.put = netns_put,
.install = netns_install,
+ .owner = netns_owner,
};
#endif
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index bbd118b19aef..5219a9e2127a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2286,7 +2286,7 @@ out:
static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
{
- pkt_dev->pkt_overhead = LL_RESERVED_SPACE(pkt_dev->odev);
+ pkt_dev->pkt_overhead = 0;
pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32);
pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
@@ -2777,13 +2777,13 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
}
static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
- struct pktgen_dev *pkt_dev,
- unsigned int extralen)
+ struct pktgen_dev *pkt_dev)
{
+ unsigned int extralen = LL_RESERVED_SPACE(dev);
struct sk_buff *skb = NULL;
- unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen +
- pkt_dev->pkt_overhead;
+ unsigned int size;
+ size = pkt_dev->cur_pkt_size + 64 + extralen + pkt_dev->pkt_overhead;
if (pkt_dev->flags & F_NODE) {
int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id();
@@ -2796,8 +2796,9 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
}
+ /* the caller pre-fetches from skb->data and reserves for the mac hdr */
if (likely(skb))
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, extralen - 16);
return skb;
}
@@ -2830,16 +2831,14 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
mod_cur_headers(pkt_dev);
queue_map = pkt_dev->cur_queue_map;
- datalen = (odev->hard_header_len + 16) & ~0xf;
-
- skb = pktgen_alloc_skb(odev, pkt_dev, datalen);
+ skb = pktgen_alloc_skb(odev, pkt_dev);
if (!skb) {
sprintf(pkt_dev->result, "No memory");
return NULL;
}
prefetchw(skb->data);
- skb_reserve(skb, datalen);
+ skb_reserve(skb, 16);
/* Reserve for ethernet and IP header */
eth = (__u8 *) skb_push(skb, 14);
@@ -2959,7 +2958,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
mod_cur_headers(pkt_dev);
queue_map = pkt_dev->cur_queue_map;
- skb = pktgen_alloc_skb(odev, pkt_dev, 16);
+ skb = pktgen_alloc_skb(odev, pkt_dev);
if (!skb) {
sprintf(pkt_dev->result, "No memory");
return NULL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3ac8946bf244..fb7348f13501 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1144,6 +1144,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
return 0;
+ memset(&vf_vlan_info, 0, sizeof(vf_vlan_info));
+
vf_mac.vf =
vf_vlan.vf =
vf_vlan_info.vf =
@@ -1753,6 +1755,9 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
len++;
}
+ if (len == 0)
+ return -EINVAL;
+
err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan,
ivvl[0]->qos, ivvl[0]->vlan_proto);
if (err < 0)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d36c7548952f..1e3e0087245b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1962,37 +1962,13 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
return false;
}
-ssize_t skb_socket_splice(struct sock *sk,
- struct pipe_inode_info *pipe,
- struct splice_pipe_desc *spd)
-{
- int ret;
-
- /* Drop the socket lock, otherwise we have reverse
- * locking dependencies between sk_lock and i_mutex
- * here as compared to sendfile(). We enter here
- * with the socket lock held, and splice_to_pipe() will
- * grab the pipe inode lock. For sendfile() emulation,
- * we call into ->sendpage() with the i_mutex lock held
- * and networking will grab the socket lock.
- */
- release_sock(sk);
- ret = splice_to_pipe(pipe, spd);
- lock_sock(sk);
-
- return ret;
-}
-
/*
* Map data from the skb to a pipe. Should handle both the linear part,
* the fragments, and the frag list.
*/
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int tlen,
- unsigned int flags,
- ssize_t (*splice_cb)(struct sock *,
- struct pipe_inode_info *,
- struct splice_pipe_desc *))
+ unsigned int flags)
{
struct partial_page partial[MAX_SKB_FRAGS];
struct page *pages[MAX_SKB_FRAGS];
@@ -2009,7 +1985,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
if (spd.nr_pages)
- ret = splice_cb(sk, pipe, &spd);
+ ret = splice_to_pipe(pipe, &spd);
return ret;
}
@@ -4528,13 +4504,18 @@ EXPORT_SYMBOL(skb_ensure_writable);
int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
{
struct vlan_hdr *vhdr;
- unsigned int offset = skb->data - skb_mac_header(skb);
+ int offset = skb->data - skb_mac_header(skb);
int err;
- __skb_push(skb, offset);
+ if (WARN_ONCE(offset,
+ "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n",
+ offset)) {
+ return -EINVAL;
+ }
+
err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
if (unlikely(err))
- goto pull;
+ return err;
skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
@@ -4551,13 +4532,14 @@ int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
skb_set_network_header(skb, ETH_HLEN);
skb_reset_mac_len(skb);
-pull:
- __skb_pull(skb, offset);
return err;
}
EXPORT_SYMBOL(__skb_vlan_pop);
+/* Pop a vlan tag either from hwaccel or from payload.
+ * Expects skb->data at mac header.
+ */
int skb_vlan_pop(struct sk_buff *skb)
{
u16 vlan_tci;
@@ -4588,29 +4570,30 @@ int skb_vlan_pop(struct sk_buff *skb)
}
EXPORT_SYMBOL(skb_vlan_pop);
+/* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present).
+ * Expects skb->data at mac header.
+ */
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
{
if (skb_vlan_tag_present(skb)) {
- unsigned int offset = skb->data - skb_mac_header(skb);
+ int offset = skb->data - skb_mac_header(skb);
int err;
- /* __vlan_insert_tag expect skb->data pointing to mac header.
- * So change skb->data before calling it and change back to
- * original position later
- */
- __skb_push(skb, offset);
+ if (WARN_ONCE(offset,
+ "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n",
+ offset)) {
+ return -EINVAL;
+ }
+
err = __vlan_insert_tag(skb, skb->vlan_proto,
skb_vlan_tag_get(skb));
- if (err) {
- __skb_pull(skb, offset);
+ if (err)
return err;
- }
skb->protocol = skb->vlan_proto;
skb->mac_len += VLAN_HLEN;
skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
- __skb_pull(skb, offset);
}
__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 038e660ef844..d8e4532e89e7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1363,6 +1363,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
slab = prot->slab;
cgroup_sk_free(&sk->sk_cgrp_data);
+ mem_cgroup_sk_free(sk);
security_sk_free(sk);
if (slab != NULL)
kmem_cache_free(slab, sk);
@@ -1399,6 +1400,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_net_set(sk, net);
atomic_set(&sk->sk_wmem_alloc, 1);
+ mem_cgroup_sk_alloc(sk);
cgroup_sk_alloc(&sk->sk_cgrp_data);
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
@@ -1545,6 +1547,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
+ mem_cgroup_sk_alloc(newsk);
cgroup_sk_alloc(&newsk->sk_cgrp_data);
/*
@@ -1569,9 +1572,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
sk_set_socket(newsk, NULL);
newsk->sk_wq = NULL;
- if (mem_cgroup_sockets_enabled && sk->sk_memcg)
- sock_update_memcg(newsk);
-
if (newsk->sk_prot->sockets_allocated)
sk_sockets_allocated_inc(newsk);
@@ -2091,24 +2091,18 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
EXPORT_SYMBOL(sk_wait_data);
/**
- * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
+ * __sk_mem_raise_allocated - increase memory_allocated
* @sk: socket
* @size: memory size to allocate
+ * @amt: pages to allocate
* @kind: allocation type
*
- * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
- * rmem allocation. This function assumes that protocols which have
- * memory_pressure use sk_wmem_queued as write buffer accounting.
+ * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
*/
-int __sk_mem_schedule(struct sock *sk, int size, int kind)
+int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
struct proto *prot = sk->sk_prot;
- int amt = sk_mem_pages(size);
- long allocated;
-
- sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-
- allocated = sk_memory_allocated_add(sk, amt);
+ long allocated = sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
!mem_cgroup_charge_skmem(sk->sk_memcg, amt))
@@ -2169,9 +2163,6 @@ suppress_allocation:
trace_sock_exceed_buf_limit(sk, prot, allocated);
- /* Alas. Undo changes. */
- sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
-
sk_memory_allocated_sub(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
@@ -2179,18 +2170,40 @@ suppress_allocation:
return 0;
}
+EXPORT_SYMBOL(__sk_mem_raise_allocated);
+
+/**
+ * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
+ * @sk: socket
+ * @size: memory size to allocate
+ * @kind: allocation type
+ *
+ * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
+ * rmem allocation. This function assumes that protocols which have
+ * memory_pressure use sk_wmem_queued as write buffer accounting.
+ */
+int __sk_mem_schedule(struct sock *sk, int size, int kind)
+{
+ int ret, amt = sk_mem_pages(size);
+
+ sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
+ ret = __sk_mem_raise_allocated(sk, size, amt, kind);
+ if (!ret)
+ sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
+ return ret;
+}
EXPORT_SYMBOL(__sk_mem_schedule);
/**
- * __sk_mem_reclaim - reclaim memory_allocated
+ * __sk_mem_reduce_allocated - reclaim memory_allocated
* @sk: socket
- * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
+ * @amount: number of quanta
+ *
+ * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
*/
-void __sk_mem_reclaim(struct sock *sk, int amount)
+void __sk_mem_reduce_allocated(struct sock *sk, int amount)
{
- amount >>= SK_MEM_QUANTUM_SHIFT;
sk_memory_allocated_sub(sk, amount);
- sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
@@ -2199,6 +2212,19 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
sk_leave_memory_pressure(sk);
}
+EXPORT_SYMBOL(__sk_mem_reduce_allocated);
+
+/**
+ * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
+ * @sk: socket
+ * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
+ */
+void __sk_mem_reclaim(struct sock *sk, int amount)
+{
+ amount >>= SK_MEM_QUANTUM_SHIFT;
+ sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+ __sk_mem_reduce_allocated(sk, amount);
+}
EXPORT_SYMBOL(__sk_mem_reclaim);
int sk_set_peek_off(struct sock *sk, int val)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6b1282c006b1..d0c7bce88743 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -641,7 +641,8 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
/* ethtool operations *******************************************************/
static int
-dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+dsa_slave_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
struct dsa_slave_priv *p = netdev_priv(dev);
int err;
@@ -650,19 +651,20 @@ dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
if (p->phy != NULL) {
err = phy_read_status(p->phy);
if (err == 0)
- err = phy_ethtool_gset(p->phy, cmd);
+ err = phy_ethtool_ksettings_get(p->phy, cmd);
}
return err;
}
static int
-dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+dsa_slave_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
{
struct dsa_slave_priv *p = netdev_priv(dev);
if (p->phy != NULL)
- return phy_ethtool_sset(p->phy, cmd);
+ return phy_ethtool_ksettings_set(p->phy, cmd);
return -EOPNOTSUPP;
}
@@ -990,8 +992,6 @@ void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
}
static const struct ethtool_ops dsa_slave_ethtool_ops = {
- .get_settings = dsa_slave_get_settings,
- .set_settings = dsa_slave_set_settings,
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
.get_regs = dsa_slave_get_regs,
@@ -1007,6 +1007,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_wol = dsa_slave_get_wol,
.set_eee = dsa_slave_set_eee,
.get_eee = dsa_slave_get_eee,
+ .get_link_ksettings = dsa_slave_get_link_ksettings,
+ .set_link_ksettings = dsa_slave_set_link_ksettings,
};
static const struct net_device_ops dsa_slave_netdev_ops = {
@@ -1245,6 +1247,8 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
slave_dev->priv_flags |= IFF_NO_QUEUE;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
+ slave_dev->min_mtu = 0;
+ slave_dev->max_mtu = ETH_MAX_MTU;
SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 66dff5e3d772..f983c102ebe3 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -322,8 +322,7 @@ EXPORT_SYMBOL(eth_mac_addr);
*/
int eth_change_mtu(struct net_device *dev, int new_mtu)
{
- if (new_mtu < 68 || new_mtu > ETH_DATA_LEN)
- return -EINVAL;
+ netdev_warn(dev, "%s is deprecated\n", __func__);
dev->mtu = new_mtu;
return 0;
}
@@ -357,6 +356,8 @@ void ether_setup(struct net_device *dev)
dev->type = ARPHRD_ETHER;
dev->hard_header_len = ETH_HLEN;
dev->mtu = ETH_DATA_LEN;
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = ETH_DATA_LEN;
dev->addr_len = ETH_ALEN;
dev->tx_queue_len = 1000; /* Ethernet wants good queues */
dev->flags = IFF_BROADCAST|IFF_MULTICAST;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 16737cd8dae8..fc65b145f6e7 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -398,6 +398,7 @@ void hsr_dev_setup(struct net_device *dev)
random_ether_addr(dev->dev_addr);
ether_setup(dev);
+ dev->min_mtu = 0;
dev->header_ops = &hsr_header_ops;
dev->netdev_ops = &hsr_device_ops;
SET_NETDEV_DEVTYPE(dev, &hsr_type);
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index d4d1617f43a8..1ab30e7d3f99 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -131,13 +131,7 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = {
[HSR_A_IF2_SEQ] = { .type = NLA_U16 },
};
-static struct genl_family hsr_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = "HSR",
- .version = 1,
- .maxattr = HSR_A_MAX,
-};
+static struct genl_family hsr_genl_family;
static const struct genl_multicast_group hsr_mcgrps[] = {
{ .name = "hsr-network", },
@@ -467,6 +461,18 @@ static const struct genl_ops hsr_ops[] = {
},
};
+static struct genl_family hsr_genl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = "HSR",
+ .version = 1,
+ .maxattr = HSR_A_MAX,
+ .module = THIS_MODULE,
+ .ops = hsr_ops,
+ .n_ops = ARRAY_SIZE(hsr_ops),
+ .mcgrps = hsr_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(hsr_mcgrps),
+};
+
int __init hsr_netlink_init(void)
{
int rc;
@@ -475,8 +481,7 @@ int __init hsr_netlink_init(void)
if (rc)
goto fail_rtnl_link_register;
- rc = genl_register_family_with_ops_groups(&hsr_genl_family, hsr_ops,
- hsr_mcgrps);
+ rc = genl_register_family(&hsr_genl_family);
if (rc)
goto fail_genl_register_family;
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index c8133c07ceee..6bde9e5a5503 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -28,14 +28,6 @@
static unsigned int ieee802154_seq_num;
static DEFINE_SPINLOCK(ieee802154_seq_lock);
-struct genl_family nl802154_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = IEEE802154_NL_NAME,
- .version = 1,
- .maxattr = IEEE802154_ATTR_MAX,
-};
-
/* Requests to userspace */
struct sk_buff *ieee802154_nl_create(int flags, u8 req)
{
@@ -139,11 +131,21 @@ static const struct genl_multicast_group ieee802154_mcgrps[] = {
[IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, },
};
+struct genl_family nl802154_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = IEEE802154_NL_NAME,
+ .version = 1,
+ .maxattr = IEEE802154_ATTR_MAX,
+ .module = THIS_MODULE,
+ .ops = ieee8021154_ops,
+ .n_ops = ARRAY_SIZE(ieee8021154_ops),
+ .mcgrps = ieee802154_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps),
+};
+
int __init ieee802154_nl_init(void)
{
- return genl_register_family_with_ops_groups(&nl802154_family,
- ieee8021154_ops,
- ieee802154_mcgrps);
+ return genl_register_family(&nl802154_family);
}
void ieee802154_nl_exit(void)
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index d90a4ed5b8a0..fc60cd061f39 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -26,23 +26,8 @@
#include "rdev-ops.h"
#include "core.h"
-static int nl802154_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
- struct genl_info *info);
-
-static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
- struct genl_info *info);
-
/* the netlink family */
-static struct genl_family nl802154_fam = {
- .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */
- .name = NL802154_GENL_NAME, /* have users key off the name instead */
- .hdrsize = 0, /* no private header */
- .version = 1, /* no particular meaning now */
- .maxattr = NL802154_ATTR_MAX,
- .netnsok = true,
- .pre_doit = nl802154_pre_doit,
- .post_doit = nl802154_post_doit,
-};
+static struct genl_family nl802154_fam;
/* multicast groups */
enum nl802154_multicast_groups {
@@ -263,13 +248,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
if (!cb->args[0]) {
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
- nl802154_fam.attrbuf, nl802154_fam.maxattr,
+ genl_family_attrbuf(&nl802154_fam),
+ nl802154_fam.maxattr,
nl802154_policy);
if (err)
goto out_unlock;
*wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
- nl802154_fam.attrbuf);
+ genl_family_attrbuf(&nl802154_fam));
if (IS_ERR(*wpan_dev)) {
err = PTR_ERR(*wpan_dev);
goto out_unlock;
@@ -575,7 +561,7 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
struct netlink_callback *cb,
struct nl802154_dump_wpan_phy_state *state)
{
- struct nlattr **tb = nl802154_fam.attrbuf;
+ struct nlattr **tb = genl_family_attrbuf(&nl802154_fam);
int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
tb, nl802154_fam.maxattr, nl802154_policy);
@@ -2476,11 +2462,25 @@ static const struct genl_ops nl802154_ops[] = {
#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
};
+static struct genl_family nl802154_fam __ro_after_init = {
+ .name = NL802154_GENL_NAME, /* have users key off the name instead */
+ .hdrsize = 0, /* no private header */
+ .version = 1, /* no particular meaning now */
+ .maxattr = NL802154_ATTR_MAX,
+ .netnsok = true,
+ .pre_doit = nl802154_pre_doit,
+ .post_doit = nl802154_post_doit,
+ .module = THIS_MODULE,
+ .ops = nl802154_ops,
+ .n_ops = ARRAY_SIZE(nl802154_ops),
+ .mcgrps = nl802154_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(nl802154_mcgrps),
+};
+
/* initialisation/exit functions */
-int nl802154_init(void)
+int __init nl802154_init(void)
{
- return genl_register_family_with_ops_groups(&nl802154_fam, nl802154_ops,
- nl802154_mcgrps);
+ return genl_register_family(&nl802154_fam);
}
void nl802154_exit(void)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 300b06888fdf..28e051a8e847 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -430,6 +430,14 @@ config INET_UDP_DIAG
Support for UDP socket monitoring interface used by the ss tool.
If unsure, say Y.
+config INET_RAW_DIAG
+ tristate "RAW: socket monitoring interface"
+ depends on INET_DIAG && (IPV6 || IPV6=n)
+ default n
+ ---help---
+ Support for RAW socket monitoring interface used by the ss tool.
+ If unsure, say Y.
+
config INET_DIAG_DESTROY
bool "INET: allow privileged process to administratively close sockets"
depends on INET_DIAG
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index bc6a6c8b9bcd..48af58a5686e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
obj-$(CONFIG_INET_DIAG) += inet_diag.o
obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
+obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index cf50f7e2b012..6cb57bb8692d 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -622,14 +622,7 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg)
return err;
}
-static struct genl_family fou_nl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = FOU_GENL_NAME,
- .version = FOU_GENL_VERSION,
- .maxattr = FOU_ATTR_MAX,
- .netnsok = true,
-};
+static struct genl_family fou_nl_family;
static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
[FOU_ATTR_PORT] = { .type = NLA_U16, },
@@ -831,6 +824,17 @@ static const struct genl_ops fou_nl_ops[] = {
},
};
+static struct genl_family fou_nl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = FOU_GENL_NAME,
+ .version = FOU_GENL_VERSION,
+ .maxattr = FOU_ATTR_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = fou_nl_ops,
+ .n_ops = ARRAY_SIZE(fou_nl_ops),
+};
+
size_t fou_encap_hlen(struct ip_tunnel_encap *e)
{
return sizeof(struct udphdr);
@@ -1086,8 +1090,7 @@ static int __init fou_init(void)
if (ret)
goto exit;
- ret = genl_register_family_with_ops(&fou_nl_family,
- fou_nl_ops);
+ ret = genl_register_family(&fou_nl_family);
if (ret < 0)
goto unregister;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e4d16fc5bbb3..3b34024202d8 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -200,6 +200,15 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
goto errout;
+ /*
+ * RAW sockets might have user-defined protocols assigned,
+ * so report the one supplied on socket creation.
+ */
+ if (sk->sk_type == SOCK_RAW) {
+ if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))
+ goto errout;
+ }
+
if (!icsk) {
handler->idiag_get_info(sk, r, NULL);
goto out;
@@ -863,7 +872,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
s_num = num = cb->args[2];
if (cb->args[0] == 0) {
- if (!(idiag_states & TCPF_LISTEN))
+ if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
goto skip_listen_ht;
for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
@@ -872,7 +881,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
num = 0;
ilb = &hashinfo->listening_hash[i];
- spin_lock_bh(&ilb->lock);
+ spin_lock(&ilb->lock);
sk_for_each(sk, &ilb->head) {
struct inet_sock *inet = inet_sk(sk);
@@ -892,26 +901,18 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
r->id.idiag_sport)
goto next_listen;
- if (r->id.idiag_dport ||
- cb->args[3] > 0)
- goto next_listen;
-
if (inet_csk_diag_dump(sk, skb, cb, r,
bc, net_admin) < 0) {
- spin_unlock_bh(&ilb->lock);
+ spin_unlock(&ilb->lock);
goto done;
}
next_listen:
- cb->args[3] = 0;
- cb->args[4] = 0;
++num;
}
- spin_unlock_bh(&ilb->lock);
+ spin_unlock(&ilb->lock);
s_num = 0;
- cb->args[3] = 0;
- cb->args[4] = 0;
}
skip_listen_ht:
cb->args[0] = 1;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5719d6ba0824..12a92e3349ed 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -358,6 +358,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
{
struct ip_tunnel *nt;
struct net_device *dev;
+ int t_hlen;
BUG_ON(!itn->fb_tunnel_dev);
dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
@@ -367,6 +368,9 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
dev->mtu = ip_tunnel_bind_dev(dev);
nt = netdev_priv(dev);
+ t_hlen = nt->hlen + sizeof(struct iphdr);
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
ip_tunnel_add(itn, nt);
return nt;
}
@@ -929,7 +933,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
- if (new_mtu < 68)
+ if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
if (new_mtu > max_mtu) {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a87bcd2d4a94..5f006e13de56 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2123,7 +2123,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
- struct rtmsg *rtm, int nowait)
+ struct rtmsg *rtm, int nowait, u32 portid)
{
struct mfc_cache *cache;
struct mr_table *mrt;
@@ -2168,6 +2168,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
return -ENOMEM;
}
+ NETLINK_CB(skb2).portid = portid;
skb_push(skb2, sizeof(struct iphdr));
skb_reset_network_header(skb2);
iph = ip_hdr(skb2);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 66ddcb60519a..7cf7d6e380c2 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -258,7 +258,7 @@ int ping_init_sock(struct sock *sk)
struct net *net = sock_net(sk);
kgid_t group = current_egid();
struct group_info *group_info;
- int i, j, count;
+ int i;
kgid_t low, high;
int ret = 0;
@@ -270,16 +270,11 @@ int ping_init_sock(struct sock *sk)
return 0;
group_info = get_current_groups();
- count = group_info->ngroups;
- for (i = 0; i < group_info->nblocks; i++) {
- int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
- for (j = 0; j < cp_count; j++) {
- kgid_t gid = group_info->blocks[i][j];
- if (gid_lte(low, gid) && gid_lte(gid, high))
- goto out_release_group;
- }
+ for (i = 0; i < group_info->ngroups; i++) {
+ kgid_t gid = group_info->gid[i];
- count -= cp_count;
+ if (gid_lte(low, gid) && gid_lte(gid, high))
+ goto out_release_group;
}
ret = -EACCES;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 1ed015e4bc79..7143ca1a6af9 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -46,6 +46,8 @@
#include <net/sock.h>
#include <net/raw.h>
+#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX)
+
/*
* Report socket allocation statistics [mea@utu.fi]
*/
@@ -356,22 +358,22 @@ static void icmp_put(struct seq_file *seq)
atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
- for (i = 0; icmpmibmap[i].name != NULL; i++)
+ for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " In%s", icmpmibmap[i].name);
seq_puts(seq, " OutMsgs OutErrors");
- for (i = 0; icmpmibmap[i].name != NULL; i++)
+ for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " Out%s", icmpmibmap[i].name);
seq_printf(seq, "\nIcmp: %lu %lu %lu",
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS),
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS),
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
- for (i = 0; icmpmibmap[i].name != NULL; i++)
+ for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " %lu",
atomic_long_read(ptr + icmpmibmap[i].index));
seq_printf(seq, " %lu %lu",
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
- for (i = 0; icmpmibmap[i].name != NULL; i++)
+ for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " %lu",
atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
}
@@ -379,14 +381,16 @@ static void icmp_put(struct seq_file *seq)
/*
* Called from the PROCfs module. This outputs /proc/net/snmp.
*/
-static int snmp_seq_show(struct seq_file *seq, void *v)
+static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
{
- int i;
struct net *net = seq->private;
+ u64 buff64[IPSTATS_MIB_MAX];
+ int i;
- seq_puts(seq, "Ip: Forwarding DefaultTTL");
+ memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64));
- for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
+ seq_puts(seq, "Ip: Forwarding DefaultTTL");
+ for (i = 0; snmp4_ipstats_list[i].name; i++)
seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
seq_printf(seq, "\nIp: %d %d",
@@ -394,57 +398,77 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
net->ipv4.sysctl_ip_default_ttl);
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
- for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
- seq_printf(seq, " %llu",
- snmp_fold_field64(net->mib.ip_statistics,
- snmp4_ipstats_list[i].entry,
- offsetof(struct ipstats_mib, syncp)));
+ snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
+ net->mib.ip_statistics,
+ offsetof(struct ipstats_mib, syncp));
+ for (i = 0; snmp4_ipstats_list[i].name; i++)
+ seq_printf(seq, " %llu", buff64[i]);
- icmp_put(seq); /* RFC 2011 compatibility */
- icmpmsg_put(seq);
+ return 0;
+}
+
+static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v)
+{
+ unsigned long buff[TCPUDP_MIB_MAX];
+ struct net *net = seq->private;
+ int i;
+
+ memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
seq_puts(seq, "\nTcp:");
- for (i = 0; snmp4_tcp_list[i].name != NULL; i++)
+ for (i = 0; snmp4_tcp_list[i].name; i++)
seq_printf(seq, " %s", snmp4_tcp_list[i].name);
seq_puts(seq, "\nTcp:");
- for (i = 0; snmp4_tcp_list[i].name != NULL; i++) {
+ snmp_get_cpu_field_batch(buff, snmp4_tcp_list,
+ net->mib.tcp_statistics);
+ for (i = 0; snmp4_tcp_list[i].name; i++) {
/* MaxConn field is signed, RFC 2012 */
if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
- seq_printf(seq, " %ld",
- snmp_fold_field(net->mib.tcp_statistics,
- snmp4_tcp_list[i].entry));
+ seq_printf(seq, " %ld", buff[i]);
else
- seq_printf(seq, " %lu",
- snmp_fold_field(net->mib.tcp_statistics,
- snmp4_tcp_list[i].entry));
+ seq_printf(seq, " %lu", buff[i]);
}
+ memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+
+ snmp_get_cpu_field_batch(buff, snmp4_udp_list,
+ net->mib.udp_statistics);
seq_puts(seq, "\nUdp:");
- for (i = 0; snmp4_udp_list[i].name != NULL; i++)
+ for (i = 0; snmp4_udp_list[i].name; i++)
seq_printf(seq, " %s", snmp4_udp_list[i].name);
-
seq_puts(seq, "\nUdp:");
- for (i = 0; snmp4_udp_list[i].name != NULL; i++)
- seq_printf(seq, " %lu",
- snmp_fold_field(net->mib.udp_statistics,
- snmp4_udp_list[i].entry));
+ for (i = 0; snmp4_udp_list[i].name; i++)
+ seq_printf(seq, " %lu", buff[i]);
+
+ memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
/* the UDP and UDP-Lite MIBs are the same */
seq_puts(seq, "\nUdpLite:");
- for (i = 0; snmp4_udp_list[i].name != NULL; i++)
+ snmp_get_cpu_field_batch(buff, snmp4_udp_list,
+ net->mib.udplite_statistics);
+ for (i = 0; snmp4_udp_list[i].name; i++)
seq_printf(seq, " %s", snmp4_udp_list[i].name);
-
seq_puts(seq, "\nUdpLite:");
- for (i = 0; snmp4_udp_list[i].name != NULL; i++)
- seq_printf(seq, " %lu",
- snmp_fold_field(net->mib.udplite_statistics,
- snmp4_udp_list[i].entry));
+ for (i = 0; snmp4_udp_list[i].name; i++)
+ seq_printf(seq, " %lu", buff[i]);
seq_putc(seq, '\n');
return 0;
}
+static int snmp_seq_show(struct seq_file *seq, void *v)
+{
+ snmp_seq_show_ipstats(seq, v);
+
+ icmp_put(seq); /* RFC 2011 compatibility */
+ icmpmsg_put(seq);
+
+ snmp_seq_show_tcp_udp(seq, v);
+
+ return 0;
+}
+
static int snmp_seq_open(struct inode *inode, struct file *file)
{
return single_open_net(inode, file, snmp_seq_show);
@@ -469,21 +493,21 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
struct net *net = seq->private;
seq_puts(seq, "TcpExt:");
- for (i = 0; snmp4_net_list[i].name != NULL; i++)
+ for (i = 0; snmp4_net_list[i].name; i++)
seq_printf(seq, " %s", snmp4_net_list[i].name);
seq_puts(seq, "\nTcpExt:");
- for (i = 0; snmp4_net_list[i].name != NULL; i++)
+ for (i = 0; snmp4_net_list[i].name; i++)
seq_printf(seq, " %lu",
snmp_fold_field(net->mib.net_statistics,
snmp4_net_list[i].entry));
seq_puts(seq, "\nIpExt:");
- for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
+ for (i = 0; snmp4_ipextstats_list[i].name; i++)
seq_printf(seq, " %s", snmp4_ipextstats_list[i].name);
seq_puts(seq, "\nIpExt:");
- for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
+ for (i = 0; snmp4_ipextstats_list[i].name; i++)
seq_printf(seq, " %llu",
snmp_fold_field64(net->mib.ip_statistics,
snmp4_ipextstats_list[i].entry,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 90a85c955872..03618ed03532 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -89,9 +89,10 @@ struct raw_frag_vec {
int hlen;
};
-static struct raw_hashinfo raw_v4_hashinfo = {
+struct raw_hashinfo raw_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
};
+EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
int raw_hash_sk(struct sock *sk)
{
@@ -120,7 +121,7 @@ void raw_unhash_sk(struct sock *sk)
}
EXPORT_SYMBOL_GPL(raw_unhash_sk);
-static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
+struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
unsigned short num, __be32 raddr, __be32 laddr, int dif)
{
sk_for_each_from(sk) {
@@ -136,6 +137,7 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
found:
return sk;
}
+EXPORT_SYMBOL_GPL(__raw_v4_lookup);
/*
* 0 - deliver
@@ -912,6 +914,20 @@ static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg
}
#endif
+int raw_abort(struct sock *sk, int err)
+{
+ lock_sock(sk);
+
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+ udp_disconnect(sk, 0);
+
+ release_sock(sk);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(raw_abort);
+
struct proto raw_prot = {
.name = "RAW",
.owner = THIS_MODULE,
@@ -937,6 +953,7 @@ struct proto raw_prot = {
.compat_getsockopt = compat_raw_getsockopt,
.compat_ioctl = compat_raw_ioctl,
#endif
+ .diag_destroy = raw_abort,
};
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
new file mode 100644
index 000000000000..ef3bea061b75
--- /dev/null
+++ b/net/ipv4/raw_diag.c
@@ -0,0 +1,261 @@
+#include <linux/module.h>
+
+#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
+
+#include <net/raw.h>
+#include <net/rawv6.h>
+
+#ifdef pr_fmt
+# undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+static struct raw_hashinfo *
+raw_get_hashinfo(const struct inet_diag_req_v2 *r)
+{
+ if (r->sdiag_family == AF_INET) {
+ return &raw_v4_hashinfo;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (r->sdiag_family == AF_INET6) {
+ return &raw_v6_hashinfo;
+#endif
+ } else {
+ pr_warn_once("Unexpected inet family %d\n",
+ r->sdiag_family);
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-EINVAL);
+ }
+}
+
+/*
+ * Due to requirement of not breaking user API we can't simply
+ * rename @pad field in inet_diag_req_v2 structure, instead
+ * use helper to figure it out.
+ */
+
+static struct sock *raw_lookup(struct net *net, struct sock *from,
+ const struct inet_diag_req_v2 *req)
+{
+ struct inet_diag_req_raw *r = (void *)req;
+ struct sock *sk = NULL;
+
+ if (r->sdiag_family == AF_INET)
+ sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol,
+ r->id.idiag_dst[0],
+ r->id.idiag_src[0],
+ r->id.idiag_if);
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
+ (const struct in6_addr *)r->id.idiag_src,
+ (const struct in6_addr *)r->id.idiag_dst,
+ r->id.idiag_if);
+#endif
+ return sk;
+}
+
+static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
+{
+ struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
+ struct sock *sk = NULL, *s;
+ int slot;
+
+ if (IS_ERR(hashinfo))
+ return ERR_CAST(hashinfo);
+
+ read_lock(&hashinfo->lock);
+ for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
+ sk_for_each(s, &hashinfo->ht[slot]) {
+ sk = raw_lookup(net, s, r);
+ if (sk) {
+ /*
+ * Grab it and keep until we fill
+ * diag meaage to be reported, so
+ * caller should call sock_put then.
+ * We can do that because we're keeping
+ * hashinfo->lock here.
+ */
+ sock_hold(sk);
+ break;
+ }
+ }
+ }
+ read_unlock(&hashinfo->lock);
+
+ return sk ? sk : ERR_PTR(-ENOENT);
+}
+
+static int raw_diag_dump_one(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh,
+ const struct inet_diag_req_v2 *r)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct sk_buff *rep;
+ struct sock *sk;
+ int err;
+
+ sk = raw_sock_get(net, r);
+ if (IS_ERR(sk))
+ return PTR_ERR(sk);
+
+ rep = nlmsg_new(sizeof(struct inet_diag_msg) +
+ sizeof(struct inet_diag_meminfo) + 64,
+ GFP_KERNEL);
+ if (!rep) {
+ sock_put(sk);
+ return -ENOMEM;
+ }
+
+ err = inet_sk_diag_fill(sk, NULL, rep, r,
+ sk_user_ns(NETLINK_CB(in_skb).sk),
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0, nlh,
+ netlink_net_capable(in_skb, CAP_NET_ADMIN));
+ sock_put(sk);
+
+ if (err < 0) {
+ kfree_skb(rep);
+ return err;
+ }
+
+ err = netlink_unicast(net->diag_nlsk, rep,
+ NETLINK_CB(in_skb).portid,
+ MSG_DONTWAIT);
+ if (err > 0)
+ err = 0;
+ return err;
+}
+
+static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *r,
+ struct nlattr *bc, bool net_admin)
+{
+ if (!inet_diag_bc_sk(bc, sk))
+ return 0;
+
+ return inet_sk_diag_fill(sk, NULL, skb, r,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->nlh, net_admin);
+}
+
+static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *r, struct nlattr *bc)
+{
+ bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
+ struct net *net = sock_net(skb->sk);
+ int num, s_num, slot, s_slot;
+ struct sock *sk = NULL;
+
+ if (IS_ERR(hashinfo))
+ return;
+
+ s_slot = cb->args[0];
+ num = s_num = cb->args[1];
+
+ read_lock(&hashinfo->lock);
+ for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) {
+ num = 0;
+
+ sk_for_each(sk, &hashinfo->ht[slot]) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (num < s_num)
+ goto next;
+ if (sk->sk_family != r->sdiag_family)
+ goto next;
+ if (r->id.idiag_sport != inet->inet_sport &&
+ r->id.idiag_sport)
+ goto next;
+ if (r->id.idiag_dport != inet->inet_dport &&
+ r->id.idiag_dport)
+ goto next;
+ if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0)
+ goto out_unlock;
+next:
+ num++;
+ }
+ }
+
+out_unlock:
+ read_unlock(&hashinfo->lock);
+
+ cb->args[0] = slot;
+ cb->args[1] = num;
+}
+
+static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+ void *info)
+{
+ r->idiag_rqueue = sk_rmem_alloc_get(sk);
+ r->idiag_wqueue = sk_wmem_alloc_get(sk);
+}
+
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int raw_diag_destroy(struct sk_buff *in_skb,
+ const struct inet_diag_req_v2 *r)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct sock *sk;
+
+ sk = raw_sock_get(net, r);
+ if (IS_ERR(sk))
+ return PTR_ERR(sk);
+ return sock_diag_destroy(sk, ECONNABORTED);
+}
+#endif
+
+static const struct inet_diag_handler raw_diag_handler = {
+ .dump = raw_diag_dump,
+ .dump_one = raw_diag_dump_one,
+ .idiag_get_info = raw_diag_get_info,
+ .idiag_type = IPPROTO_RAW,
+ .idiag_info_size = 0,
+#ifdef CONFIG_INET_DIAG_DESTROY
+ .destroy = raw_diag_destroy,
+#endif
+};
+
+static void __always_unused __check_inet_diag_req_raw(void)
+{
+ /*
+ * Make sure the two structures are identical,
+ * except the @pad field.
+ */
+#define __offset_mismatch(m1, m2) \
+ (offsetof(struct inet_diag_req_v2, m1) != \
+ offsetof(struct inet_diag_req_raw, m2))
+
+ BUILD_BUG_ON(sizeof(struct inet_diag_req_v2) !=
+ sizeof(struct inet_diag_req_raw));
+ BUILD_BUG_ON(__offset_mismatch(sdiag_family, sdiag_family));
+ BUILD_BUG_ON(__offset_mismatch(sdiag_protocol, sdiag_protocol));
+ BUILD_BUG_ON(__offset_mismatch(idiag_ext, idiag_ext));
+ BUILD_BUG_ON(__offset_mismatch(pad, sdiag_raw_protocol));
+ BUILD_BUG_ON(__offset_mismatch(idiag_states, idiag_states));
+ BUILD_BUG_ON(__offset_mismatch(id, id));
+#undef __offset_mismatch
+}
+
+static int __init raw_diag_init(void)
+{
+ return inet_diag_register(&raw_diag_handler);
+}
+
+static void __exit raw_diag_exit(void)
+{
+ inet_diag_unregister(&raw_diag_handler);
+}
+
+module_init(raw_diag_init);
+module_exit(raw_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 654a9af20136..62d4d90c1389 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2265,7 +2265,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
if (err) {
res.fi = NULL;
res.table = NULL;
- if (fl4->flowi4_oif) {
+ if (fl4->flowi4_oif &&
+ !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
/* Apparently, routing tables are wrong. Assume,
that the destination is on link.
@@ -2500,7 +2501,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
int err = ipmr_get_route(net, skb,
fl4->saddr, fl4->daddr,
- r, nowait);
+ r, nowait, portid);
+
if (err <= 0) {
if (!nowait) {
if (err == 0)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f253e5019d22..3251fe71f39f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -424,8 +424,6 @@ void tcp_init_sock(struct sock *sk)
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
local_bh_disable();
- if (mem_cgroup_sockets_enabled)
- sock_update_memcg(sk);
sk_sockets_allocated_inc(sk);
local_bh_enable();
}
@@ -691,8 +689,7 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
int ret;
ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
- min(rd_desc->count, len), tss->flags,
- skb_socket_splice);
+ min(rd_desc->count, len), tss->flags);
if (ret > 0)
rd_desc->count -= ret;
return ret;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8c6ad2d319d6..a27b9c0e27c0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2362,10 +2362,9 @@ static void DBGUNDO(struct sock *sk, const char *msg)
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- struct ipv6_pinfo *np = inet6_sk(sk);
pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
msg,
- &np->daddr, ntohs(inet->inet_dport),
+ &sk->sk_v6_daddr, ntohs(inet->inet_dport),
tp->snd_cwnd, tcp_left_out(tp),
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7ac37c314312..83b3d0b8c481 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1871,9 +1871,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
local_bh_disable();
sk_sockets_allocated_dec(sk);
local_bh_enable();
-
- if (mem_cgroup_sockets_enabled && sk->sk_memcg)
- sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -1896,7 +1893,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
if (!sk) {
get_head:
ilb = &tcp_hashinfo.listening_hash[st->bucket];
- spin_lock_bh(&ilb->lock);
+ spin_lock(&ilb->lock);
sk = sk_head(&ilb->head);
st->offset = 0;
goto get_sk;
@@ -1914,7 +1911,7 @@ get_sk:
return sk;
icsk = inet_csk(sk);
}
- spin_unlock_bh(&ilb->lock);
+ spin_unlock(&ilb->lock);
st->offset = 0;
if (++st->bucket < INET_LHTABLE_SIZE)
goto get_head;
@@ -2122,7 +2119,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
- spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
+ spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
break;
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index bf1f3b2b29d1..d46f4d5b1c62 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -742,14 +742,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
rcu_read_unlock();
}
-static struct genl_family tcp_metrics_nl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = TCP_METRICS_GENL_NAME,
- .version = TCP_METRICS_GENL_VERSION,
- .maxattr = TCP_METRICS_ATTR_MAX,
- .netnsok = true,
-};
+static struct genl_family tcp_metrics_nl_family;
static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
[TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
@@ -1116,6 +1109,17 @@ static const struct genl_ops tcp_metrics_nl_ops[] = {
},
};
+static struct genl_family tcp_metrics_nl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = TCP_METRICS_GENL_NAME,
+ .version = TCP_METRICS_GENL_VERSION,
+ .maxattr = TCP_METRICS_ATTR_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = tcp_metrics_nl_ops,
+ .n_ops = ARRAY_SIZE(tcp_metrics_nl_ops),
+};
+
static unsigned int tcpmhash_entries;
static int __init set_tcpmhash_entries(char *str)
{
@@ -1179,8 +1183,7 @@ void __init tcp_metrics_init(void)
if (ret < 0)
panic("Could not allocate the tcp_metrics hash table\n");
- ret = genl_register_family_with_ops(&tcp_metrics_nl_family,
- tcp_metrics_nl_ops);
+ ret = genl_register_family(&tcp_metrics_nl_family);
if (ret < 0)
panic("Could not register tcp_metrics generic netlink\n");
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7c777089a4d6..896e9dfbdb5c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1992,12 +1992,14 @@ static int tcp_mtu_probe(struct sock *sk)
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
- if (nskb->ip_summed)
+ if (nskb->ip_summed) {
skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
- else
- nskb->csum = skb_copy_and_csum_bits(skb, 0,
- skb_put(nskb, copy),
- copy, nskb->csum);
+ } else {
+ __wsum csum = skb_copy_and_csum_bits(skb, 0,
+ skb_put(nskb, copy),
+ copy, 0);
+ nskb->csum = csum_block_add(nskb->csum, csum, len);
+ }
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7d96dc2d3d08..c8332715ee2d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1172,6 +1172,112 @@ out:
return ret;
}
+static void udp_rmem_release(struct sock *sk, int size, int partial)
+{
+ int amt;
+
+ atomic_sub(size, &sk->sk_rmem_alloc);
+
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ sk->sk_forward_alloc += size;
+ amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
+ sk->sk_forward_alloc -= amt;
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+ if (amt)
+ __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
+}
+
+static void udp_rmem_free(struct sk_buff *skb)
+{
+ udp_rmem_release(skb->sk, skb->truesize, 1);
+}
+
+int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
+{
+ struct sk_buff_head *list = &sk->sk_receive_queue;
+ int rmem, delta, amt, err = -ENOMEM;
+ int size = skb->truesize;
+
+ /* try to avoid the costly atomic add/sub pair when the receive
+ * queue is full; always allow at least a packet
+ */
+ rmem = atomic_read(&sk->sk_rmem_alloc);
+ if (rmem && (rmem + size > sk->sk_rcvbuf))
+ goto drop;
+
+ /* we drop only if the receive buf is full and the receive
+ * queue contains some other skb
+ */
+ rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
+ if ((rmem > sk->sk_rcvbuf) && (rmem > size))
+ goto uncharge_drop;
+
+ spin_lock(&list->lock);
+ if (size >= sk->sk_forward_alloc) {
+ amt = sk_mem_pages(size);
+ delta = amt << SK_MEM_QUANTUM_SHIFT;
+ if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) {
+ err = -ENOBUFS;
+ spin_unlock(&list->lock);
+ goto uncharge_drop;
+ }
+
+ sk->sk_forward_alloc += delta;
+ }
+
+ sk->sk_forward_alloc -= size;
+
+ /* the skb owner in now the udp socket */
+ skb->sk = sk;
+ skb->destructor = udp_rmem_free;
+ skb->dev = NULL;
+ sock_skb_set_dropcount(sk, skb);
+
+ __skb_queue_tail(list, skb);
+ spin_unlock(&list->lock);
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_data_ready(sk);
+
+ return 0;
+
+uncharge_drop:
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+
+drop:
+ atomic_inc(&sk->sk_drops);
+ return err;
+}
+EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
+
+static void udp_destruct_sock(struct sock *sk)
+{
+ /* reclaim completely the forward allocated memory */
+ __skb_queue_purge(&sk->sk_receive_queue);
+ udp_rmem_release(sk, 0, 0);
+ inet_sock_destruct(sk);
+}
+
+int udp_init_sock(struct sock *sk)
+{
+ sk->sk_destruct = udp_destruct_sock;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(udp_init_sock);
+
+void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
+{
+ if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
+ bool slow = lock_sock_fast(sk);
+
+ sk_peek_offset_bwd(sk, len);
+ unlock_sock_fast(sk, slow);
+ }
+ consume_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_consume_udp);
+
/**
* first_packet_length - return length of first packet in receive queue
* @sk: socket
@@ -1201,13 +1307,7 @@ static int first_packet_length(struct sock *sk)
res = skb ? skb->len : -1;
spin_unlock_bh(&rcvq->lock);
- if (!skb_queue_empty(&list_kill)) {
- bool slow = lock_sock_fast(sk);
-
- __skb_queue_purge(&list_kill);
- sk_mem_reclaim_partial(sk);
- unlock_sock_fast(sk, slow);
- }
+ __skb_queue_purge(&list_kill);
return res;
}
@@ -1256,7 +1356,6 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int err;
int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false;
- bool slow;
if (flags & MSG_ERRQUEUE)
return ip_recv_error(sk, msg, len, addr_len);
@@ -1297,13 +1396,12 @@ try_again:
}
if (unlikely(err)) {
- trace_kfree_skb(skb, udp_recvmsg);
if (!peeked) {
atomic_inc(&sk->sk_drops);
UDP_INC_STATS(sock_net(sk),
UDP_MIB_INERRORS, is_udplite);
}
- skb_free_datagram_locked(sk, skb);
+ kfree_skb(skb);
return err;
}
@@ -1328,16 +1426,15 @@ try_again:
if (flags & MSG_TRUNC)
err = ulen;
- __skb_free_datagram_locked(sk, skb, peeking ? -err : err);
+ skb_consume_udp(sk, skb, peeking ? -err : err);
return err;
csum_copy_err:
- slow = lock_sock_fast(sk);
- if (!skb_kill_datagram(sk, skb, flags)) {
+ if (!__sk_queue_drop_skb(sk, skb, flags)) {
UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
}
- unlock_sock_fast(sk, slow);
+ kfree_skb(skb);
/* starting over for a new packet, but check if we need to yield */
cond_resched();
@@ -1456,7 +1553,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk_incoming_cpu_update(sk);
}
- rc = __sock_queue_rcv_skb(sk, skb);
+ rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
@@ -1471,7 +1568,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
return 0;
-
}
static struct static_key udp_encap_needed __read_mostly;
@@ -1493,7 +1589,6 @@ EXPORT_SYMBOL(udp_encap_enable);
int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
- int rc;
int is_udplite = IS_UDPLITE(sk);
/*
@@ -1580,25 +1675,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto drop;
udp_csum_pull_header(skb);
- if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- is_udplite);
- goto drop;
- }
-
- rc = 0;
ipv4_pktinfo_prepare(sk, skb);
- bh_lock_sock(sk);
- if (!sock_owned_by_user(sk))
- rc = __udp_queue_rcv_skb(sk, skb);
- else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
- bh_unlock_sock(sk);
- goto drop;
- }
- bh_unlock_sock(sk);
-
- return rc;
+ return __udp_queue_rcv_skb(sk, skb);
csum_error:
__UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
@@ -2208,13 +2287,13 @@ struct proto udp_prot = {
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
+ .init = udp_init_sock,
.destroy = udp_destroy_sock,
.setsockopt = udp_setsockopt,
.getsockopt = udp_getsockopt,
.sendmsg = udp_sendmsg,
.recvmsg = udp_recvmsg,
.sendpage = udp_sendpage,
- .backlog_rcv = __udp_queue_rcv_skb,
.release_cb = ip4_datagram_release_cb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2f1f5d439788..d8983e15f859 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -112,6 +112,27 @@ static inline u32 cstamp_delta(unsigned long cstamp)
return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}
+static inline s32 rfc3315_s14_backoff_init(s32 irt)
+{
+ /* multiply 'initial retransmission time' by 0.9 .. 1.1 */
+ u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt;
+ do_div(tmp, 1000000);
+ return (s32)tmp;
+}
+
+static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt)
+{
+ /* multiply 'retransmission timeout' by 1.9 .. 2.1 */
+ u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt;
+ do_div(tmp, 1000000);
+ if ((s32)tmp > mrt) {
+ /* multiply 'maximum retransmission time' by 0.9 .. 1.1 */
+ tmp = (900000 + prandom_u32() % 200001) * (u64)mrt;
+ do_div(tmp, 1000000);
+ }
+ return (s32)tmp;
+}
+
#ifdef CONFIG_SYSCTL
static int addrconf_sysctl_register(struct inet6_dev *idev);
static void addrconf_sysctl_unregister(struct inet6_dev *idev);
@@ -187,6 +208,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.dad_transmits = 1,
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
+ .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL,
.rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
@@ -232,6 +254,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.dad_transmits = 1,
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
+ .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL,
.rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
@@ -3687,7 +3710,7 @@ static void addrconf_rs_timer(unsigned long data)
if (idev->if_flags & IF_RA_RCVD)
goto out;
- if (idev->rs_probes++ < idev->cnf.rtr_solicits) {
+ if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) {
write_unlock(&idev->lock);
if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
ndisc_send_rs(dev, &lladdr,
@@ -3696,11 +3719,13 @@ static void addrconf_rs_timer(unsigned long data)
goto put;
write_lock(&idev->lock);
+ idev->rs_interval = rfc3315_s14_backoff_update(
+ idev->rs_interval, idev->cnf.rtr_solicit_max_interval);
/* The wait after the last probe can be shorter */
addrconf_mod_rs_timer(idev, (idev->rs_probes ==
idev->cnf.rtr_solicits) ?
idev->cnf.rtr_solicit_delay :
- idev->cnf.rtr_solicit_interval);
+ idev->rs_interval);
} else {
/*
* Note: we do not support deprecated "all on-link"
@@ -3949,7 +3974,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp);
send_rs = send_mld &&
ipv6_accept_ra(ifp->idev) &&
- ifp->idev->cnf.rtr_solicits > 0 &&
+ ifp->idev->cnf.rtr_solicits != 0 &&
(dev->flags&IFF_LOOPBACK) == 0;
read_unlock_bh(&ifp->idev->lock);
@@ -3971,10 +3996,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
write_lock_bh(&ifp->idev->lock);
spin_lock(&ifp->lock);
+ ifp->idev->rs_interval = rfc3315_s14_backoff_init(
+ ifp->idev->cnf.rtr_solicit_interval);
ifp->idev->rs_probes = 1;
ifp->idev->if_flags |= IF_RS_SENT;
- addrconf_mod_rs_timer(ifp->idev,
- ifp->idev->cnf.rtr_solicit_interval);
+ addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval);
spin_unlock(&ifp->lock);
write_unlock_bh(&ifp->idev->lock);
}
@@ -4891,6 +4917,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
array[DEVCONF_RTR_SOLICIT_INTERVAL] =
jiffies_to_msecs(cnf->rtr_solicit_interval);
+ array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] =
+ jiffies_to_msecs(cnf->rtr_solicit_max_interval);
array[DEVCONF_RTR_SOLICIT_DELAY] =
jiffies_to_msecs(cnf->rtr_solicit_delay);
array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
@@ -4961,18 +4989,18 @@ static inline size_t inet6_if_nlmsg_size(void)
}
static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
- int items, int bytes)
+ int bytes)
{
int i;
- int pad = bytes - sizeof(u64) * items;
+ int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX;
BUG_ON(pad < 0);
/* Use put_unaligned() because stats may not be aligned for u64. */
- put_unaligned(items, &stats[0]);
- for (i = 1; i < items; i++)
+ put_unaligned(ICMP6_MIB_MAX, &stats[0]);
+ for (i = 1; i < ICMP6_MIB_MAX; i++)
put_unaligned(atomic_long_read(&mib[i]), &stats[i]);
- memset(&stats[items], 0, pad);
+ memset(&stats[ICMP6_MIB_MAX], 0, pad);
}
static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
@@ -5005,7 +5033,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
offsetof(struct ipstats_mib, syncp));
break;
case IFLA_INET6_ICMP6STATS:
- __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes);
+ __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes);
break;
}
}
@@ -5099,7 +5127,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
return -EINVAL;
if (!ipv6_accept_ra(idev))
return -EINVAL;
- if (idev->cnf.rtr_solicits <= 0)
+ if (idev->cnf.rtr_solicits == 0)
return -EINVAL;
write_lock_bh(&idev->lock);
@@ -5128,8 +5156,10 @@ update_lft:
if (update_rs) {
idev->if_flags |= IF_RS_SENT;
+ idev->rs_interval = rfc3315_s14_backoff_init(
+ idev->cnf.rtr_solicit_interval);
idev->rs_probes = 1;
- addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval);
+ addrconf_mod_rs_timer(idev, idev->rs_interval);
}
/* Well, that's kinda nasty ... */
@@ -5467,20 +5497,6 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
}
static
-int addrconf_sysctl_hop_limit(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- struct ctl_table lctl;
- int min_hl = 1, max_hl = 255;
-
- lctl = *ctl;
- lctl.extra1 = &min_hl;
- lctl.extra2 = &max_hl;
-
- return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos);
-}
-
-static
int addrconf_sysctl_mtu(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -5713,6 +5729,10 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
return ret;
}
+static int minus_one = -1;
+static const int one = 1;
+static const int two_five_five = 255;
+
static const struct ctl_table addrconf_sysctl[] = {
{
.procname = "forwarding",
@@ -5726,7 +5746,9 @@ static const struct ctl_table addrconf_sysctl[] = {
.data = &ipv6_devconf.hop_limit,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = addrconf_sysctl_hop_limit,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&one,
+ .extra2 = (void *)&two_five_five,
},
{
.procname = "mtu",
@@ -5768,7 +5790,8 @@ static const struct ctl_table addrconf_sysctl[] = {
.data = &ipv6_devconf.rtr_solicits,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &minus_one,
},
{
.procname = "router_solicitation_interval",
@@ -5778,6 +5801,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec_jiffies,
},
{
+ .procname = "router_solicitation_max_interval",
+ .data = &ipv6_devconf.rtr_solicit_max_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
.procname = "router_solicitation_delay",
.data = &ipv6_devconf.rtr_solicit_delay,
.maxlen = sizeof(int),
@@ -6044,8 +6074,14 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
for (i = 0; table[i].data; i++) {
table[i].data += (char *)p - (char *)&ipv6_devconf;
- table[i].extra1 = idev; /* embedded; no ref */
- table[i].extra2 = net;
+ /* If one of these is already set, then it is not safe to
+ * overwrite either of them: this makes proc_dointvec_minmax
+ * usable.
+ */
+ if (!table[i].extra1 && !table[i].extra2) {
+ table[i].extra1 = idev; /* embedded; no ref */
+ table[i].extra2 = net;
+ }
}
snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name);
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index e50c27a93e17..a7bc54ab46e2 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -6,29 +6,88 @@
#include <linux/socket.h>
#include <linux/types.h>
#include <net/checksum.h>
+#include <net/dst_cache.h>
#include <net/ip.h>
#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
#include <net/lwtunnel.h>
#include <net/protocol.h>
#include <uapi/linux/ila.h>
#include "ila.h"
+struct ila_lwt {
+ struct ila_params p;
+ struct dst_cache dst_cache;
+ u32 connected : 1;
+};
+
+static inline struct ila_lwt *ila_lwt_lwtunnel(
+ struct lwtunnel_state *lwt)
+{
+ return (struct ila_lwt *)lwt->data;
+}
+
static inline struct ila_params *ila_params_lwtunnel(
- struct lwtunnel_state *lwstate)
+ struct lwtunnel_state *lwt)
{
- return (struct ila_params *)lwstate->data;
+ return &ila_lwt_lwtunnel(lwt)->p;
}
static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct rt6_info *rt = (struct rt6_info *)orig_dst;
+ struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate);
+ struct dst_entry *dst;
+ int err = -EINVAL;
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), true);
+ ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate),
+ true);
- return dst->lwtstate->orig_output(net, sk, skb);
+ if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) {
+ /* Already have a next hop address in route, no need for
+ * dest cache route.
+ */
+ return orig_dst->lwtstate->orig_output(net, sk, skb);
+ }
+
+ dst = dst_cache_get(&ilwt->dst_cache);
+ if (unlikely(!dst)) {
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct flowi6 fl6;
+
+ /* Lookup a route for the new destination. Take into
+ * account that the base route may already have a gateway.
+ */
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = orig_dst->dev->ifindex;
+ fl6.flowi6_iif = LOOPBACK_IFINDEX;
+ fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst,
+ &ip6h->daddr);
+
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ err = -EHOSTUNREACH;
+ dst_release(dst);
+ goto drop;
+ }
+
+ dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto drop;
+ }
+
+ if (ilwt->connected)
+ dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr);
+ }
+
+ skb_dst_set(skb, dst);
+ return dst_output(net, sk, skb);
drop:
kfree_skb(skb);
@@ -60,9 +119,9 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts)
{
+ struct ila_lwt *ilwt;
struct ila_params *p;
struct nlattr *tb[ILA_ATTR_MAX + 1];
- size_t encap_len = sizeof(*p);
struct lwtunnel_state *newts;
const struct fib6_config *cfg6 = cfg;
struct ila_addr *iaddr;
@@ -71,7 +130,7 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
if (family != AF_INET6)
return -EINVAL;
- if (cfg6->fc_dst_len < sizeof(struct ila_locator) + 1) {
+ if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
/* Need to have full locator and at least type field
* included in destination
*/
@@ -95,11 +154,17 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
if (!tb[ILA_ATTR_LOCATOR])
return -EINVAL;
- newts = lwtunnel_state_alloc(encap_len);
+ newts = lwtunnel_state_alloc(sizeof(*ilwt));
if (!newts)
return -ENOMEM;
- newts->len = encap_len;
+ ilwt = ila_lwt_lwtunnel(newts);
+ ret = dst_cache_init(&ilwt->dst_cache, GFP_ATOMIC);
+ if (ret) {
+ kfree(newts);
+ return ret;
+ }
+
p = ila_params_lwtunnel(newts);
p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
@@ -120,11 +185,19 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla,
newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
LWTUNNEL_STATE_INPUT_REDIRECT;
+ if (cfg6->fc_dst_len == 8 * sizeof(struct in6_addr))
+ ilwt->connected = 1;
+
*ts = newts;
return 0;
}
+static void ila_destroy_state(struct lwtunnel_state *lwt)
+{
+ dst_cache_destroy(&ila_lwt_lwtunnel(lwt)->dst_cache);
+}
+
static int ila_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
{
@@ -159,6 +232,7 @@ static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
static const struct lwtunnel_encap_ops ila_encap_ops = {
.build_state = ila_build_state,
+ .destroy_state = ila_destroy_state,
.output = ila_output,
.input = ila_input,
.fill_encap = ila_fill_encap_info,
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index e604013dd814..628ae6d85b59 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -118,15 +118,7 @@ static const struct rhashtable_params rht_params = {
.obj_cmpfn = ila_cmpfn,
};
-static struct genl_family ila_nl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = ILA_GENL_NAME,
- .version = ILA_GENL_VERSION,
- .maxattr = ILA_ATTR_MAX,
- .netnsok = true,
- .parallel_ops = true,
-};
+static struct genl_family ila_nl_family;
static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
@@ -561,6 +553,18 @@ static const struct genl_ops ila_nl_ops[] = {
},
};
+static struct genl_family ila_nl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = ILA_GENL_NAME,
+ .version = ILA_GENL_VERSION,
+ .maxattr = ILA_ATTR_MAX,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .ops = ila_nl_ops,
+ .n_ops = ARRAY_SIZE(ila_nl_ops),
+};
+
#define ILA_HASH_TABLE_SIZE 1024
static __net_init int ila_init_net(struct net *net)
@@ -623,7 +627,7 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
return 0;
}
-int ila_xlat_init(void)
+int __init ila_xlat_init(void)
{
int ret;
@@ -631,8 +635,7 @@ int ila_xlat_init(void)
if (ret)
goto exit;
- ret = genl_register_family_with_ops(&ila_nl_family,
- ila_nl_ops);
+ ret = genl_register_family(&ila_nl_family);
if (ret < 0)
goto unregister;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4ce74f86291b..d7d6d3ae0b3b 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -648,7 +648,6 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = skb->protocol;
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6a66adba0c22..3a70567846aa 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1634,7 +1634,7 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
struct ip6_tnl *tnl = netdev_priv(dev);
if (tnl->parms.proto == IPPROTO_IPIP) {
- if (new_mtu < 68)
+ if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
} else {
if (new_mtu < IPV6_MIN_MTU)
@@ -1787,6 +1787,8 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
dev->mtu = ETH_DATA_LEN - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = 0xFFF8 - dev->hard_header_len;
return 0;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 8a02ca8a11af..35c5b2d8c401 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -812,30 +812,11 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return err;
}
-/**
- * vti6_tnl_change_mtu - change mtu manually for tunnel device
- * @dev: virtual device associated with tunnel
- * @new_mtu: the new mtu
- *
- * Return:
- * 0 on success,
- * %-EINVAL if mtu too small
- **/
-static int vti6_change_mtu(struct net_device *dev, int new_mtu)
-{
- if (new_mtu < IPV6_MIN_MTU)
- return -EINVAL;
-
- dev->mtu = new_mtu;
- return 0;
-}
-
static const struct net_device_ops vti6_netdev_ops = {
.ndo_init = vti6_dev_init,
.ndo_uninit = vti6_dev_uninit,
.ndo_start_xmit = vti6_tnl_xmit,
.ndo_do_ioctl = vti6_ioctl,
- .ndo_change_mtu = vti6_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -855,6 +836,8 @@ static void vti6_dev_setup(struct net_device *dev)
dev->type = ARPHRD_TUNNEL6;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
dev->mtu = ETH_DATA_LEN;
+ dev->min_mtu = IPV6_MIN_MTU;
+ dev->max_mtu = IP_MAX_MTU;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
netif_keep_dst(dev);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index fccb5dd91902..7f4265b1649b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2285,8 +2285,8 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
return 1;
}
-int ip6mr_get_route(struct net *net,
- struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
+ int nowait, u32 portid)
{
int err;
struct mr6_table *mrt;
@@ -2331,6 +2331,7 @@ int ip6mr_get_route(struct net *net,
return -ENOMEM;
}
+ NETLINK_CB(skb2).portid = portid;
skb_reset_transport_header(skb2);
skb_put(skb2, sizeof(struct ipv6hdr));
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 679253d0af84..cc8e3ae9ca73 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -30,6 +30,11 @@
#include <net/transp_v6.h>
#include <net/ipv6.h>
+#define MAX4(a, b, c, d) \
+ max_t(u32, max_t(u32, a, b), max_t(u32, c, d))
+#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \
+ IPSTATS_MIB_MAX, ICMP_MIB_MAX)
+
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
@@ -191,25 +196,34 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
atomic_long_t *smib,
const struct snmp_mib *itemlist)
{
+ unsigned long buff[SNMP_MIB_MAX];
int i;
- unsigned long val;
- for (i = 0; itemlist[i].name; i++) {
- val = pcpumib ?
- snmp_fold_field(pcpumib, itemlist[i].entry) :
- atomic_long_read(smib + itemlist[i].entry);
- seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val);
+ if (pcpumib) {
+ memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+
+ snmp_get_cpu_field_batch(buff, itemlist, pcpumib);
+ for (i = 0; itemlist[i].name; i++)
+ seq_printf(seq, "%-32s\t%lu\n",
+ itemlist[i].name, buff[i]);
+ } else {
+ for (i = 0; itemlist[i].name; i++)
+ seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
+ atomic_long_read(smib + itemlist[i].entry));
}
}
static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
const struct snmp_mib *itemlist, size_t syncpoff)
{
+ u64 buff64[SNMP_MIB_MAX];
int i;
+ memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+
+ snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
for (i = 0; itemlist[i].name; i++)
- seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name,
- snmp_fold_field64(mib, itemlist[i].entry, syncpoff));
+ seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]);
}
static int snmp6_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 54404f08efcc..d7e8b955ade8 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -65,11 +65,12 @@
#define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */
-static struct raw_hashinfo raw_v6_hashinfo = {
+struct raw_hashinfo raw_v6_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
};
+EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
-static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
+struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
unsigned short num, const struct in6_addr *loc_addr,
const struct in6_addr *rmt_addr, int dif)
{
@@ -102,6 +103,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
found:
return sk;
}
+EXPORT_SYMBOL_GPL(__raw_v6_lookup);
/*
* 0 - deliver
@@ -1259,6 +1261,7 @@ struct proto rawv6_prot = {
.compat_getsockopt = compat_rawv6_getsockopt,
.compat_ioctl = compat_rawv6_ioctl,
#endif
+ .diag_destroy = raw_abort,
};
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5a5aeb92b4ec..bdbc38e8bf29 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3216,7 +3216,9 @@ static int rt6_fill_node(struct net *net,
if (iif) {
#ifdef CONFIG_IPV6_MROUTE
if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
- int err = ip6mr_get_route(net, skb, rtm, nowait);
+ int err = ip6mr_get_route(net, skb, rtm, nowait,
+ portid);
+
if (err <= 0) {
if (!nowait) {
if (err == 0)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b1cdf8009d29..dc7a3449ffc1 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1318,23 +1318,11 @@ done:
return err;
}
-static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
- int t_hlen = tunnel->hlen + sizeof(struct iphdr);
-
- if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - t_hlen)
- return -EINVAL;
- dev->mtu = new_mtu;
- return 0;
-}
-
static const struct net_device_ops ipip6_netdev_ops = {
.ndo_init = ipip6_tunnel_init,
.ndo_uninit = ipip6_tunnel_uninit,
.ndo_start_xmit = sit_tunnel_xmit,
.ndo_do_ioctl = ipip6_tunnel_ioctl,
- .ndo_change_mtu = ipip6_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
};
@@ -1365,6 +1353,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->type = ARPHRD_SIT;
dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
+ dev->min_mtu = IPV6_MIN_MTU;
+ dev->max_mtu = 0xFFF8 - t_hlen;
dev->flags = IFF_NOARP;
netif_keep_dst(dev);
dev->addr_len = 4;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 54cf7197c7ab..5a27ab4eab39 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1190,6 +1190,16 @@ out:
return NULL;
}
+static void tcp_v6_restore_cb(struct sk_buff *skb)
+{
+ /* We need to move header back to the beginning if xfrm6_policy_check()
+ * and tcp_v6_fill_cb() are going to be called again.
+ * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
+ */
+ memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
+ sizeof(struct inet6_skb_parm));
+}
+
/* The socket must have it's spinlock held when we get
* here, unless it is a TCP_LISTEN socket.
*
@@ -1319,6 +1329,7 @@ ipv6_pktoptions:
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
skb_set_owner_r(opt_skb, sk);
+ tcp_v6_restore_cb(opt_skb);
opt_skb = xchg(&np->pktoptions, opt_skb);
} else {
__kfree_skb(opt_skb);
@@ -1352,15 +1363,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
TCP_SKB_CB(skb)->sacked = 0;
}
-static void tcp_v6_restore_cb(struct sk_buff *skb)
-{
- /* We need to move header back to the beginning if xfrm6_policy_check()
- * and tcp_v6_fill_cb() are going to be called again.
- */
- memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
- sizeof(struct inet6_skb_parm));
-}
-
static int tcp_v6_rcv(struct sk_buff *skb)
{
const struct tcphdr *th;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9aa7c1c7a9ce..71963b23d5a5 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -334,7 +334,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false;
int is_udp4;
- bool slow;
if (flags & MSG_ERRQUEUE)
return ipv6_recv_error(sk, msg, len, addr_len);
@@ -378,7 +377,6 @@ try_again:
goto csum_copy_err;
}
if (unlikely(err)) {
- trace_kfree_skb(skb, udpv6_recvmsg);
if (!peeked) {
atomic_inc(&sk->sk_drops);
if (is_udp4)
@@ -388,7 +386,7 @@ try_again:
UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
is_udplite);
}
- skb_free_datagram_locked(sk, skb);
+ kfree_skb(skb);
return err;
}
if (!peeked) {
@@ -437,12 +435,11 @@ try_again:
if (flags & MSG_TRUNC)
err = ulen;
- __skb_free_datagram_locked(sk, skb, peeking ? -err : err);
+ skb_consume_udp(sk, skb, peeking ? -err : err);
return err;
csum_copy_err:
- slow = lock_sock_fast(sk);
- if (!skb_kill_datagram(sk, skb, flags)) {
+ if (!__sk_queue_drop_skb(sk, skb, flags)) {
if (is_udp4) {
UDP_INC_STATS(sock_net(sk),
UDP_MIB_CSUMERRORS, is_udplite);
@@ -455,7 +452,7 @@ csum_copy_err:
UDP_MIB_INERRORS, is_udplite);
}
}
- unlock_sock_fast(sk, slow);
+ kfree_skb(skb);
/* starting over for a new packet, but check if we need to yield */
cond_resched();
@@ -523,7 +520,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk_incoming_cpu_update(sk);
}
- rc = __sock_queue_rcv_skb(sk, skb);
+ rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
@@ -535,6 +532,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
return -1;
}
+
return 0;
}
@@ -556,7 +554,6 @@ EXPORT_SYMBOL(udpv6_encap_enable);
int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
- int rc;
int is_udplite = IS_UDPLITE(sk);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
@@ -622,25 +619,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
goto drop;
udp_csum_pull_header(skb);
- if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
- __UDP6_INC_STATS(sock_net(sk),
- UDP_MIB_RCVBUFERRORS, is_udplite);
- goto drop;
- }
skb_dst_drop(skb);
- bh_lock_sock(sk);
- rc = 0;
- if (!sock_owned_by_user(sk))
- rc = __udpv6_queue_rcv_skb(sk, skb);
- else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
- bh_unlock_sock(sk);
- goto drop;
- }
- bh_unlock_sock(sk);
-
- return rc;
+ return __udpv6_queue_rcv_skb(sk, skb);
csum_error:
__UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
@@ -1433,12 +1415,12 @@ struct proto udpv6_prot = {
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
+ .init = udp_init_sock,
.destroy = udpv6_destroy_sock,
.setsockopt = udpv6_setsockopt,
.getsockopt = udpv6_getsockopt,
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
- .backlog_rcv = __udpv6_queue_rcv_skb,
.release_cb = ip6_datagram_release_cb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index d8b7267280c3..74d09f91709e 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -51,7 +51,6 @@ static const struct net_device_ops irlan_eth_netdev_ops = {
.ndo_stop = irlan_eth_close,
.ndo_start_xmit = irlan_eth_xmit,
.ndo_set_rx_mode = irlan_eth_set_multicast_list,
- .ndo_change_mtu = eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
};
@@ -67,7 +66,8 @@ static void irlan_eth_setup(struct net_device *dev)
dev->netdev_ops = &irlan_eth_netdev_ops;
dev->destructor = free_netdev;
-
+ dev->min_mtu = 0;
+ dev->max_mtu = ETH_MAX_MTU;
/*
* Lets do all queueing in IrTTP instead of this device driver.
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
index e15c40e86660..7fc340e574cf 100644
--- a/net/irda/irnetlink.c
+++ b/net/irda/irnetlink.c
@@ -24,13 +24,7 @@
-static struct genl_family irda_nl_family = {
- .id = GENL_ID_GENERATE,
- .name = IRDA_NL_NAME,
- .hdrsize = 0,
- .version = IRDA_NL_VERSION,
- .maxattr = IRDA_NL_CMD_MAX,
-};
+static struct genl_family irda_nl_family;
static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info)
{
@@ -147,9 +141,19 @@ static const struct genl_ops irda_nl_ops[] = {
};
-int irda_nl_register(void)
+static struct genl_family irda_nl_family __ro_after_init = {
+ .name = IRDA_NL_NAME,
+ .hdrsize = 0,
+ .version = IRDA_NL_VERSION,
+ .maxattr = IRDA_NL_CMD_MAX,
+ .module = THIS_MODULE,
+ .ops = irda_nl_ops,
+ .n_ops = ARRAY_SIZE(irda_nl_ops),
+};
+
+int __init irda_nl_register(void)
{
- return genl_register_family_with_ops(&irda_nl_family, irda_nl_ops);
+ return genl_register_family(&irda_nl_family);
}
void irda_nl_unregister(void)
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 02b45a8e8b35..cfb9e5f4e28f 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -453,19 +453,27 @@ static void iucv_sever_path(struct sock *sk, int with_user_data)
}
}
-/* Send FIN through an IUCV socket for HIPER transport */
+/* Send controlling flags through an IUCV socket for HIPER transport */
static int iucv_send_ctrl(struct sock *sk, u8 flags)
{
int err = 0;
int blen;
struct sk_buff *skb;
+ u8 shutdown = 0;
blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
+ if (sk->sk_shutdown & SEND_SHUTDOWN) {
+ /* controlling flags should be sent anyway */
+ shutdown = sk->sk_shutdown;
+ sk->sk_shutdown &= RCV_SHUTDOWN;
+ }
skb = sock_alloc_send_skb(sk, blen, 1, &err);
if (skb) {
skb_reserve(skb, blen);
err = afiucv_hs_send(NULL, sk, skb, flags);
}
+ if (shutdown)
+ sk->sk_shutdown = shutdown;
return err;
}
@@ -1315,8 +1323,13 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
}
IUCV_SKB_CB(skb)->offset = 0;
- if (sock_queue_rcv_skb(sk, skb))
- skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb);
+ if (sk_filter(sk, skb)) {
+ atomic_inc(&sk->sk_drops); /* skb rejected by filter */
+ kfree_skb(skb);
+ return;
+ }
+ if (__sock_queue_rcv_skb(sk, skb)) /* handle rcv queue full */
+ skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
}
/* iucv_process_message_q() - Process outstanding IUCV messages
@@ -1430,13 +1443,13 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
rskb = skb_dequeue(&iucv->backlog_skb_q);
while (rskb) {
IUCV_SKB_CB(rskb)->offset = 0;
- if (sock_queue_rcv_skb(sk, rskb)) {
+ if (__sock_queue_rcv_skb(sk, rskb)) {
+ /* handle rcv queue full */
skb_queue_head(&iucv->backlog_skb_q,
rskb);
break;
- } else {
- rskb = skb_dequeue(&iucv->backlog_skb_q);
}
+ rskb = skb_dequeue(&iucv->backlog_skb_q);
}
if (skb_queue_empty(&iucv->backlog_skb_q)) {
if (!list_empty(&iucv->message_q.list))
@@ -2116,12 +2129,17 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
skb_reset_transport_header(skb);
skb_reset_network_header(skb);
IUCV_SKB_CB(skb)->offset = 0;
+ if (sk_filter(sk, skb)) {
+ atomic_inc(&sk->sk_drops); /* skb rejected by filter */
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
spin_lock(&iucv->message_q.lock);
if (skb_queue_empty(&iucv->backlog_skb_q)) {
- if (sock_queue_rcv_skb(sk, skb)) {
+ if (__sock_queue_rcv_skb(sk, skb))
/* handle rcv queue full */
skb_queue_tail(&iucv->backlog_skb_q, skb);
- }
} else
skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
spin_unlock(&iucv->message_q.lock);
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index b7f869a85ab7..7e08a4d3d77d 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1160,19 +1160,6 @@ out:
return copied ? : err;
}
-static ssize_t kcm_sock_splice(struct sock *sk,
- struct pipe_inode_info *pipe,
- struct splice_pipe_desc *spd)
-{
- int ret;
-
- release_sock(sk);
- ret = splice_to_pipe(pipe, spd);
- lock_sock(sk);
-
- return ret;
-}
-
static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
@@ -1202,8 +1189,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
if (len > rxm->full_len)
len = rxm->full_len;
- copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags,
- kcm_sock_splice);
+ copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
if (copied < 0) {
err = copied;
goto err_out;
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 965f7e344cef..e2c6ae024565 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -259,6 +259,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
session->mtu = dev->mtu - session->hdr_len;
dev->mtu = session->mtu;
dev->needed_headroom += session->hdr_len;
+ dev->min_mtu = 0;
+ dev->max_mtu = ETH_MAX_MTU;
priv = netdev_priv(dev);
priv->dev = dev;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index bf3117771822..59aa2d204e4a 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -31,14 +31,7 @@
#include "l2tp_core.h"
-static struct genl_family l2tp_nl_family = {
- .id = GENL_ID_GENERATE,
- .name = L2TP_GENL_NAME,
- .version = L2TP_GENL_VERSION,
- .hdrsize = 0,
- .maxattr = L2TP_ATTR_MAX,
- .netnsok = true,
-};
+static struct genl_family l2tp_nl_family;
static const struct genl_multicast_group l2tp_multicast_group[] = {
{
@@ -977,6 +970,19 @@ static const struct genl_ops l2tp_nl_ops[] = {
},
};
+static struct genl_family l2tp_nl_family __ro_after_init = {
+ .name = L2TP_GENL_NAME,
+ .version = L2TP_GENL_VERSION,
+ .hdrsize = 0,
+ .maxattr = L2TP_ATTR_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = l2tp_nl_ops,
+ .n_ops = ARRAY_SIZE(l2tp_nl_ops),
+ .mcgrps = l2tp_multicast_group,
+ .n_mcgrps = ARRAY_SIZE(l2tp_multicast_group),
+};
+
int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops)
{
int ret;
@@ -1010,12 +1016,10 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type)
}
EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
-static int l2tp_nl_init(void)
+static int __init l2tp_nl_init(void)
{
pr_info("L2TP netlink interface\n");
- return genl_register_family_with_ops_groups(&l2tp_nl_family,
- l2tp_nl_ops,
- l2tp_multicast_group);
+ return genl_register_family(&l2tp_nl_family);
}
static void l2tp_nl_cleanup(void)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e29ff5749944..fd6541f3ade3 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3,6 +3,7 @@
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
+ * Copyright (C) 2015-2016 Intel Deutschland GmbH
*
* This file is GPLv2 as found in COPYING.
*/
@@ -152,6 +153,149 @@ static void ieee80211_stop_p2p_device(struct wiphy *wiphy,
ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev));
}
+static int ieee80211_start_nan(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ struct cfg80211_nan_conf *conf)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ int ret;
+
+ mutex_lock(&sdata->local->chanctx_mtx);
+ ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
+ mutex_unlock(&sdata->local->chanctx_mtx);
+ if (ret < 0)
+ return ret;
+
+ ret = ieee80211_do_open(wdev, true);
+ if (ret)
+ return ret;
+
+ ret = drv_start_nan(sdata->local, sdata, conf);
+ if (ret)
+ ieee80211_sdata_stop(sdata);
+
+ sdata->u.nan.conf = *conf;
+
+ return ret;
+}
+
+static void ieee80211_stop_nan(struct wiphy *wiphy,
+ struct wireless_dev *wdev)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+
+ drv_stop_nan(sdata->local, sdata);
+ ieee80211_sdata_stop(sdata);
+}
+
+static int ieee80211_nan_change_conf(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ struct cfg80211_nan_conf *conf,
+ u32 changes)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ struct cfg80211_nan_conf new_conf;
+ int ret = 0;
+
+ if (sdata->vif.type != NL80211_IFTYPE_NAN)
+ return -EOPNOTSUPP;
+
+ if (!ieee80211_sdata_running(sdata))
+ return -ENETDOWN;
+
+ new_conf = sdata->u.nan.conf;
+
+ if (changes & CFG80211_NAN_CONF_CHANGED_PREF)
+ new_conf.master_pref = conf->master_pref;
+
+ if (changes & CFG80211_NAN_CONF_CHANGED_DUAL)
+ new_conf.dual = conf->dual;
+
+ ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes);
+ if (!ret)
+ sdata->u.nan.conf = new_conf;
+
+ return ret;
+}
+
+static int ieee80211_add_nan_func(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ struct cfg80211_nan_func *nan_func)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ int ret;
+
+ if (sdata->vif.type != NL80211_IFTYPE_NAN)
+ return -EOPNOTSUPP;
+
+ if (!ieee80211_sdata_running(sdata))
+ return -ENETDOWN;
+
+ spin_lock_bh(&sdata->u.nan.func_lock);
+
+ ret = idr_alloc(&sdata->u.nan.function_inst_ids,
+ nan_func, 1, sdata->local->hw.max_nan_de_entries + 1,
+ GFP_ATOMIC);
+ spin_unlock_bh(&sdata->u.nan.func_lock);
+
+ if (ret < 0)
+ return ret;
+
+ nan_func->instance_id = ret;
+
+ WARN_ON(nan_func->instance_id == 0);
+
+ ret = drv_add_nan_func(sdata->local, sdata, nan_func);
+ if (ret) {
+ spin_lock_bh(&sdata->u.nan.func_lock);
+ idr_remove(&sdata->u.nan.function_inst_ids,
+ nan_func->instance_id);
+ spin_unlock_bh(&sdata->u.nan.func_lock);
+ }
+
+ return ret;
+}
+
+static struct cfg80211_nan_func *
+ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata,
+ u64 cookie)
+{
+ struct cfg80211_nan_func *func;
+ int id;
+
+ lockdep_assert_held(&sdata->u.nan.func_lock);
+
+ idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) {
+ if (func->cookie == cookie)
+ return func;
+ }
+
+ return NULL;
+}
+
+static void ieee80211_del_nan_func(struct wiphy *wiphy,
+ struct wireless_dev *wdev, u64 cookie)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ struct cfg80211_nan_func *func;
+ u8 instance_id = 0;
+
+ if (sdata->vif.type != NL80211_IFTYPE_NAN ||
+ !ieee80211_sdata_running(sdata))
+ return;
+
+ spin_lock_bh(&sdata->u.nan.func_lock);
+
+ func = ieee80211_find_nan_func_by_cookie(sdata, cookie);
+ if (func)
+ instance_id = func->instance_id;
+
+ spin_unlock_bh(&sdata->u.nan.func_lock);
+
+ if (instance_id)
+ drv_del_nan_func(sdata->local, sdata, instance_id);
+}
+
static int ieee80211_set_noack_map(struct wiphy *wiphy,
struct net_device *dev,
u16 noack_map)
@@ -257,6 +401,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_MONITOR:
case NL80211_IFTYPE_P2P_DEVICE:
+ case NL80211_IFTYPE_NAN:
case NL80211_IFTYPE_UNSPECIFIED:
case NUM_NL80211_IFTYPES:
case NL80211_IFTYPE_P2P_CLIENT:
@@ -2036,6 +2181,7 @@ static int ieee80211_scan(struct wiphy *wiphy,
!(req->flags & NL80211_SCAN_FLAG_AP)))
return -EOPNOTSUPP;
break;
+ case NL80211_IFTYPE_NAN:
default:
return -EOPNOTSUPP;
}
@@ -3377,6 +3523,63 @@ static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev,
return -ENOENT;
}
+void ieee80211_nan_func_terminated(struct ieee80211_vif *vif,
+ u8 inst_id,
+ enum nl80211_nan_func_term_reason reason,
+ gfp_t gfp)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct cfg80211_nan_func *func;
+ u64 cookie;
+
+ if (WARN_ON(vif->type != NL80211_IFTYPE_NAN))
+ return;
+
+ spin_lock_bh(&sdata->u.nan.func_lock);
+
+ func = idr_find(&sdata->u.nan.function_inst_ids, inst_id);
+ if (WARN_ON(!func)) {
+ spin_unlock_bh(&sdata->u.nan.func_lock);
+ return;
+ }
+
+ cookie = func->cookie;
+ idr_remove(&sdata->u.nan.function_inst_ids, inst_id);
+
+ spin_unlock_bh(&sdata->u.nan.func_lock);
+
+ cfg80211_free_nan_func(func);
+
+ cfg80211_nan_func_terminated(ieee80211_vif_to_wdev(vif), inst_id,
+ reason, cookie, gfp);
+}
+EXPORT_SYMBOL(ieee80211_nan_func_terminated);
+
+void ieee80211_nan_func_match(struct ieee80211_vif *vif,
+ struct cfg80211_nan_match_params *match,
+ gfp_t gfp)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct cfg80211_nan_func *func;
+
+ if (WARN_ON(vif->type != NL80211_IFTYPE_NAN))
+ return;
+
+ spin_lock_bh(&sdata->u.nan.func_lock);
+
+ func = idr_find(&sdata->u.nan.function_inst_ids, match->inst_id);
+ if (WARN_ON(!func)) {
+ spin_unlock_bh(&sdata->u.nan.func_lock);
+ return;
+ }
+ match->cookie = func->cookie;
+
+ spin_unlock_bh(&sdata->u.nan.func_lock);
+
+ cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp);
+}
+EXPORT_SYMBOL(ieee80211_nan_func_match);
+
const struct cfg80211_ops mac80211_config_ops = {
.add_virtual_intf = ieee80211_add_iface,
.del_virtual_intf = ieee80211_del_iface,
@@ -3462,4 +3665,9 @@ const struct cfg80211_ops mac80211_config_ops = {
.set_ap_chanwidth = ieee80211_set_ap_chanwidth,
.add_tx_ts = ieee80211_add_tx_ts,
.del_tx_ts = ieee80211_del_tx_ts,
+ .start_nan = ieee80211_start_nan,
+ .stop_nan = ieee80211_stop_nan,
+ .nan_change_conf = ieee80211_nan_change_conf,
+ .add_nan_func = ieee80211_add_nan_func,
+ .del_nan_func = ieee80211_del_nan_func,
};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 74142d07ad31..e75cbf6ecc26 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -274,6 +274,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
ieee80211_get_max_required_bw(sdata));
break;
case NL80211_IFTYPE_P2P_DEVICE:
+ case NL80211_IFTYPE_NAN:
continue;
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_WDS:
@@ -646,6 +647,9 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata,
struct ieee80211_chanctx *curr_ctx = NULL;
int ret = 0;
+ if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN))
+ return -ENOTSUPP;
+
conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
lockdep_is_held(&local->chanctx_mtx));
@@ -718,6 +722,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local,
switch (sdata->vif.type) {
case NL80211_IFTYPE_P2P_DEVICE:
+ case NL80211_IFTYPE_NAN:
continue;
case NL80211_IFTYPE_STATION:
if (!sdata->u.mgd.associated)
@@ -980,6 +985,7 @@ ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata)
case NL80211_IFTYPE_P2P_CLIENT:
case NL80211_IFTYPE_P2P_GO:
case NL80211_IFTYPE_P2P_DEVICE:
+ case NL80211_IFTYPE_NAN:
case NUM_NL80211_IFTYPES:
WARN_ON(1);
break;
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 8ca62b6bb02a..f56e2f487d09 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -89,13 +89,19 @@ static ssize_t aqm_read(struct file *file,
"R fq_flows_cnt %u\n"
"R fq_backlog %u\n"
"R fq_overlimit %u\n"
+ "R fq_overmemory %u\n"
"R fq_collisions %u\n"
+ "R fq_memory_usage %u\n"
+ "RW fq_memory_limit %u\n"
"RW fq_limit %u\n"
"RW fq_quantum %u\n",
fq->flows_cnt,
fq->backlog,
+ fq->overmemory,
fq->overlimit,
fq->collisions,
+ fq->memory_usage,
+ fq->memory_limit,
fq->limit,
fq->quantum);
@@ -128,6 +134,8 @@ static ssize_t aqm_write(struct file *file,
if (sscanf(buf, "fq_limit %u", &local->fq.limit) == 1)
return count;
+ else if (sscanf(buf, "fq_memory_limit %u", &local->fq.memory_limit) == 1)
+ return count;
else if (sscanf(buf, "fq_quantum %u", &local->fq.quantum) == 1)
return count;
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 5d35c0f37bb7..bcec1240f41d 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -556,9 +556,15 @@ static ssize_t ieee80211_if_parse_tsf(
ret = kstrtoull(buf, 10, &tsf);
if (ret < 0)
return ret;
- if (tsf_is_delta)
- tsf = drv_get_tsf(local, sdata) + tsf_is_delta * tsf;
- if (local->ops->set_tsf) {
+ if (tsf_is_delta && local->ops->offset_tsf) {
+ drv_offset_tsf(local, sdata, tsf_is_delta * tsf);
+ wiphy_info(local->hw.wiphy,
+ "debugfs offset TSF by %018lld\n",
+ tsf_is_delta * tsf);
+ } else if (local->ops->set_tsf) {
+ if (tsf_is_delta)
+ tsf = drv_get_tsf(local, sdata) +
+ tsf_is_delta * tsf;
drv_set_tsf(local, sdata, tsf);
wiphy_info(local->hw.wiphy,
"debugfs set TSF to %#018llx\n", tsf);
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index c701b6438bd9..bb886e7db47f 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -215,6 +215,21 @@ void drv_set_tsf(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+void drv_offset_tsf(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ s64 offset)
+{
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ trace_drv_offset_tsf(local, sdata, offset);
+ if (local->ops->offset_tsf)
+ local->ops->offset_tsf(&local->hw, &sdata->vif, offset);
+ trace_drv_return_void(local);
+}
+
void drv_reset_tsf(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index fe35a1c0dc86..09f77e4a8a79 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -162,6 +162,7 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
return;
if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE ||
+ sdata->vif.type == NL80211_IFTYPE_NAN ||
(sdata->vif.type == NL80211_IFTYPE_MONITOR &&
!sdata->vif.mu_mimo_owner)))
return;
@@ -568,6 +569,9 @@ u64 drv_get_tsf(struct ieee80211_local *local,
void drv_set_tsf(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
u64 tsf);
+void drv_offset_tsf(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ s64 offset);
void drv_reset_tsf(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
@@ -1165,4 +1169,83 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local,
local->ops->wake_tx_queue(&local->hw, &txq->txq);
}
+static inline int drv_start_nan(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_nan_conf *conf)
+{
+ int ret;
+
+ might_sleep();
+ check_sdata_in_driver(sdata);
+
+ trace_drv_start_nan(local, sdata, conf);
+ ret = local->ops->start_nan(&local->hw, &sdata->vif, conf);
+ trace_drv_return_int(local, ret);
+ return ret;
+}
+
+static inline void drv_stop_nan(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ might_sleep();
+ check_sdata_in_driver(sdata);
+
+ trace_drv_stop_nan(local, sdata);
+ local->ops->stop_nan(&local->hw, &sdata->vif);
+ trace_drv_return_void(local);
+}
+
+static inline int drv_nan_change_conf(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_nan_conf *conf,
+ u32 changes)
+{
+ int ret;
+
+ might_sleep();
+ check_sdata_in_driver(sdata);
+
+ if (!local->ops->nan_change_conf)
+ return -EOPNOTSUPP;
+
+ trace_drv_nan_change_conf(local, sdata, conf, changes);
+ ret = local->ops->nan_change_conf(&local->hw, &sdata->vif, conf,
+ changes);
+ trace_drv_return_int(local, ret);
+
+ return ret;
+}
+
+static inline int drv_add_nan_func(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_nan_func *nan_func)
+{
+ int ret;
+
+ might_sleep();
+ check_sdata_in_driver(sdata);
+
+ if (!local->ops->add_nan_func)
+ return -EOPNOTSUPP;
+
+ trace_drv_add_nan_func(local, sdata, nan_func);
+ ret = local->ops->add_nan_func(&local->hw, &sdata->vif, nan_func);
+ trace_drv_return_int(local, ret);
+
+ return ret;
+}
+
+static inline void drv_del_nan_func(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u8 instance_id)
+{
+ might_sleep();
+ check_sdata_in_driver(sdata);
+
+ trace_drv_del_nan_func(local, sdata, instance_id);
+ if (local->ops->del_nan_func)
+ local->ops->del_nan_func(&local->hw, &sdata->vif, instance_id);
+ trace_drv_return_void(local);
+}
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index e496dee5af08..34c2add2c455 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -86,6 +86,8 @@ struct ieee80211_local;
#define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */)
+#define IEEE80211_MAX_NAN_INSTANCE_ID 255
+
struct ieee80211_fragment_entry {
struct sk_buff_head skb_list;
unsigned long first_frag_time;
@@ -813,12 +815,14 @@ enum txq_info_flags {
* @def_flow: used as a fallback flow when a packet destined to @tin hashes to
* a fq_flow which is already owned by a different tin
* @def_cvars: codel vars for @def_flow
+ * @frags: used to keep fragments created after dequeue
*/
struct txq_info {
struct fq_tin tin;
struct fq_flow def_flow;
struct codel_vars def_cvars;
struct codel_stats cstats;
+ struct sk_buff_head frags;
unsigned long flags;
/* keep last! */
@@ -830,6 +834,20 @@ struct ieee80211_if_mntr {
u8 mu_follow_addr[ETH_ALEN] __aligned(2);
};
+/**
+ * struct ieee80211_if_nan - NAN state
+ *
+ * @conf: current NAN configuration
+ * @func_ids: a bitmap of available instance_id's
+ */
+struct ieee80211_if_nan {
+ struct cfg80211_nan_conf conf;
+
+ /* protects function_inst_ids */
+ spinlock_t func_lock;
+ struct idr function_inst_ids;
+};
+
struct ieee80211_sub_if_data {
struct list_head list;
@@ -929,6 +947,7 @@ struct ieee80211_sub_if_data {
struct ieee80211_if_mesh mesh;
struct ieee80211_if_ocb ocb;
struct ieee80211_if_mntr mntr;
+ struct ieee80211_if_nan nan;
} u;
#ifdef CONFIG_MAC80211_DEBUGFS
@@ -1481,6 +1500,13 @@ static inline struct txq_info *to_txq_info(struct ieee80211_txq *txq)
return container_of(txq, struct txq_info, txq);
}
+static inline bool txq_has_queue(struct ieee80211_txq *txq)
+{
+ struct txq_info *txqi = to_txq_info(txq);
+
+ return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets);
+}
+
static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
{
return ether_addr_equal(raddr, addr) ||
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b0abddc714ef..73e6a8fd2845 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -150,15 +150,6 @@ void ieee80211_recalc_idle(struct ieee80211_local *local)
ieee80211_hw_config(local, change);
}
-static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
-{
- if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN)
- return -EINVAL;
-
- dev->mtu = new_mtu;
- return 0;
-}
-
static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr,
bool check_dup)
{
@@ -327,6 +318,9 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata,
int n_queues = sdata->local->hw.queues;
int i;
+ if (iftype == NL80211_IFTYPE_NAN)
+ return 0;
+
if (iftype != NL80211_IFTYPE_P2P_DEVICE) {
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
if (WARN_ON_ONCE(sdata->vif.hw_queue[i] ==
@@ -545,6 +539,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_P2P_DEVICE:
case NL80211_IFTYPE_OCB:
+ case NL80211_IFTYPE_NAN:
/* no special treatment */
break;
case NL80211_IFTYPE_UNSPECIFIED:
@@ -646,7 +641,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
local->fif_probe_req++;
}
- if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE)
+ if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
+ sdata->vif.type != NL80211_IFTYPE_NAN)
changed |= ieee80211_reset_erp_info(sdata);
ieee80211_bss_info_change_notify(sdata, changed);
@@ -660,6 +656,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
break;
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_P2P_DEVICE:
+ case NL80211_IFTYPE_NAN:
break;
default:
/* not reached */
@@ -792,6 +789,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
struct ps_data *ps;
struct cfg80211_chan_def chandef;
bool cancel_scan;
+ struct cfg80211_nan_func *func;
clear_bit(SDATA_STATE_RUNNING, &sdata->state);
@@ -944,6 +942,18 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
ieee80211_adjust_monitor_flags(sdata, -1);
break;
+ case NL80211_IFTYPE_NAN:
+ /* clean all the functions */
+ spin_lock_bh(&sdata->u.nan.func_lock);
+
+ idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, i) {
+ idr_remove(&sdata->u.nan.function_inst_ids, i);
+ cfg80211_free_nan_func(func);
+ }
+ idr_destroy(&sdata->u.nan.function_inst_ids);
+
+ spin_unlock_bh(&sdata->u.nan.func_lock);
+ break;
case NL80211_IFTYPE_P2P_DEVICE:
/* relies on synchronize_rcu() below */
RCU_INIT_POINTER(local->p2p_sdata, NULL);
@@ -1147,7 +1157,6 @@ static const struct net_device_ops ieee80211_dataif_ops = {
.ndo_uninit = ieee80211_uninit,
.ndo_start_xmit = ieee80211_subif_start_xmit,
.ndo_set_rx_mode = ieee80211_set_multicast_list,
- .ndo_change_mtu = ieee80211_change_mtu,
.ndo_set_mac_address = ieee80211_change_mac,
.ndo_select_queue = ieee80211_netdev_select_queue,
.ndo_get_stats64 = ieee80211_get_stats64,
@@ -1181,7 +1190,6 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
.ndo_uninit = ieee80211_uninit,
.ndo_start_xmit = ieee80211_monitor_start_xmit,
.ndo_set_rx_mode = ieee80211_set_multicast_list,
- .ndo_change_mtu = ieee80211_change_mtu,
.ndo_set_mac_address = ieee80211_change_mac,
.ndo_select_queue = ieee80211_monitor_select_queue,
.ndo_get_stats64 = ieee80211_get_stats64,
@@ -1455,6 +1463,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_WDS:
sdata->vif.bss_conf.bssid = NULL;
break;
+ case NL80211_IFTYPE_NAN:
+ idr_init(&sdata->u.nan.function_inst_ids);
+ spin_lock_init(&sdata->u.nan.func_lock);
+ sdata->vif.bss_conf.bssid = sdata->vif.addr;
+ break;
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_P2P_DEVICE:
sdata->vif.bss_conf.bssid = sdata->vif.addr;
@@ -1722,7 +1735,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ASSERT_RTNL();
- if (type == NL80211_IFTYPE_P2P_DEVICE) {
+ if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) {
struct wireless_dev *wdev;
sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size,
@@ -1860,6 +1873,10 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops);
+ /* MTU range: 256 - 2304 */
+ ndev->min_mtu = 256;
+ ndev->max_mtu = IEEE80211_MAX_DATA_LEN;
+
ret = register_netdevice(ndev);
if (ret) {
ieee80211_if_free(ndev);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ac053a9df36d..1075ac24c8c5 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -821,6 +821,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
!local->ops->tdls_recv_channel_switch))
return -EOPNOTSUPP;
+ if (WARN_ON(local->hw.wiphy->interface_modes &
+ BIT(NL80211_IFTYPE_NAN) &&
+ (!local->ops->start_nan || !local->ops->stop_nan)))
+ return -EINVAL;
+
#ifdef CONFIG_PM
if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume))
return -EINVAL;
@@ -1058,6 +1063,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
local->dynamic_ps_forced_timeout = -1;
+ if (!local->hw.max_nan_de_entries)
+ local->hw.max_nan_de_entries = IEEE80211_MAX_NAN_INSTANCE_ID;
+
result = ieee80211_wep_init(local);
if (result < 0)
wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index 64bc22ad9496..faca22cd02b5 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -28,7 +28,7 @@
* could be, for instance, in case a neighbor is restarted and its TSF counter
* reset.
*/
-#define TOFFSET_MAXIMUM_ADJUSTMENT 30000 /* 30 ms */
+#define TOFFSET_MAXIMUM_ADJUSTMENT 800 /* 0.8 ms */
struct sync_method {
u8 method;
@@ -70,9 +70,13 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata)
}
spin_unlock_bh(&ifmsh->sync_offset_lock);
- tsf = drv_get_tsf(local, sdata);
- if (tsf != -1ULL)
- drv_set_tsf(local, sdata, tsf + tsfdelta);
+ if (local->ops->offset_tsf) {
+ drv_offset_tsf(local, sdata, tsfdelta);
+ } else {
+ tsf = drv_get_tsf(local, sdata);
+ if (tsf != -1ULL)
+ drv_set_tsf(local, sdata, tsf + tsfdelta);
+ }
}
static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 55a9c5b94ce1..c3f610bba3fe 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -128,7 +128,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
if (!ieee80211_sdata_running(sdata))
continue;
- if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE)
+ if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE ||
+ sdata->vif.type == NL80211_IFTYPE_NAN)
continue;
if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
@@ -838,6 +839,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
case NL80211_IFTYPE_P2P_DEVICE:
need_offchan = true;
break;
+ case NL80211_IFTYPE_NAN:
default:
return -EOPNOTSUPP;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f7cf342bab52..6175db385ba7 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1323,9 +1323,7 @@ static void sta_ps_start(struct sta_info *sta)
return;
for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
- struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
-
- if (txqi->tin.backlog_packets)
+ if (txq_has_queue(sta->sta.txq[tid]))
set_bit(tid, &sta->txq_buffered_tids);
else
clear_bit(tid, &sta->txq_buffered_tids);
@@ -3586,6 +3584,9 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
ieee80211_is_probe_req(hdr->frame_control) ||
ieee80211_is_probe_resp(hdr->frame_control) ||
ieee80211_is_beacon(hdr->frame_control);
+ case NL80211_IFTYPE_NAN:
+ /* Currently no frames on NAN interface are allowed */
+ return false;
default:
break;
}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 011880d633b4..78e9ecbc96e6 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1202,12 +1202,10 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
if (sta->sta.txq[0]) {
for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
- struct txq_info *txqi = to_txq_info(sta->sta.txq[i]);
-
- if (!txqi->tin.backlog_packets)
+ if (!txq_has_queue(sta->sta.txq[i]))
continue;
- drv_wake_tx_queue(local, txqi);
+ drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i]));
}
}
@@ -1638,10 +1636,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
return;
for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
- struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
-
if (!(driver_release_tids & BIT(tid)) ||
- txqi->tin.backlog_packets)
+ txq_has_queue(sta->sta.txq[tid]))
continue;
sta_info_recalc_tim(sta);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 77e4c53baefb..92a47afaa989 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -984,6 +984,32 @@ TRACE_EVENT(drv_set_tsf,
)
);
+TRACE_EVENT(drv_offset_tsf,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ s64 offset),
+
+ TP_ARGS(local, sdata, offset),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(s64, tsf_offset)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->tsf_offset = offset;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT " tsf offset:%lld",
+ LOCAL_PR_ARG, VIF_PR_ARG,
+ (unsigned long long)__entry->tsf_offset
+ )
+);
+
DEFINE_EVENT(local_sdata_evt, drv_reset_tsf,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata),
@@ -1700,6 +1726,139 @@ TRACE_EVENT(drv_get_expected_throughput,
)
);
+TRACE_EVENT(drv_start_nan,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_nan_conf *conf),
+
+ TP_ARGS(local, sdata, conf),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(u8, master_pref)
+ __field(u8, dual)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->master_pref = conf->master_pref;
+ __entry->dual = conf->dual;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT
+ ", master preference: %u, dual: %d",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref,
+ __entry->dual
+ )
+);
+
+TRACE_EVENT(drv_stop_nan,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata),
+
+ TP_ARGS(local, sdata),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG
+ )
+);
+
+TRACE_EVENT(drv_nan_change_conf,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_nan_conf *conf,
+ u32 changes),
+
+ TP_ARGS(local, sdata, conf, changes),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(u8, master_pref)
+ __field(u8, dual)
+ __field(u32, changes)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->master_pref = conf->master_pref;
+ __entry->dual = conf->dual;
+ __entry->changes = changes;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT
+ ", master preference: %u, dual: %d, changes: 0x%x",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref,
+ __entry->dual, __entry->changes
+ )
+);
+
+TRACE_EVENT(drv_add_nan_func,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_nan_func *func),
+
+ TP_ARGS(local, sdata, func),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(u8, type)
+ __field(u8, inst_id)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->type = func->type;
+ __entry->inst_id = func->instance_id;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT
+ ", type: %u, inst_id: %u",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->type, __entry->inst_id
+ )
+);
+
+TRACE_EVENT(drv_del_nan_func,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u8 instance_id),
+
+ TP_ARGS(local, sdata, instance_id),
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(u8, instance_id)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->instance_id = instance_id;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT
+ ", instance_id: %u",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->instance_id
+ )
+);
+
/*
* Tracing for API calls that drivers call.
*/
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1ff08be90a98..1c56abc49627 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -796,36 +796,6 @@ static __le16 ieee80211_tx_next_seq(struct sta_info *sta, int tid)
return ret;
}
-static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
- struct ieee80211_vif *vif,
- struct ieee80211_sta *pubsta,
- struct sk_buff *skb)
-{
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct ieee80211_txq *txq = NULL;
-
- if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
- (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
- return NULL;
-
- if (!ieee80211_is_data(hdr->frame_control))
- return NULL;
-
- if (pubsta) {
- u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
-
- txq = pubsta->txq[tid];
- } else if (vif) {
- txq = vif->txq;
- }
-
- if (!txq)
- return NULL;
-
- return to_txq_info(txq);
-}
-
static ieee80211_tx_result debug_noinline
ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
{
@@ -883,9 +853,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
tx->sta->tx_stats.msdu[tid]++;
- if (!ieee80211_get_txq(tx->local, info->control.vif, &tx->sta->sta,
- tx->skb))
- hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
+ hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
return TX_CONTINUE;
}
@@ -1274,6 +1242,36 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
return TX_CONTINUE;
}
+static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *pubsta,
+ struct sk_buff *skb)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_txq *txq = NULL;
+
+ if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
+ (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
+ return NULL;
+
+ if (!ieee80211_is_data(hdr->frame_control))
+ return NULL;
+
+ if (pubsta) {
+ u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+
+ txq = pubsta->txq[tid];
+ } else if (vif) {
+ txq = vif->txq;
+ }
+
+ if (!txq)
+ return NULL;
+
+ return to_txq_info(txq);
+}
+
static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
{
IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
@@ -1405,6 +1403,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
fq_flow_init(&txqi->def_flow);
codel_vars_init(&txqi->def_cvars);
codel_stats_init(&txqi->cstats);
+ __skb_queue_head_init(&txqi->frags);
txqi->txq.vif = &sdata->vif;
@@ -1427,6 +1426,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local,
struct fq_tin *tin = &txqi->tin;
fq_tin_reset(fq, tin, fq_skb_free_func);
+ ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
}
int ieee80211_txq_setup_flows(struct ieee80211_local *local)
@@ -1434,6 +1434,8 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local)
struct fq *fq = &local->fq;
int ret;
int i;
+ bool supp_vht = false;
+ enum nl80211_band band;
if (!local->ops->wake_tx_queue)
return 0;
@@ -1442,6 +1444,23 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local)
if (ret)
return ret;
+ /*
+ * If the hardware doesn't support VHT, it is safe to limit the maximum
+ * queue size. 4 Mbytes is 64 max-size aggregates in 802.11n.
+ */
+ for (band = 0; band < NUM_NL80211_BANDS; band++) {
+ struct ieee80211_supported_band *sband;
+
+ sband = local->hw.wiphy->bands[band];
+ if (!sband)
+ continue;
+
+ supp_vht = supp_vht || sband->vht_cap.vht_supported;
+ }
+
+ if (!supp_vht)
+ fq->memory_limit = 4 << 20; /* 4 Mbytes */
+
codel_params_init(&local->cparams);
local->cparams.interval = MS2TIME(100);
local->cparams.target = MS2TIME(20);
@@ -1477,54 +1496,46 @@ void ieee80211_txq_teardown_flows(struct ieee80211_local *local)
spin_unlock_bh(&fq->lock);
}
-struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq)
+static bool ieee80211_queue_skb(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct sk_buff *skb)
{
- struct ieee80211_local *local = hw_to_local(hw);
- struct txq_info *txqi = container_of(txq, struct txq_info, txq);
- struct ieee80211_hdr *hdr;
- struct sk_buff *skb = NULL;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct fq *fq = &local->fq;
- struct fq_tin *tin = &txqi->tin;
+ struct ieee80211_vif *vif;
+ struct txq_info *txqi;
+ struct ieee80211_sta *pubsta;
- spin_lock_bh(&fq->lock);
+ if (!local->ops->wake_tx_queue ||
+ sdata->vif.type == NL80211_IFTYPE_MONITOR)
+ return false;
- if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
- goto out;
+ if (sta && sta->uploaded)
+ pubsta = &sta->sta;
+ else
+ pubsta = NULL;
- skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
- if (!skb)
- goto out;
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ sdata = container_of(sdata->bss,
+ struct ieee80211_sub_if_data, u.ap);
- ieee80211_set_skb_vif(skb, txqi);
+ vif = &sdata->vif;
+ txqi = ieee80211_get_txq(local, vif, pubsta, skb);
- hdr = (struct ieee80211_hdr *)skb->data;
- if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) {
- struct sta_info *sta = container_of(txq->sta, struct sta_info,
- sta);
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ if (!txqi)
+ return false;
- hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid);
- if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
- info->flags |= IEEE80211_TX_CTL_AMPDU;
- else
- info->flags &= ~IEEE80211_TX_CTL_AMPDU;
- }
+ info->control.vif = vif;
-out:
+ spin_lock_bh(&fq->lock);
+ ieee80211_txq_enqueue(local, txqi, skb);
spin_unlock_bh(&fq->lock);
- if (skb && skb_has_frag_list(skb) &&
- !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) {
- if (skb_linearize(skb)) {
- ieee80211_free_txskb(&local->hw, skb);
- return NULL;
- }
- }
+ drv_wake_tx_queue(local, txqi);
- return skb;
+ return true;
}
-EXPORT_SYMBOL(ieee80211_tx_dequeue);
static bool ieee80211_tx_frags(struct ieee80211_local *local,
struct ieee80211_vif *vif,
@@ -1533,9 +1544,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
bool txpending)
{
struct ieee80211_tx_control control = {};
- struct fq *fq = &local->fq;
struct sk_buff *skb, *tmp;
- struct txq_info *txqi;
unsigned long flags;
skb_queue_walk_safe(skbs, skb, tmp) {
@@ -1550,21 +1559,6 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
}
#endif
- txqi = ieee80211_get_txq(local, vif, sta, skb);
- if (txqi) {
- info->control.vif = vif;
-
- __skb_unlink(skb, skbs);
-
- spin_lock_bh(&fq->lock);
- ieee80211_txq_enqueue(local, txqi, skb);
- spin_unlock_bh(&fq->lock);
-
- drv_wake_tx_queue(local, txqi);
-
- continue;
- }
-
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
if (local->queue_stop_reasons[q] ||
(!txpending && !skb_queue_empty(&local->pending[q]))) {
@@ -1685,10 +1679,13 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
/*
* Invoke TX handlers, return 0 on success and non-zero if the
* frame was dropped or queued.
+ *
+ * The handlers are split into an early and late part. The latter is everything
+ * that can be sensitive to reordering, and will be deferred to after packets
+ * are dequeued from the intermediate queues (when they are enabled).
*/
-static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
+static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx)
{
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
ieee80211_tx_result res = TX_DROP;
#define CALL_TXH(txh) \
@@ -1706,6 +1703,31 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
CALL_TXH(ieee80211_tx_h_rate_ctrl);
+ txh_done:
+ if (unlikely(res == TX_DROP)) {
+ I802_DEBUG_INC(tx->local->tx_handlers_drop);
+ if (tx->skb)
+ ieee80211_free_txskb(&tx->local->hw, tx->skb);
+ else
+ ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs);
+ return -1;
+ } else if (unlikely(res == TX_QUEUED)) {
+ I802_DEBUG_INC(tx->local->tx_handlers_queued);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Late handlers can be called while the sta lock is held. Handlers that can
+ * cause packets to be generated will cause deadlock!
+ */
+static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+ ieee80211_tx_result res = TX_CONTINUE;
+
if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) {
__skb_queue_tail(&tx->skbs, tx->skb);
tx->skb = NULL;
@@ -1738,6 +1760,15 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
return 0;
}
+static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
+{
+ int r = invoke_tx_handlers_early(tx);
+
+ if (r)
+ return r;
+ return invoke_tx_handlers_late(tx);
+}
+
bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw,
struct ieee80211_vif *vif, struct sk_buff *skb,
int band, struct ieee80211_sta **sta)
@@ -1812,7 +1843,13 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
info->hw_queue =
sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
- if (!invoke_tx_handlers(&tx))
+ if (invoke_tx_handlers_early(&tx))
+ return false;
+
+ if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb))
+ return true;
+
+ if (!invoke_tx_handlers_late(&tx))
result = __ieee80211_tx(local, &tx.skbs, led_len,
tx.sta, txpending);
@@ -3156,8 +3193,71 @@ out:
return ret;
}
+/*
+ * Can be called while the sta lock is held. Anything that can cause packets to
+ * be generated will cause deadlock!
+ */
+static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta, u8 pn_offs,
+ struct ieee80211_key *key,
+ struct sk_buff *skb)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_hdr *hdr = (void *)skb->data;
+ u8 tid = IEEE80211_NUM_TIDS;
+
+ if (key)
+ info->control.hw_key = &key->conf;
+
+ ieee80211_tx_stats(skb->dev, skb->len);
+
+ if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+ tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+ *ieee80211_get_qos_ctl(hdr) = tid;
+ hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
+ } else {
+ info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
+ hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
+ sdata->sequence_number += 0x10;
+ }
+
+ if (skb_shinfo(skb)->gso_size)
+ sta->tx_stats.msdu[tid] +=
+ DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
+ else
+ sta->tx_stats.msdu[tid]++;
+
+ info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+
+ /* statistics normally done by ieee80211_tx_h_stats (but that
+ * has to consider fragmentation, so is more complex)
+ */
+ sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
+ sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
+
+ if (pn_offs) {
+ u64 pn;
+ u8 *crypto_hdr = skb->data + pn_offs;
+
+ switch (key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_CCMP:
+ case WLAN_CIPHER_SUITE_CCMP_256:
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ pn = atomic64_inc_return(&key->conf.tx_pn);
+ crypto_hdr[0] = pn;
+ crypto_hdr[1] = pn >> 8;
+ crypto_hdr[4] = pn >> 16;
+ crypto_hdr[5] = pn >> 24;
+ crypto_hdr[6] = pn >> 32;
+ crypto_hdr[7] = pn >> 40;
+ break;
+ }
+ }
+}
+
static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
- struct net_device *dev, struct sta_info *sta,
+ struct sta_info *sta,
struct ieee80211_fast_tx *fast_tx,
struct sk_buff *skb)
{
@@ -3208,8 +3308,6 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
return true;
}
- ieee80211_tx_stats(dev, skb->len + extra_head);
-
if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
return true;
@@ -3238,24 +3336,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
IEEE80211_TX_CTL_DONTFRAG |
(tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
-
- if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
- *ieee80211_get_qos_ctl(hdr) = tid;
- if (!ieee80211_get_txq(local, &sdata->vif, &sta->sta, skb))
- hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
- } else {
- info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
- hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
- sdata->sequence_number += 0x10;
- }
-
- if (skb_shinfo(skb)->gso_size)
- sta->tx_stats.msdu[tid] +=
- DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
- else
- sta->tx_stats.msdu[tid]++;
-
- info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+ info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT;
__skb_queue_head_init(&tx.skbs);
@@ -3265,9 +3346,6 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
tx.sta = sta;
tx.key = fast_tx->key;
- if (fast_tx->key)
- info->control.hw_key = &fast_tx->key->conf;
-
if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
tx.skb = skb;
r = ieee80211_tx_h_rate_ctrl(&tx);
@@ -3281,31 +3359,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
}
}
- /* statistics normally done by ieee80211_tx_h_stats (but that
- * has to consider fragmentation, so is more complex)
- */
- sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
- sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
-
- if (fast_tx->pn_offs) {
- u64 pn;
- u8 *crypto_hdr = skb->data + fast_tx->pn_offs;
+ if (ieee80211_queue_skb(local, sdata, sta, skb))
+ return true;
- switch (fast_tx->key->conf.cipher) {
- case WLAN_CIPHER_SUITE_CCMP:
- case WLAN_CIPHER_SUITE_CCMP_256:
- case WLAN_CIPHER_SUITE_GCMP:
- case WLAN_CIPHER_SUITE_GCMP_256:
- pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn);
- crypto_hdr[0] = pn;
- crypto_hdr[1] = pn >> 8;
- crypto_hdr[4] = pn >> 16;
- crypto_hdr[5] = pn >> 24;
- crypto_hdr[6] = pn >> 32;
- crypto_hdr[7] = pn >> 40;
- break;
- }
- }
+ ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
+ fast_tx->key, skb);
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
sdata = container_of(sdata->bss,
@@ -3316,6 +3374,94 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
return true;
}
+struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct txq_info *txqi = container_of(txq, struct txq_info, txq);
+ struct ieee80211_hdr *hdr;
+ struct sk_buff *skb = NULL;
+ struct fq *fq = &local->fq;
+ struct fq_tin *tin = &txqi->tin;
+ struct ieee80211_tx_info *info;
+ struct ieee80211_tx_data tx;
+ ieee80211_tx_result r;
+
+ spin_lock_bh(&fq->lock);
+
+ if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
+ goto out;
+
+ /* Make sure fragments stay together. */
+ skb = __skb_dequeue(&txqi->frags);
+ if (skb)
+ goto out;
+
+begin:
+ skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
+ if (!skb)
+ goto out;
+
+ ieee80211_set_skb_vif(skb, txqi);
+
+ hdr = (struct ieee80211_hdr *)skb->data;
+ info = IEEE80211_SKB_CB(skb);
+
+ memset(&tx, 0, sizeof(tx));
+ __skb_queue_head_init(&tx.skbs);
+ tx.local = local;
+ tx.skb = skb;
+ tx.sdata = vif_to_sdata(info->control.vif);
+
+ if (txq->sta)
+ tx.sta = container_of(txq->sta, struct sta_info, sta);
+
+ /*
+ * The key can be removed while the packet was queued, so need to call
+ * this here to get the current key.
+ */
+ r = ieee80211_tx_h_select_key(&tx);
+ if (r != TX_CONTINUE) {
+ ieee80211_free_txskb(&local->hw, skb);
+ goto begin;
+ }
+
+ if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) {
+ struct sta_info *sta = container_of(txq->sta, struct sta_info,
+ sta);
+ u8 pn_offs = 0;
+
+ if (tx.key &&
+ (tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
+ pn_offs = ieee80211_hdrlen(hdr->frame_control);
+
+ ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
+ tx.key, skb);
+ } else {
+ if (invoke_tx_handlers_late(&tx))
+ goto begin;
+
+ skb = __skb_dequeue(&tx.skbs);
+
+ if (!skb_queue_empty(&tx.skbs))
+ skb_queue_splice_tail(&tx.skbs, &txqi->frags);
+ }
+
+ if (skb && skb_has_frag_list(skb) &&
+ !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) {
+ if (skb_linearize(skb)) {
+ ieee80211_free_txskb(&local->hw, skb);
+ goto begin;
+ }
+ }
+
+out:
+ spin_unlock_bh(&fq->lock);
+
+ return skb;
+}
+EXPORT_SYMBOL(ieee80211_tx_dequeue);
+
void __ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev,
u32 info_flags)
@@ -3340,7 +3486,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
fast_tx = rcu_dereference(sta->fast_tx);
if (fast_tx &&
- ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb))
+ ieee80211_xmit_fast(sdata, sta, fast_tx, skb))
goto out;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b6865d884487..545c79a42a77 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1209,7 +1209,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
}
if (sdata->vif.type != NL80211_IFTYPE_MONITOR &&
- sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) {
+ sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
+ sdata->vif.type != NL80211_IFTYPE_NAN) {
sdata->vif.bss_conf.qos = enable_qos;
if (bss_notify)
ieee80211_bss_info_change_notify(sdata,
@@ -1748,6 +1749,46 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&local->sta_mtx);
}
+static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata)
+{
+ struct cfg80211_nan_func *func, **funcs;
+ int res, id, i = 0;
+
+ res = drv_start_nan(sdata->local, sdata,
+ &sdata->u.nan.conf);
+ if (WARN_ON(res))
+ return res;
+
+ funcs = kzalloc((sdata->local->hw.max_nan_de_entries + 1) *
+ sizeof(*funcs), GFP_KERNEL);
+ if (!funcs)
+ return -ENOMEM;
+
+ /* Add all the functions:
+ * This is a little bit ugly. We need to call a potentially sleeping
+ * callback for each NAN function, so we can't hold the spinlock.
+ */
+ spin_lock_bh(&sdata->u.nan.func_lock);
+
+ idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id)
+ funcs[i++] = func;
+
+ spin_unlock_bh(&sdata->u.nan.func_lock);
+
+ for (i = 0; funcs[i]; i++) {
+ res = drv_add_nan_func(sdata->local, sdata, funcs[i]);
+ if (WARN_ON(res))
+ ieee80211_nan_func_terminated(&sdata->vif,
+ funcs[i]->instance_id,
+ NL80211_NAN_FUNC_TERM_REASON_ERROR,
+ GFP_KERNEL);
+ }
+
+ kfree(funcs);
+
+ return 0;
+}
+
int ieee80211_reconfig(struct ieee80211_local *local)
{
struct ieee80211_hw *hw = &local->hw;
@@ -1971,6 +2012,13 @@ int ieee80211_reconfig(struct ieee80211_local *local)
ieee80211_bss_info_change_notify(sdata, changed);
}
break;
+ case NL80211_IFTYPE_NAN:
+ res = ieee80211_reconfig_nan(sdata);
+ if (res < 0) {
+ ieee80211_handle_reconfig_failure(local);
+ return res;
+ }
+ break;
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_MONITOR:
@@ -3393,11 +3441,18 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq,
unsigned long *byte_cnt)
{
struct txq_info *txqi = to_txq_info(txq);
+ u32 frag_cnt = 0, frag_bytes = 0;
+ struct sk_buff *skb;
+
+ skb_queue_walk(&txqi->frags, skb) {
+ frag_cnt++;
+ frag_bytes += skb->len;
+ }
if (frame_cnt)
- *frame_cnt = txqi->tin.backlog_packets;
+ *frame_cnt = txqi->tin.backlog_packets + frag_cnt;
if (byte_cnt)
- *byte_cnt = txqi->tin.backlog_bytes;
+ *byte_cnt = txqi->tin.backlog_bytes + frag_bytes;
}
EXPORT_SYMBOL(ieee80211_txq_get_depth);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 732a5c17e986..bdfef6c3271a 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -1,9 +1,6 @@
#ifndef MPLS_INTERNAL_H
#define MPLS_INTERNAL_H
-
-struct mpls_shim_hdr {
- __be32 label_stack_entry;
-};
+#include <net/mpls.h>
struct mpls_entry_decoded {
u32 label;
@@ -93,11 +90,6 @@ struct mpls_route { /* next hop label forwarding entry */
#define endfor_nexthops(rt) }
-static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
-{
- return (struct mpls_shim_hdr *)skb_network_header(skb);
-}
-
static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos)
{
struct mpls_shim_hdr result;
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index cf52cf30ac4b..2f7ccd934416 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -133,7 +133,6 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla,
struct mpls_iptunnel_encap *tun_encap_info;
struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
struct lwtunnel_state *newts;
- int tun_encap_info_len;
int ret;
ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
@@ -144,13 +143,11 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla,
if (!tb[MPLS_IPTUNNEL_DST])
return -EINVAL;
- tun_encap_info_len = sizeof(*tun_encap_info);
- newts = lwtunnel_state_alloc(tun_encap_info_len);
+ newts = lwtunnel_state_alloc(sizeof(*tun_encap_info));
if (!newts)
return -ENOMEM;
- newts->len = tun_encap_info_len;
tun_encap_info = mpls_lwtunnel_encap(newts);
ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
&tun_encap_info->labels, tun_encap_info->label);
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 33738c060547..13290a70fa71 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -170,6 +170,7 @@ struct ncsi_package;
#define NCSI_PACKAGE_SHIFT 5
#define NCSI_PACKAGE_INDEX(c) (((c) >> NCSI_PACKAGE_SHIFT) & 0x7)
+#define NCSI_RESERVED_CHANNEL 0x1f
#define NCSI_CHANNEL_INDEX(c) ((c) & ((1 << NCSI_PACKAGE_SHIFT) - 1))
#define NCSI_TO_CHANNEL(p, c) (((p) << NCSI_PACKAGE_SHIFT) | (c))
@@ -186,9 +187,15 @@ struct ncsi_channel {
struct ncsi_channel_mode modes[NCSI_MODE_MAX];
struct ncsi_channel_filter *filters[NCSI_FILTER_MAX];
struct ncsi_channel_stats stats;
- struct timer_list timer; /* Link monitor timer */
- bool enabled; /* Timer is enabled */
- unsigned int timeout; /* Times of timeout */
+ struct {
+ struct timer_list timer;
+ bool enabled;
+ unsigned int state;
+#define NCSI_CHANNEL_MONITOR_START 0
+#define NCSI_CHANNEL_MONITOR_RETRY 1
+#define NCSI_CHANNEL_MONITOR_WAIT 2
+#define NCSI_CHANNEL_MONITOR_WAIT_MAX 5
+ } monitor;
struct list_head node;
struct list_head link;
};
@@ -206,7 +213,8 @@ struct ncsi_package {
struct ncsi_request {
unsigned char id; /* Request ID - 0 to 255 */
bool used; /* Request that has been assigned */
- bool driven; /* Drive state machine */
+ unsigned int flags; /* NCSI request property */
+#define NCSI_REQ_FLAG_EVENT_DRIVEN 1
struct ncsi_dev_priv *ndp; /* Associated NCSI device */
struct sk_buff *cmd; /* Associated NCSI command packet */
struct sk_buff *rsp; /* Associated NCSI response packet */
@@ -258,6 +266,7 @@ struct ncsi_dev_priv {
struct list_head packages; /* List of packages */
struct ncsi_request requests[256]; /* Request table */
unsigned int request_id; /* Last used request ID */
+#define NCSI_REQ_START_IDX 1
unsigned int pending_req_num; /* Number of pending requests */
struct ncsi_package *active_package; /* Currently handled package */
struct ncsi_channel *active_channel; /* Currently handled channel */
@@ -274,7 +283,7 @@ struct ncsi_cmd_arg {
unsigned char package; /* Destination package ID */
unsigned char channel; /* Detination channel ID or 0x1f */
unsigned short payload; /* Command packet payload length */
- bool driven; /* Drive the state machine? */
+ unsigned int req_flags; /* NCSI request properties */
union {
unsigned char bytes[16]; /* Command packet specific data */
unsigned short words[8];
@@ -313,7 +322,8 @@ void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp,
unsigned char id,
struct ncsi_package **np,
struct ncsi_channel **nc);
-struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven);
+struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp,
+ unsigned int req_flags);
void ncsi_free_request(struct ncsi_request *nr);
struct ncsi_dev *ncsi_find_dev(struct net_device *dev);
int ncsi_process_next_channel(struct ncsi_dev_priv *ndp);
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index d463468442ae..b41a6617d498 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -53,7 +53,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
struct ncsi_aen_lsc_pkt *lsc;
struct ncsi_channel *nc;
struct ncsi_channel_mode *ncm;
- unsigned long old_data;
+ bool chained;
+ int state;
+ unsigned long old_data, data;
unsigned long flags;
/* Find the NCSI channel */
@@ -62,20 +64,27 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
return -ENODEV;
/* Update the link status */
- ncm = &nc->modes[NCSI_MODE_LINK];
lsc = (struct ncsi_aen_lsc_pkt *)h;
+
+ spin_lock_irqsave(&nc->lock, flags);
+ ncm = &nc->modes[NCSI_MODE_LINK];
old_data = ncm->data[2];
- ncm->data[2] = ntohl(lsc->status);
+ data = ntohl(lsc->status);
+ ncm->data[2] = data;
ncm->data[4] = ntohl(lsc->oem_status);
- if (!((old_data ^ ncm->data[2]) & 0x1) ||
- !list_empty(&nc->link))
+
+ chained = !list_empty(&nc->link);
+ state = nc->state;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ if (!((old_data ^ data) & 0x1) || chained)
return 0;
- if (!(nc->state == NCSI_CHANNEL_INACTIVE && (ncm->data[2] & 0x1)) &&
- !(nc->state == NCSI_CHANNEL_ACTIVE && !(ncm->data[2] & 0x1)))
+ if (!(state == NCSI_CHANNEL_INACTIVE && (data & 0x1)) &&
+ !(state == NCSI_CHANNEL_ACTIVE && !(data & 0x1)))
return 0;
if (!(ndp->flags & NCSI_DEV_HWA) &&
- nc->state == NCSI_CHANNEL_ACTIVE)
+ state == NCSI_CHANNEL_ACTIVE)
ndp->flags |= NCSI_DEV_RESHUFFLE;
ncsi_stop_channel_monitor(nc);
@@ -97,13 +106,21 @@ static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp,
if (!nc)
return -ENODEV;
+ spin_lock_irqsave(&nc->lock, flags);
if (!list_empty(&nc->link) ||
- nc->state != NCSI_CHANNEL_ACTIVE)
+ nc->state != NCSI_CHANNEL_ACTIVE) {
+ spin_unlock_irqrestore(&nc->lock, flags);
return 0;
+ }
+ spin_unlock_irqrestore(&nc->lock, flags);
ncsi_stop_channel_monitor(nc);
+ spin_lock_irqsave(&nc->lock, flags);
+ nc->state = NCSI_CHANNEL_INVISIBLE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
spin_lock_irqsave(&ndp->lock, flags);
- xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+ nc->state = NCSI_CHANNEL_INACTIVE;
list_add_tail_rcu(&nc->link, &ndp->channel_queue);
spin_unlock_irqrestore(&ndp->lock, flags);
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index 21057a8ceeac..db7083bfd476 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -272,7 +272,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
struct sk_buff *skb;
struct ncsi_request *nr;
- nr = ncsi_alloc_request(ndp, nca->driven);
+ nr = ncsi_alloc_request(ndp, nca->req_flags);
if (!nr)
return NULL;
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index ef017b871857..5e509e547c2d 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -132,6 +132,7 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down)
struct ncsi_dev *nd = &ndp->ndev;
struct ncsi_package *np;
struct ncsi_channel *nc;
+ unsigned long flags;
nd->state = ncsi_dev_state_functional;
if (force_down) {
@@ -142,14 +143,21 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down)
nd->link_up = 0;
NCSI_FOR_EACH_PACKAGE(ndp, np) {
NCSI_FOR_EACH_CHANNEL(np, nc) {
+ spin_lock_irqsave(&nc->lock, flags);
+
if (!list_empty(&nc->link) ||
- nc->state != NCSI_CHANNEL_ACTIVE)
+ nc->state != NCSI_CHANNEL_ACTIVE) {
+ spin_unlock_irqrestore(&nc->lock, flags);
continue;
+ }
if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
+ spin_unlock_irqrestore(&nc->lock, flags);
nd->link_up = 1;
goto report;
}
+
+ spin_unlock_irqrestore(&nc->lock, flags);
}
}
@@ -163,43 +171,55 @@ static void ncsi_channel_monitor(unsigned long data)
struct ncsi_package *np = nc->package;
struct ncsi_dev_priv *ndp = np->ndp;
struct ncsi_cmd_arg nca;
- bool enabled;
- unsigned int timeout;
+ bool enabled, chained;
+ unsigned int monitor_state;
unsigned long flags;
- int ret;
+ int state, ret;
spin_lock_irqsave(&nc->lock, flags);
- timeout = nc->timeout;
- enabled = nc->enabled;
+ state = nc->state;
+ chained = !list_empty(&nc->link);
+ enabled = nc->monitor.enabled;
+ monitor_state = nc->monitor.state;
spin_unlock_irqrestore(&nc->lock, flags);
- if (!enabled || !list_empty(&nc->link))
+ if (!enabled || chained)
return;
- if (nc->state != NCSI_CHANNEL_INACTIVE &&
- nc->state != NCSI_CHANNEL_ACTIVE)
+ if (state != NCSI_CHANNEL_INACTIVE &&
+ state != NCSI_CHANNEL_ACTIVE)
return;
- if (!(timeout % 2)) {
+ switch (monitor_state) {
+ case NCSI_CHANNEL_MONITOR_START:
+ case NCSI_CHANNEL_MONITOR_RETRY:
nca.ndp = ndp;
nca.package = np->id;
nca.channel = nc->id;
nca.type = NCSI_PKT_CMD_GLS;
- nca.driven = false;
+ nca.req_flags = 0;
ret = ncsi_xmit_cmd(&nca);
if (ret) {
netdev_err(ndp->ndev.dev, "Error %d sending GLS\n",
ret);
return;
}
- }
- if (timeout + 1 >= 3) {
+ break;
+ case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX:
+ break;
+ default:
if (!(ndp->flags & NCSI_DEV_HWA) &&
- nc->state == NCSI_CHANNEL_ACTIVE)
+ state == NCSI_CHANNEL_ACTIVE) {
ncsi_report_link(ndp, true);
+ ndp->flags |= NCSI_DEV_RESHUFFLE;
+ }
+
+ spin_lock_irqsave(&nc->lock, flags);
+ nc->state = NCSI_CHANNEL_INVISIBLE;
+ spin_unlock_irqrestore(&nc->lock, flags);
spin_lock_irqsave(&ndp->lock, flags);
- xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+ nc->state = NCSI_CHANNEL_INACTIVE;
list_add_tail_rcu(&nc->link, &ndp->channel_queue);
spin_unlock_irqrestore(&ndp->lock, flags);
ncsi_process_next_channel(ndp);
@@ -207,10 +227,9 @@ static void ncsi_channel_monitor(unsigned long data)
}
spin_lock_irqsave(&nc->lock, flags);
- nc->timeout = timeout + 1;
- nc->enabled = true;
+ nc->monitor.state++;
spin_unlock_irqrestore(&nc->lock, flags);
- mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2)));
+ mod_timer(&nc->monitor.timer, jiffies + HZ);
}
void ncsi_start_channel_monitor(struct ncsi_channel *nc)
@@ -218,12 +237,12 @@ void ncsi_start_channel_monitor(struct ncsi_channel *nc)
unsigned long flags;
spin_lock_irqsave(&nc->lock, flags);
- WARN_ON_ONCE(nc->enabled);
- nc->timeout = 0;
- nc->enabled = true;
+ WARN_ON_ONCE(nc->monitor.enabled);
+ nc->monitor.enabled = true;
+ nc->monitor.state = NCSI_CHANNEL_MONITOR_START;
spin_unlock_irqrestore(&nc->lock, flags);
- mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2)));
+ mod_timer(&nc->monitor.timer, jiffies + HZ);
}
void ncsi_stop_channel_monitor(struct ncsi_channel *nc)
@@ -231,14 +250,14 @@ void ncsi_stop_channel_monitor(struct ncsi_channel *nc)
unsigned long flags;
spin_lock_irqsave(&nc->lock, flags);
- if (!nc->enabled) {
+ if (!nc->monitor.enabled) {
spin_unlock_irqrestore(&nc->lock, flags);
return;
}
- nc->enabled = false;
+ nc->monitor.enabled = false;
spin_unlock_irqrestore(&nc->lock, flags);
- del_timer_sync(&nc->timer);
+ del_timer_sync(&nc->monitor.timer);
}
struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
@@ -267,8 +286,9 @@ struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id)
nc->id = id;
nc->package = np;
nc->state = NCSI_CHANNEL_INACTIVE;
- nc->enabled = false;
- setup_timer(&nc->timer, ncsi_channel_monitor, (unsigned long)nc);
+ nc->monitor.enabled = false;
+ setup_timer(&nc->monitor.timer,
+ ncsi_channel_monitor, (unsigned long)nc);
spin_lock_init(&nc->lock);
INIT_LIST_HEAD(&nc->link);
for (index = 0; index < NCSI_CAP_MAX; index++)
@@ -405,7 +425,8 @@ void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp,
* be same. Otherwise, the bogus response might be replied. So
* the available IDs are allocated in round-robin fashion.
*/
-struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven)
+struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp,
+ unsigned int req_flags)
{
struct ncsi_request *nr = NULL;
int i, limit = ARRAY_SIZE(ndp->requests);
@@ -413,30 +434,31 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven)
/* Check if there is one available request until the ceiling */
spin_lock_irqsave(&ndp->lock, flags);
- for (i = ndp->request_id; !nr && i < limit; i++) {
+ for (i = ndp->request_id; i < limit; i++) {
if (ndp->requests[i].used)
continue;
nr = &ndp->requests[i];
nr->used = true;
- nr->driven = driven;
- if (++ndp->request_id >= limit)
- ndp->request_id = 0;
+ nr->flags = req_flags;
+ ndp->request_id = i + 1;
+ goto found;
}
/* Fail back to check from the starting cursor */
- for (i = 0; !nr && i < ndp->request_id; i++) {
+ for (i = NCSI_REQ_START_IDX; i < ndp->request_id; i++) {
if (ndp->requests[i].used)
continue;
nr = &ndp->requests[i];
nr->used = true;
- nr->driven = driven;
- if (++ndp->request_id >= limit)
- ndp->request_id = 0;
+ nr->flags = req_flags;
+ ndp->request_id = i + 1;
+ goto found;
}
- spin_unlock_irqrestore(&ndp->lock, flags);
+found:
+ spin_unlock_irqrestore(&ndp->lock, flags);
return nr;
}
@@ -458,7 +480,7 @@ void ncsi_free_request(struct ncsi_request *nr)
nr->cmd = NULL;
nr->rsp = NULL;
nr->used = false;
- driven = nr->driven;
+ driven = !!(nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN);
spin_unlock_irqrestore(&ndp->lock, flags);
if (driven && cmd && --ndp->pending_req_num == 0)
@@ -508,10 +530,11 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
struct ncsi_package *np = ndp->active_package;
struct ncsi_channel *nc = ndp->active_channel;
struct ncsi_cmd_arg nca;
+ unsigned long flags;
int ret;
nca.ndp = ndp;
- nca.driven = true;
+ nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN;
switch (nd->state) {
case ncsi_dev_state_suspend:
nd->state = ncsi_dev_state_suspend_select;
@@ -527,7 +550,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
nca.package = np->id;
if (nd->state == ncsi_dev_state_suspend_select) {
nca.type = NCSI_PKT_CMD_SP;
- nca.channel = 0x1f;
+ nca.channel = NCSI_RESERVED_CHANNEL;
if (ndp->flags & NCSI_DEV_HWA)
nca.bytes[0] = 0;
else
@@ -544,7 +567,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_suspend_deselect;
} else if (nd->state == ncsi_dev_state_suspend_deselect) {
nca.type = NCSI_PKT_CMD_DP;
- nca.channel = 0x1f;
+ nca.channel = NCSI_RESERVED_CHANNEL;
nd->state = ncsi_dev_state_suspend_done;
}
@@ -556,7 +579,9 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
break;
case ncsi_dev_state_suspend_done:
- xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+ spin_lock_irqsave(&nc->lock, flags);
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ spin_unlock_irqrestore(&nc->lock, flags);
ncsi_process_next_channel(ndp);
break;
@@ -574,10 +599,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
struct ncsi_channel *nc = ndp->active_channel;
struct ncsi_cmd_arg nca;
unsigned char index;
+ unsigned long flags;
int ret;
nca.ndp = ndp;
- nca.driven = true;
+ nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN;
switch (nd->state) {
case ncsi_dev_state_config:
case ncsi_dev_state_config_sp:
@@ -590,7 +616,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
else
nca.bytes[0] = 1;
nca.package = np->id;
- nca.channel = 0x1f;
+ nca.channel = NCSI_RESERVED_CHANNEL;
ret = ncsi_xmit_cmd(&nca);
if (ret)
goto error;
@@ -675,10 +701,12 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
goto error;
break;
case ncsi_dev_state_config_done:
+ spin_lock_irqsave(&nc->lock, flags);
if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1)
- xchg(&nc->state, NCSI_CHANNEL_ACTIVE);
+ nc->state = NCSI_CHANNEL_ACTIVE;
else
- xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ spin_unlock_irqrestore(&nc->lock, flags);
ncsi_start_channel_monitor(nc);
ncsi_process_next_channel(ndp);
@@ -707,18 +735,25 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
found = NULL;
NCSI_FOR_EACH_PACKAGE(ndp, np) {
NCSI_FOR_EACH_CHANNEL(np, nc) {
+ spin_lock_irqsave(&nc->lock, flags);
+
if (!list_empty(&nc->link) ||
- nc->state != NCSI_CHANNEL_INACTIVE)
+ nc->state != NCSI_CHANNEL_INACTIVE) {
+ spin_unlock_irqrestore(&nc->lock, flags);
continue;
+ }
if (!found)
found = nc;
ncm = &nc->modes[NCSI_MODE_LINK];
if (ncm->data[2] & 0x1) {
+ spin_unlock_irqrestore(&nc->lock, flags);
found = nc;
goto out;
}
+
+ spin_unlock_irqrestore(&nc->lock, flags);
}
}
@@ -797,7 +832,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
int ret;
nca.ndp = ndp;
- nca.driven = true;
+ nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN;
switch (nd->state) {
case ncsi_dev_state_probe:
nd->state = ncsi_dev_state_probe_deselect;
@@ -807,7 +842,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
/* Deselect all possible packages */
nca.type = NCSI_PKT_CMD_DP;
- nca.channel = 0x1f;
+ nca.channel = NCSI_RESERVED_CHANNEL;
for (index = 0; index < 8; index++) {
nca.package = index;
ret = ncsi_xmit_cmd(&nca);
@@ -823,7 +858,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
/* Select all possible packages */
nca.type = NCSI_PKT_CMD_SP;
nca.bytes[0] = 1;
- nca.channel = 0x1f;
+ nca.channel = NCSI_RESERVED_CHANNEL;
for (index = 0; index < 8; index++) {
nca.package = index;
ret = ncsi_xmit_cmd(&nca);
@@ -876,7 +911,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nca.type = NCSI_PKT_CMD_SP;
nca.bytes[0] = 1;
nca.package = ndp->active_package->id;
- nca.channel = 0x1f;
+ nca.channel = NCSI_RESERVED_CHANNEL;
ret = ncsi_xmit_cmd(&nca);
if (ret)
goto error;
@@ -884,12 +919,12 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_cis;
break;
case ncsi_dev_state_probe_cis:
- ndp->pending_req_num = 32;
+ ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
/* Clear initial state */
nca.type = NCSI_PKT_CMD_CIS;
nca.package = ndp->active_package->id;
- for (index = 0; index < 0x20; index++) {
+ for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) {
nca.channel = index;
ret = ncsi_xmit_cmd(&nca);
if (ret)
@@ -933,7 +968,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
/* Deselect the active package */
nca.type = NCSI_PKT_CMD_DP;
nca.package = ndp->active_package->id;
- nca.channel = 0x1f;
+ nca.channel = NCSI_RESERVED_CHANNEL;
ret = ncsi_xmit_cmd(&nca);
if (ret)
goto error;
@@ -987,11 +1022,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp)
goto out;
}
- old_state = xchg(&nc->state, NCSI_CHANNEL_INVISIBLE);
list_del_init(&nc->link);
-
spin_unlock_irqrestore(&ndp->lock, flags);
+ spin_lock_irqsave(&nc->lock, flags);
+ old_state = nc->state;
+ nc->state = NCSI_CHANNEL_INVISIBLE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
ndp->active_channel = nc;
ndp->active_package = nc->package;
@@ -1006,7 +1044,7 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp)
break;
default:
netdev_err(ndp->ndev.dev, "Invalid state 0x%x on %d:%d\n",
- nc->state, nc->package->id, nc->id);
+ old_state, nc->package->id, nc->id);
ncsi_report_link(ndp, false);
return -EINVAL;
}
@@ -1070,7 +1108,7 @@ static int ncsi_inet6addr_event(struct notifier_block *this,
return NOTIFY_OK;
nca.ndp = ndp;
- nca.driven = false;
+ nca.req_flags = 0;
nca.package = np->id;
nca.channel = nc->id;
nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
@@ -1118,7 +1156,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
/* Initialize private NCSI device */
spin_lock_init(&ndp->lock);
INIT_LIST_HEAD(&ndp->packages);
- ndp->request_id = 0;
+ ndp->request_id = NCSI_REQ_START_IDX;
for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) {
ndp->requests[i].id = i;
ndp->requests[i].ndp = ndp;
@@ -1149,9 +1187,7 @@ EXPORT_SYMBOL_GPL(ncsi_register_dev);
int ncsi_start_dev(struct ncsi_dev *nd)
{
struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
- struct ncsi_package *np;
- struct ncsi_channel *nc;
- int old_state, ret;
+ int ret;
if (nd->state != ncsi_dev_state_registered &&
nd->state != ncsi_dev_state_functional)
@@ -1163,15 +1199,6 @@ int ncsi_start_dev(struct ncsi_dev *nd)
return 0;
}
- /* Reset channel's state and start over */
- NCSI_FOR_EACH_PACKAGE(ndp, np) {
- NCSI_FOR_EACH_CHANNEL(np, nc) {
- old_state = xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
- WARN_ON_ONCE(!list_empty(&nc->link) ||
- old_state == NCSI_CHANNEL_INVISIBLE);
- }
- }
-
if (ndp->flags & NCSI_DEV_HWA)
ret = ncsi_enable_hwa(ndp);
else
@@ -1181,6 +1208,35 @@ int ncsi_start_dev(struct ncsi_dev *nd)
}
EXPORT_SYMBOL_GPL(ncsi_start_dev);
+void ncsi_stop_dev(struct ncsi_dev *nd)
+{
+ struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+ bool chained;
+ int old_state;
+ unsigned long flags;
+
+ /* Stop the channel monitor and reset channel's state */
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ ncsi_stop_channel_monitor(nc);
+
+ spin_lock_irqsave(&nc->lock, flags);
+ chained = !list_empty(&nc->link);
+ old_state = nc->state;
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ WARN_ON_ONCE(chained ||
+ old_state == NCSI_CHANNEL_INVISIBLE);
+ }
+ }
+
+ ncsi_report_link(ndp, true);
+}
+EXPORT_SYMBOL_GPL(ncsi_stop_dev);
+
void ncsi_unregister_dev(struct ncsi_dev *nd)
{
struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index af84389a6bf1..087db775b3dc 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -317,12 +317,12 @@ static int ncsi_rsp_handler_gls(struct ncsi_request *nr)
ncm->data[3] = ntohl(rsp->other);
ncm->data[4] = ntohl(rsp->oem_status);
- if (nr->driven)
+ if (nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN)
return 0;
/* Reset the channel monitor if it has been enabled */
spin_lock_irqsave(&nc->lock, flags);
- nc->timeout = 0;
+ nc->monitor.state = NCSI_CHANNEL_MONITOR_START;
spin_unlock_irqrestore(&nc->lock, flags);
return 0;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index fa6715db4581..fcb5d1df11e9 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -65,51 +65,37 @@ static DEFINE_MUTEX(nf_hook_mutex);
#define nf_entry_dereference(e) \
rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
-static struct nf_hook_entry *nf_hook_entry_head(struct net *net,
- const struct nf_hook_ops *reg)
+static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *hook_head = NULL;
-
if (reg->pf != NFPROTO_NETDEV)
- hook_head = nf_entry_dereference(net->nf.hooks[reg->pf]
- [reg->hooknum]);
- else if (reg->hooknum == NF_NETDEV_INGRESS) {
+ return net->nf.hooks[reg->pf]+reg->hooknum;
+
#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS) {
if (reg->dev && dev_net(reg->dev) == net)
- hook_head =
- nf_entry_dereference(
- reg->dev->nf_hooks_ingress);
-#endif
- }
- return hook_head;
-}
-
-/* must hold nf_hook_mutex */
-static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg,
- struct nf_hook_entry *entry)
-{
- switch (reg->pf) {
- case NFPROTO_NETDEV:
- /* We already checked in nf_register_net_hook() that this is
- * used from ingress.
- */
- rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry);
- break;
- default:
- rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum],
- entry);
- break;
+ return &reg->dev->nf_hooks_ingress;
}
+#endif
+ return NULL;
}
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *hooks_entry;
- struct nf_hook_entry *entry;
+ struct nf_hook_entry __rcu **pp;
+ struct nf_hook_entry *entry, *p;
+
+ if (reg->pf == NFPROTO_NETDEV) {
+#ifndef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS)
+ return -EOPNOTSUPP;
+#endif
+ if (reg->hooknum != NF_NETDEV_INGRESS ||
+ !reg->dev || dev_net(reg->dev) != net)
+ return -EINVAL;
+ }
- if (reg->pf == NFPROTO_NETDEV &&
- (reg->hooknum != NF_NETDEV_INGRESS ||
- !reg->dev || dev_net(reg->dev) != net))
+ pp = nf_hook_entry_head(net, reg);
+ if (!pp)
return -EINVAL;
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
@@ -121,26 +107,15 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
entry->next = NULL;
mutex_lock(&nf_hook_mutex);
- hooks_entry = nf_hook_entry_head(net, reg);
- if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) {
- /* This is the case where we need to insert at the head */
- entry->next = hooks_entry;
- hooks_entry = NULL;
- }
-
- while (hooks_entry &&
- reg->priority >= hooks_entry->orig_ops->priority &&
- nf_entry_dereference(hooks_entry->next)) {
- hooks_entry = nf_entry_dereference(hooks_entry->next);
- }
-
- if (hooks_entry) {
- entry->next = nf_entry_dereference(hooks_entry->next);
- rcu_assign_pointer(hooks_entry->next, entry);
- } else {
- nf_set_hooks_head(net, reg, entry);
+ /* Find the spot in the list */
+ while ((p = nf_entry_dereference(*pp)) != NULL) {
+ if (reg->priority < p->orig_ops->priority)
+ break;
+ pp = &p->next;
}
+ rcu_assign_pointer(entry->next, p);
+ rcu_assign_pointer(*pp, entry);
mutex_unlock(&nf_hook_mutex);
#ifdef CONFIG_NETFILTER_INGRESS
@@ -156,33 +131,23 @@ EXPORT_SYMBOL(nf_register_net_hook);
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *hooks_entry;
+ struct nf_hook_entry __rcu **pp;
+ struct nf_hook_entry *p;
- mutex_lock(&nf_hook_mutex);
- hooks_entry = nf_hook_entry_head(net, reg);
- if (hooks_entry->orig_ops == reg) {
- nf_set_hooks_head(net, reg,
- nf_entry_dereference(hooks_entry->next));
- goto unlock;
- }
- while (hooks_entry && nf_entry_dereference(hooks_entry->next)) {
- struct nf_hook_entry *next =
- nf_entry_dereference(hooks_entry->next);
- struct nf_hook_entry *nnext;
+ pp = nf_hook_entry_head(net, reg);
+ if (WARN_ON_ONCE(!pp))
+ return;
- if (next->orig_ops != reg) {
- hooks_entry = next;
- continue;
+ mutex_lock(&nf_hook_mutex);
+ while ((p = nf_entry_dereference(*pp)) != NULL) {
+ if (p->orig_ops == reg) {
+ rcu_assign_pointer(*pp, p->next);
+ break;
}
- nnext = nf_entry_dereference(next->next);
- rcu_assign_pointer(hooks_entry->next, nnext);
- hooks_entry = next;
- break;
+ pp = &p->next;
}
-
-unlock:
mutex_unlock(&nf_hook_mutex);
- if (!hooks_entry) {
+ if (!p) {
WARN(1, "nf_unregister_net_hook: hook not found!\n");
return;
}
@@ -194,10 +159,10 @@ unlock:
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
- nf_queue_nf_hook_drop(net, hooks_entry);
+ nf_queue_nf_hook_drop(net, p);
/* other cpu might still process nfqueue verdict that used reg */
synchronize_net();
- kfree(hooks_entry);
+ kfree(p);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c3c809b2e712..6b85ded4f91d 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2840,14 +2840,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
*/
/* IPVS genetlink family */
-static struct genl_family ip_vs_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = IPVS_GENL_NAME,
- .version = IPVS_GENL_VERSION,
- .maxattr = IPVS_CMD_MAX,
- .netnsok = true, /* Make ipvsadm to work on netns */
-};
+static struct genl_family ip_vs_genl_family;
/* Policy used for first-level command attributes */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
@@ -3872,10 +3865,20 @@ static const struct genl_ops ip_vs_genl_ops[] = {
},
};
+static struct genl_family ip_vs_genl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = IPVS_GENL_NAME,
+ .version = IPVS_GENL_VERSION,
+ .maxattr = IPVS_CMD_MAX,
+ .netnsok = true, /* Make ipvsadm to work on netns */
+ .module = THIS_MODULE,
+ .ops = ip_vs_genl_ops,
+ .n_ops = ARRAY_SIZE(ip_vs_genl_ops),
+};
+
static int __init ip_vs_genl_register(void)
{
- return genl_register_family_with_ops(&ip_vs_genl_family,
- ip_vs_genl_ops);
+ return genl_register_family(&ip_vs_genl_family);
}
static void ip_vs_genl_unregister(void)
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 30a17d649a83..3dca90dc24ad 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -422,7 +422,7 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
char buf[NFLOGGER_NAME_LEN];
int r = 0;
int tindex = (unsigned long)table->extra1;
- struct net *net = current->nsproxy->net_ns;
+ struct net *net = table->extra2;
if (write) {
struct ctl_table tmp = *table;
@@ -476,7 +476,6 @@ static int netfilter_log_sysctl_init(struct net *net)
3, "%d", i);
nf_log_sysctl_table[i].procname =
nf_log_sysctl_fnames[i];
- nf_log_sysctl_table[i].data = NULL;
nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN;
nf_log_sysctl_table[i].mode = 0644;
nf_log_sysctl_table[i].proc_handler =
@@ -486,6 +485,9 @@ static int netfilter_log_sysctl_init(struct net *net)
}
}
+ for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
+ table[i].extra2 = net;
+
net->nf.nf_log_dir_header = register_net_sysctl(net,
"net/netfilter/nf_log",
table);
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 070b98938e02..c6baf412236d 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -145,7 +145,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- priv->cost = div_u64(priv->limit.nsecs, priv->limit.rate);
+ priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate);
return 0;
}
@@ -170,7 +170,7 @@ static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
struct nft_limit *priv = nft_expr_priv(expr);
- u64 cost = div_u64(priv->nsecs * pkt->skb->len, priv->rate);
+ u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
if (nft_limit_eval(priv, cost))
regs->verdict.code = NFT_BREAK;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 44a095ecc7b7..2fab0c65aa94 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -467,17 +467,18 @@ static u64 user2credits(u64 user, int revision)
/* If multiplying would overflow... */
if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1))
/* Divide first. */
- return (user / XT_HASHLIMIT_SCALE) *\
- HZ * CREDITS_PER_JIFFY_v1;
+ return div64_u64(user, XT_HASHLIMIT_SCALE)
+ * HZ * CREDITS_PER_JIFFY_v1;
- return (user * HZ * CREDITS_PER_JIFFY_v1) \
- / XT_HASHLIMIT_SCALE;
+ return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1,
+ XT_HASHLIMIT_SCALE);
} else {
if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
- return (user / XT_HASHLIMIT_SCALE_v2) *\
- HZ * CREDITS_PER_JIFFY;
+ return div64_u64(user, XT_HASHLIMIT_SCALE_v2)
+ * HZ * CREDITS_PER_JIFFY;
- return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE_v2;
+ return div64_u64(user * HZ * CREDITS_PER_JIFFY,
+ XT_HASHLIMIT_SCALE_v2);
}
}
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index 2ec93c5e77bb..d177dd066504 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg {
};
/* NetLabel Generic NETLINK CALIPSO family */
-static struct genl_family netlbl_calipso_gnl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = NETLBL_NLTYPE_CALIPSO_NAME,
- .version = NETLBL_PROTO_VERSION,
- .maxattr = NLBL_CALIPSO_A_MAX,
-};
+static struct genl_family netlbl_calipso_gnl_family;
/* NetLabel Netlink attribute policy */
static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = {
@@ -355,6 +349,16 @@ static const struct genl_ops netlbl_calipso_ops[] = {
},
};
+static struct genl_family netlbl_calipso_gnl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = NETLBL_NLTYPE_CALIPSO_NAME,
+ .version = NETLBL_PROTO_VERSION,
+ .maxattr = NLBL_CALIPSO_A_MAX,
+ .module = THIS_MODULE,
+ .ops = netlbl_calipso_ops,
+ .n_ops = ARRAY_SIZE(netlbl_calipso_ops),
+};
+
/* NetLabel Generic NETLINK Protocol Functions
*/
@@ -368,8 +372,7 @@ static const struct genl_ops netlbl_calipso_ops[] = {
*/
int __init netlbl_calipso_genl_init(void)
{
- return genl_register_family_with_ops(&netlbl_calipso_gnl_family,
- netlbl_calipso_ops);
+ return genl_register_family(&netlbl_calipso_gnl_family);
}
static const struct netlbl_calipso_ops *calipso_ops;
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 7fd1104ba900..4149d3e63589 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -59,14 +59,7 @@ struct netlbl_domhsh_walk_arg {
};
/* NetLabel Generic NETLINK CIPSOv4 family */
-static struct genl_family netlbl_cipsov4_gnl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = NETLBL_NLTYPE_CIPSOV4_NAME,
- .version = NETLBL_PROTO_VERSION,
- .maxattr = NLBL_CIPSOV4_A_MAX,
-};
-
+static struct genl_family netlbl_cipsov4_gnl_family;
/* NetLabel Netlink attribute policy */
static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = {
[NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 },
@@ -767,6 +760,16 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
},
};
+static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = NETLBL_NLTYPE_CIPSOV4_NAME,
+ .version = NETLBL_PROTO_VERSION,
+ .maxattr = NLBL_CIPSOV4_A_MAX,
+ .module = THIS_MODULE,
+ .ops = netlbl_cipsov4_ops,
+ .n_ops = ARRAY_SIZE(netlbl_cipsov4_ops),
+};
+
/*
* NetLabel Generic NETLINK Protocol Functions
*/
@@ -781,6 +784,5 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
*/
int __init netlbl_cipsov4_genl_init(void)
{
- return genl_register_family_with_ops(&netlbl_cipsov4_gnl_family,
- netlbl_cipsov4_ops);
+ return genl_register_family(&netlbl_cipsov4_gnl_family);
}
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index f85d0e07af2d..21e0095b1d14 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg {
};
/* NetLabel Generic NETLINK CIPSOv4 family */
-static struct genl_family netlbl_mgmt_gnl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = NETLBL_NLTYPE_MGMT_NAME,
- .version = NETLBL_PROTO_VERSION,
- .maxattr = NLBL_MGMT_A_MAX,
-};
+static struct genl_family netlbl_mgmt_gnl_family;
/* NetLabel Netlink attribute policy */
static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
@@ -834,6 +828,16 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
},
};
+static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = NETLBL_NLTYPE_MGMT_NAME,
+ .version = NETLBL_PROTO_VERSION,
+ .maxattr = NLBL_MGMT_A_MAX,
+ .module = THIS_MODULE,
+ .ops = netlbl_mgmt_genl_ops,
+ .n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops),
+};
+
/*
* NetLabel Generic NETLINK Protocol Functions
*/
@@ -848,6 +852,5 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
*/
int __init netlbl_mgmt_genl_init(void)
{
- return genl_register_family_with_ops(&netlbl_mgmt_gnl_family,
- netlbl_mgmt_genl_ops);
+ return genl_register_family(&netlbl_mgmt_gnl_family);
}
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 4528cff9138b..22dc1b9d6362 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -123,13 +123,7 @@ static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def;
static u8 netlabel_unlabel_acceptflg;
/* NetLabel Generic NETLINK unlabeled family */
-static struct genl_family netlbl_unlabel_gnl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = NETLBL_NLTYPE_UNLABELED_NAME,
- .version = NETLBL_PROTO_VERSION,
- .maxattr = NLBL_UNLABEL_A_MAX,
-};
+static struct genl_family netlbl_unlabel_gnl_family;
/* NetLabel Netlink attribute policy */
static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
@@ -1378,6 +1372,16 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
},
};
+static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = NETLBL_NLTYPE_UNLABELED_NAME,
+ .version = NETLBL_PROTO_VERSION,
+ .maxattr = NLBL_UNLABEL_A_MAX,
+ .module = THIS_MODULE,
+ .ops = netlbl_unlabel_genl_ops,
+ .n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops),
+};
+
/*
* NetLabel Generic NETLINK Protocol Functions
*/
@@ -1392,8 +1396,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
*/
int __init netlbl_unlabel_genl_init(void)
{
- return genl_register_family_with_ops(&netlbl_unlabel_gnl_family,
- netlbl_unlabel_genl_ops);
+ return genl_register_family(&netlbl_unlabel_gnl_family);
}
/*
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 627f898c05b9..62bea4591054 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1832,7 +1832,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
/* Record the max length of recvmsg() calls for future allocations */
nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
- 16384);
+ SKB_WITH_OVERHEAD(32768));
copied = data_skb->len;
if (len < copied) {
@@ -2083,8 +2083,9 @@ static int netlink_dump(struct sock *sk)
if (alloc_min_size < nlk->max_recvmsg_len) {
alloc_size = nlk->max_recvmsg_len;
- skb = alloc_skb(alloc_size, GFP_KERNEL |
- __GFP_NOWARN | __GFP_NORETRY);
+ skb = alloc_skb(alloc_size,
+ (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
+ __GFP_NOWARN | __GFP_NORETRY);
}
if (!skb) {
alloc_size = alloc_min_size;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 23cc12639ba7..df0cbcddda2c 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -17,6 +17,7 @@
#include <linux/mutex.h>
#include <linux/bitmap.h>
#include <linux/rwsem.h>
+#include <linux/idr.h>
#include <net/sock.h>
#include <net/genetlink.h>
@@ -58,10 +59,8 @@ static void genl_unlock_all(void)
up_write(&cb_lock);
}
-#define GENL_FAM_TAB_SIZE 16
-#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1)
+static DEFINE_IDR(genl_fam_idr);
-static struct list_head family_ht[GENL_FAM_TAB_SIZE];
/*
* Bitmap of multicast groups that are currently in use.
*
@@ -86,45 +85,29 @@ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) |
static unsigned long *mc_groups = &mc_group_start;
static unsigned long mc_groups_longs = 1;
-static int genl_ctrl_event(int event, struct genl_family *family,
+static int genl_ctrl_event(int event, const struct genl_family *family,
const struct genl_multicast_group *grp,
int grp_id);
-static inline unsigned int genl_family_hash(unsigned int id)
+static const struct genl_family *genl_family_find_byid(unsigned int id)
{
- return id & GENL_FAM_TAB_MASK;
+ return idr_find(&genl_fam_idr, id);
}
-static inline struct list_head *genl_family_chain(unsigned int id)
+static const struct genl_family *genl_family_find_byname(char *name)
{
- return &family_ht[genl_family_hash(id)];
-}
-
-static struct genl_family *genl_family_find_byid(unsigned int id)
-{
- struct genl_family *f;
+ const struct genl_family *family;
+ unsigned int id;
- list_for_each_entry(f, genl_family_chain(id), family_list)
- if (f->id == id)
- return f;
+ idr_for_each_entry(&genl_fam_idr, family, id)
+ if (strcmp(family->name, name) == 0)
+ return family;
return NULL;
}
-static struct genl_family *genl_family_find_byname(char *name)
-{
- struct genl_family *f;
- int i;
-
- for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
- list_for_each_entry(f, genl_family_chain(i), family_list)
- if (strcmp(f->name, name) == 0)
- return f;
-
- return NULL;
-}
-
-static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family)
+static const struct genl_ops *genl_get_cmd(u8 cmd,
+ const struct genl_family *family)
{
int i;
@@ -135,26 +118,6 @@ static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family)
return NULL;
}
-/* Of course we are going to have problems once we hit
- * 2^16 alive types, but that can only happen by year 2K
-*/
-static u16 genl_generate_id(void)
-{
- static u16 id_gen_idx = GENL_MIN_ID;
- int i;
-
- for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) {
- if (id_gen_idx != GENL_ID_VFS_DQUOT &&
- id_gen_idx != GENL_ID_PMCRAID &&
- !genl_family_find_byid(id_gen_idx))
- return id_gen_idx;
- if (++id_gen_idx > GENL_MAX_ID)
- id_gen_idx = GENL_MIN_ID;
- }
-
- return 0;
-}
-
static int genl_allocate_reserve_groups(int n_groups, int *first_id)
{
unsigned long *new_groups;
@@ -295,7 +258,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family)
return err;
}
-static void genl_unregister_mc_groups(struct genl_family *family)
+static void genl_unregister_mc_groups(const struct genl_family *family)
{
struct net *net;
int i;
@@ -344,28 +307,21 @@ static int genl_validate_ops(const struct genl_family *family)
}
/**
- * __genl_register_family - register a generic netlink family
+ * genl_register_family - register a generic netlink family
* @family: generic netlink family
*
* Registers the specified family after validating it first. Only one
* family may be registered with the same family name or identifier.
- * The family id may equal GENL_ID_GENERATE causing an unique id to
- * be automatically generated and assigned.
*
- * The family's ops array must already be assigned, you can use the
- * genl_register_family_with_ops() helper function.
+ * The family's ops, multicast groups and module pointer must already
+ * be assigned.
*
* Return 0 on success or a negative error code.
*/
-int __genl_register_family(struct genl_family *family)
+int genl_register_family(struct genl_family *family)
{
- int err = -EINVAL, i;
-
- if (family->id && family->id < GENL_MIN_ID)
- goto errout;
-
- if (family->id > GENL_MAX_ID)
- goto errout;
+ int err, i;
+ int start = GENL_START_ALLOC, end = GENL_MAX_ID;
err = genl_validate_ops(family);
if (err)
@@ -378,18 +334,20 @@ int __genl_register_family(struct genl_family *family)
goto errout_locked;
}
- if (family->id == GENL_ID_GENERATE) {
- u16 newid = genl_generate_id();
-
- if (!newid) {
- err = -ENOMEM;
- goto errout_locked;
- }
-
- family->id = newid;
- } else if (genl_family_find_byid(family->id)) {
- err = -EEXIST;
- goto errout_locked;
+ /*
+ * Sadly, a few cases need to be special-cased
+ * due to them having previously abused the API
+ * and having used their family ID also as their
+ * multicast group ID, so we use reserved IDs
+ * for both to be sure we can do that mapping.
+ */
+ if (family == &genl_ctrl) {
+ /* and this needs to be special for initial family lookups */
+ start = end = GENL_ID_CTRL;
+ } else if (strcmp(family->name, "pmcraid") == 0) {
+ start = end = GENL_ID_PMCRAID;
+ } else if (strcmp(family->name, "VFS_DQUOT") == 0) {
+ start = end = GENL_ID_VFS_DQUOT;
}
if (family->maxattr && !family->parallel_ops) {
@@ -402,11 +360,15 @@ int __genl_register_family(struct genl_family *family)
} else
family->attrbuf = NULL;
+ family->id = idr_alloc(&genl_fam_idr, family,
+ start, end + 1, GFP_KERNEL);
+ if (!family->id)
+ goto errout_locked;
+
err = genl_validate_assign_mc_groups(family);
if (err)
- goto errout_locked;
+ goto errout_remove;
- list_add_tail(&family->family_list, genl_family_chain(family->id));
genl_unlock_all();
/* send all events */
@@ -417,12 +379,13 @@ int __genl_register_family(struct genl_family *family)
return 0;
+errout_remove:
+ idr_remove(&genl_fam_idr, family->id);
errout_locked:
genl_unlock_all();
-errout:
return err;
}
-EXPORT_SYMBOL(__genl_register_family);
+EXPORT_SYMBOL(genl_register_family);
/**
* genl_unregister_family - unregister generic netlink family
@@ -432,33 +395,29 @@ EXPORT_SYMBOL(__genl_register_family);
*
* Returns 0 on success or a negative error code.
*/
-int genl_unregister_family(struct genl_family *family)
+int genl_unregister_family(const struct genl_family *family)
{
- struct genl_family *rc;
-
genl_lock_all();
- list_for_each_entry(rc, genl_family_chain(family->id), family_list) {
- if (family->id != rc->id || strcmp(rc->name, family->name))
- continue;
+ if (genl_family_find_byid(family->id)) {
+ genl_unlock_all();
+ return -ENOENT;
+ }
- genl_unregister_mc_groups(family);
+ genl_unregister_mc_groups(family);
- list_del(&rc->family_list);
- family->n_ops = 0;
- up_write(&cb_lock);
- wait_event(genl_sk_destructing_waitq,
- atomic_read(&genl_sk_destructing_cnt) == 0);
- genl_unlock();
+ idr_remove(&genl_fam_idr, family->id);
- kfree(family->attrbuf);
- genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
- return 0;
- }
+ up_write(&cb_lock);
+ wait_event(genl_sk_destructing_waitq,
+ atomic_read(&genl_sk_destructing_cnt) == 0);
+ genl_unlock();
- genl_unlock_all();
+ kfree(family->attrbuf);
+
+ genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
- return -ENOENT;
+ return 0;
}
EXPORT_SYMBOL(genl_unregister_family);
@@ -474,7 +433,7 @@ EXPORT_SYMBOL(genl_unregister_family);
* Returns pointer to user specific header
*/
void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
- struct genl_family *family, int flags, u8 cmd)
+ const struct genl_family *family, int flags, u8 cmd)
{
struct nlmsghdr *nlh;
struct genlmsghdr *hdr;
@@ -533,7 +492,7 @@ static int genl_lock_done(struct netlink_callback *cb)
return rc;
}
-static int genl_family_rcv_msg(struct genl_family *family,
+static int genl_family_rcv_msg(const struct genl_family *family,
struct sk_buff *skb,
struct nlmsghdr *nlh)
{
@@ -645,7 +604,7 @@ out:
static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
- struct genl_family *family;
+ const struct genl_family *family;
int err;
family = genl_family_find_byid(nlh->nlmsg_type);
@@ -674,15 +633,9 @@ static void genl_rcv(struct sk_buff *skb)
* Controller
**************************************************************************/
-static struct genl_family genl_ctrl = {
- .id = GENL_ID_CTRL,
- .name = "nlctrl",
- .version = 0x2,
- .maxattr = CTRL_ATTR_MAX,
- .netnsok = true,
-};
+static struct genl_family genl_ctrl;
-static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq,
+static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq,
u32 flags, struct sk_buff *skb, u8 cmd)
{
void *hdr;
@@ -769,7 +722,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int ctrl_fill_mcgrp_info(struct genl_family *family,
+static int ctrl_fill_mcgrp_info(const struct genl_family *family,
const struct genl_multicast_group *grp,
int grp_id, u32 portid, u32 seq, u32 flags,
struct sk_buff *skb, u8 cmd)
@@ -812,37 +765,30 @@ nla_put_failure:
static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
{
-
- int i, n = 0;
+ int n = 0;
struct genl_family *rt;
struct net *net = sock_net(skb->sk);
- int chains_to_skip = cb->args[0];
- int fams_to_skip = cb->args[1];
-
- for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) {
- n = 0;
- list_for_each_entry(rt, genl_family_chain(i), family_list) {
- if (!rt->netnsok && !net_eq(net, &init_net))
- continue;
- if (++n < fams_to_skip)
- continue;
- if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- skb, CTRL_CMD_NEWFAMILY) < 0)
- goto errout;
- }
+ int fams_to_skip = cb->args[0];
+ unsigned int id;
- fams_to_skip = 0;
- }
+ idr_for_each_entry(&genl_fam_idr, rt, id) {
+ if (!rt->netnsok && !net_eq(net, &init_net))
+ continue;
+
+ if (n++ < fams_to_skip)
+ continue;
-errout:
- cb->args[0] = i;
- cb->args[1] = n;
+ if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ skb, CTRL_CMD_NEWFAMILY) < 0)
+ break;
+ }
+ cb->args[0] = n;
return skb->len;
}
-static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
+static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family,
u32 portid, int seq, u8 cmd)
{
struct sk_buff *skb;
@@ -862,7 +808,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
}
static struct sk_buff *
-ctrl_build_mcgrp_msg(struct genl_family *family,
+ctrl_build_mcgrp_msg(const struct genl_family *family,
const struct genl_multicast_group *grp,
int grp_id, u32 portid, int seq, u8 cmd)
{
@@ -892,7 +838,7 @@ static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = {
static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
{
struct sk_buff *msg;
- struct genl_family *res = NULL;
+ const struct genl_family *res = NULL;
int err = -EINVAL;
if (info->attrs[CTRL_ATTR_FAMILY_ID]) {
@@ -936,7 +882,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
return genlmsg_reply(msg, info);
}
-static int genl_ctrl_event(int event, struct genl_family *family,
+static int genl_ctrl_event(int event, const struct genl_family *family,
const struct genl_multicast_group *grp,
int grp_id)
{
@@ -990,27 +936,39 @@ static const struct genl_multicast_group genl_ctrl_groups[] = {
{ .name = "notify", },
};
+static struct genl_family genl_ctrl __ro_after_init = {
+ .module = THIS_MODULE,
+ .ops = genl_ctrl_ops,
+ .n_ops = ARRAY_SIZE(genl_ctrl_ops),
+ .mcgrps = genl_ctrl_groups,
+ .n_mcgrps = ARRAY_SIZE(genl_ctrl_groups),
+ .id = GENL_ID_CTRL,
+ .name = "nlctrl",
+ .version = 0x2,
+ .maxattr = CTRL_ATTR_MAX,
+ .netnsok = true,
+};
+
static int genl_bind(struct net *net, int group)
{
- int i, err = -ENOENT;
+ struct genl_family *f;
+ int err = -ENOENT;
+ unsigned int id;
down_read(&cb_lock);
- for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
- struct genl_family *f;
-
- list_for_each_entry(f, genl_family_chain(i), family_list) {
- if (group >= f->mcgrp_offset &&
- group < f->mcgrp_offset + f->n_mcgrps) {
- int fam_grp = group - f->mcgrp_offset;
-
- if (!f->netnsok && net != &init_net)
- err = -ENOENT;
- else if (f->mcast_bind)
- err = f->mcast_bind(net, fam_grp);
- else
- err = 0;
- break;
- }
+
+ idr_for_each_entry(&genl_fam_idr, f, id) {
+ if (group >= f->mcgrp_offset &&
+ group < f->mcgrp_offset + f->n_mcgrps) {
+ int fam_grp = group - f->mcgrp_offset;
+
+ if (!f->netnsok && net != &init_net)
+ err = -ENOENT;
+ else if (f->mcast_bind)
+ err = f->mcast_bind(net, fam_grp);
+ else
+ err = 0;
+ break;
}
}
up_read(&cb_lock);
@@ -1020,21 +978,19 @@ static int genl_bind(struct net *net, int group)
static void genl_unbind(struct net *net, int group)
{
- int i;
+ struct genl_family *f;
+ unsigned int id;
down_read(&cb_lock);
- for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
- struct genl_family *f;
- list_for_each_entry(f, genl_family_chain(i), family_list) {
- if (group >= f->mcgrp_offset &&
- group < f->mcgrp_offset + f->n_mcgrps) {
- int fam_grp = group - f->mcgrp_offset;
+ idr_for_each_entry(&genl_fam_idr, f, id) {
+ if (group >= f->mcgrp_offset &&
+ group < f->mcgrp_offset + f->n_mcgrps) {
+ int fam_grp = group - f->mcgrp_offset;
- if (f->mcast_unbind)
- f->mcast_unbind(net, fam_grp);
- break;
- }
+ if (f->mcast_unbind)
+ f->mcast_unbind(net, fam_grp);
+ break;
}
}
up_read(&cb_lock);
@@ -1074,13 +1030,9 @@ static struct pernet_operations genl_pernet_ops = {
static int __init genl_init(void)
{
- int i, err;
-
- for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
- INIT_LIST_HEAD(&family_ht[i]);
+ int err;
- err = genl_register_family_with_ops_groups(&genl_ctrl, genl_ctrl_ops,
- genl_ctrl_groups);
+ err = genl_register_family(&genl_ctrl);
if (err < 0)
goto problem;
@@ -1096,6 +1048,25 @@ problem:
subsys_initcall(genl_init);
+/**
+ * genl_family_attrbuf - return family's attrbuf
+ * @family: the family
+ *
+ * Return the family's attrbuf, while validating that it's
+ * actually valid to access it.
+ *
+ * You cannot use this function with a family that has parallel_ops
+ * and you can only use it within (pre/post) doit/dumpit callbacks.
+ */
+struct nlattr **genl_family_attrbuf(const struct genl_family *family)
+{
+ if (!WARN_ON(family->parallel_ops))
+ lockdep_assert_held(&genl_mutex);
+
+ return family->attrbuf;
+}
+EXPORT_SYMBOL(genl_family_attrbuf);
+
static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
gfp_t flags)
{
@@ -1125,8 +1096,9 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
return err;
}
-int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb,
- u32 portid, unsigned int group, gfp_t flags)
+int genlmsg_multicast_allns(const struct genl_family *family,
+ struct sk_buff *skb, u32 portid,
+ unsigned int group, gfp_t flags)
{
if (WARN_ON_ONCE(group >= family->n_mcgrps))
return -EINVAL;
@@ -1135,7 +1107,7 @@ int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb,
}
EXPORT_SYMBOL(genlmsg_multicast_allns);
-void genl_notify(struct genl_family *family, struct sk_buff *skb,
+void genl_notify(const struct genl_family *family, struct sk_buff *skb,
struct genl_info *info, u32 group, gfp_t flags)
{
struct net *net = genl_info_net(info);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index ea023b35f1c2..03f3d5c7beb8 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -38,14 +38,7 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = {
{ .name = NFC_GENL_MCAST_EVENT_NAME, },
};
-static struct genl_family nfc_genl_family = {
- .id = GENL_ID_GENERATE,
- .hdrsize = 0,
- .name = NFC_GENL_NAME,
- .version = NFC_GENL_VERSION,
- .maxattr = NFC_ATTR_MAX,
-};
-
+static struct genl_family nfc_genl_family;
static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
[NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 },
[NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING,
@@ -120,21 +113,20 @@ nla_put_failure:
static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
{
+ struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family);
struct nfc_dev *dev;
int rc;
u32 idx;
rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize,
- nfc_genl_family.attrbuf,
- nfc_genl_family.maxattr,
- nfc_genl_policy);
+ attrbuf, nfc_genl_family.maxattr, nfc_genl_policy);
if (rc < 0)
return ERR_PTR(rc);
- if (!nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX])
+ if (!attrbuf[NFC_ATTR_DEVICE_INDEX])
return ERR_PTR(-EINVAL);
- idx = nla_get_u32(nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX]);
+ idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]);
dev = nfc_get_device(idx);
if (!dev)
@@ -1754,6 +1746,18 @@ static const struct genl_ops nfc_genl_ops[] = {
},
};
+static struct genl_family nfc_genl_family __ro_after_init = {
+ .hdrsize = 0,
+ .name = NFC_GENL_NAME,
+ .version = NFC_GENL_VERSION,
+ .maxattr = NFC_ATTR_MAX,
+ .module = THIS_MODULE,
+ .ops = nfc_genl_ops,
+ .n_ops = ARRAY_SIZE(nfc_genl_ops),
+ .mcgrps = nfc_genl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps),
+};
+
struct urelease_work {
struct work_struct w;
@@ -1839,9 +1843,7 @@ int __init nfc_genl_init(void)
{
int rc;
- rc = genl_register_family_with_ops_groups(&nfc_genl_family,
- nfc_genl_ops,
- nfc_genl_mcgrps);
+ rc = genl_register_family(&nfc_genl_family);
if (rc)
return rc;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 863e992dfbc0..1105c4e29c62 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -62,7 +62,8 @@ struct ovs_frag_data {
struct vport *vport;
struct ovs_skb_cb cb;
__be16 inner_protocol;
- __u16 vlan_tci;
+ u16 network_offset; /* valid only for MPLS */
+ u16 vlan_tci;
__be16 vlan_proto;
unsigned int l2_len;
u8 l2_data[MAX_L2_LEN];
@@ -160,7 +161,7 @@ static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_action_push_mpls *mpls)
{
- __be32 *new_mpls_lse;
+ struct mpls_shim_hdr *new_mpls_lse;
/* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
if (skb->encapsulation)
@@ -180,8 +181,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb->mac_len);
- new_mpls_lse = (__be32 *)skb_mpls_header(skb);
- *new_mpls_lse = mpls->mpls_lse;
+ new_mpls_lse = mpls_hdr(skb);
+ new_mpls_lse->label_stack_entry = mpls->mpls_lse;
skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
@@ -202,7 +203,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
if (unlikely(err))
return err;
- skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN);
+ skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
skb->mac_len);
@@ -211,10 +212,10 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb->mac_len);
- /* skb_mpls_header() is used to locate the ethertype
- * field correctly in the presence of VLAN tags.
+ /* mpls_hdr() is used to locate the ethertype field correctly in the
+ * presence of VLAN tags.
*/
- hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
+ hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
update_ethertype(skb, hdr, ethertype);
if (eth_p_mpls(skb->protocol))
skb->protocol = ethertype;
@@ -226,7 +227,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
const __be32 *mpls_lse, const __be32 *mask)
{
- __be32 *stack;
+ struct mpls_shim_hdr *stack;
__be32 lse;
int err;
@@ -234,16 +235,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
if (unlikely(err))
return err;
- stack = (__be32 *)skb_mpls_header(skb);
- lse = OVS_MASKED(*stack, *mpls_lse, *mask);
+ stack = mpls_hdr(skb);
+ lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
if (skb->ip_summed == CHECKSUM_COMPLETE) {
- __be32 diff[] = { ~(*stack), lse };
+ __be32 diff[] = { ~(stack->label_stack_entry), lse };
skb->csum = ~csum_partial((char *)diff, sizeof(diff),
~skb->csum);
}
- *stack = lse;
+ stack->label_stack_entry = lse;
flow_key->mpls.top_lse = lse;
return 0;
}
@@ -666,6 +667,12 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk
skb_postpush_rcsum(skb, skb->data, data->l2_len);
skb_reset_mac_header(skb);
+ if (eth_p_mpls(skb->protocol)) {
+ skb->inner_network_header = skb->network_header;
+ skb_set_network_header(skb, data->network_offset);
+ skb_reset_mac_len(skb);
+ }
+
ovs_vport_send(vport, skb);
return 0;
}
@@ -684,7 +691,8 @@ static struct dst_ops ovs_dst_ops = {
/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
* ovs_vport_output(), which is called once per fragmented packet.
*/
-static void prepare_frag(struct vport *vport, struct sk_buff *skb)
+static void prepare_frag(struct vport *vport, struct sk_buff *skb,
+ u16 orig_network_offset)
{
unsigned int hlen = skb_network_offset(skb);
struct ovs_frag_data *data;
@@ -694,6 +702,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
data->vport = vport;
data->cb = *OVS_CB(skb);
data->inner_protocol = skb->inner_protocol;
+ data->network_offset = orig_network_offset;
data->vlan_tci = skb->vlan_tci;
data->vlan_proto = skb->vlan_proto;
data->l2_len = hlen;
@@ -706,6 +715,13 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
static void ovs_fragment(struct net *net, struct vport *vport,
struct sk_buff *skb, u16 mru, __be16 ethertype)
{
+ u16 orig_network_offset = 0;
+
+ if (eth_p_mpls(skb->protocol)) {
+ orig_network_offset = skb_network_offset(skb);
+ skb->network_header = skb->inner_network_header;
+ }
+
if (skb_network_offset(skb) > MAX_L2_LEN) {
OVS_NLERR(1, "L2 header too long to fragment");
goto err;
@@ -715,7 +731,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
struct dst_entry ovs_dst;
unsigned long orig_dst;
- prepare_frag(vport, skb);
+ prepare_frag(vport, skb, orig_network_offset);
dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
ovs_dst.dev = vport->dev;
@@ -735,7 +751,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
goto err;
}
- prepare_frag(vport, skb);
+ prepare_frag(vport, skb, orig_network_offset);
memset(&ovs_rt, 0, sizeof(ovs_rt));
dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 4d67ea856067..fa8760176b7d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -59,7 +59,6 @@
#include "vport-netdev.h"
int ovs_net_id __read_mostly;
-EXPORT_SYMBOL_GPL(ovs_net_id);
static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
@@ -131,7 +130,6 @@ int lockdep_ovsl_is_held(void)
else
return 1;
}
-EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
#endif
static struct vport *new_vport(const struct vport_parms *);
@@ -672,8 +670,7 @@ static const struct genl_ops dp_packet_genl_ops[] = {
}
};
-static struct genl_family dp_packet_genl_family = {
- .id = GENL_ID_GENERATE,
+static struct genl_family dp_packet_genl_family __ro_after_init = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_PACKET_FAMILY,
.version = OVS_PACKET_VERSION,
@@ -682,6 +679,7 @@ static struct genl_family dp_packet_genl_family = {
.parallel_ops = true,
.ops = dp_packet_genl_ops,
.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
+ .module = THIS_MODULE,
};
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
@@ -1437,8 +1435,7 @@ static const struct genl_ops dp_flow_genl_ops[] = {
},
};
-static struct genl_family dp_flow_genl_family = {
- .id = GENL_ID_GENERATE,
+static struct genl_family dp_flow_genl_family __ro_after_init = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_FLOW_FAMILY,
.version = OVS_FLOW_VERSION,
@@ -1449,6 +1446,7 @@ static struct genl_family dp_flow_genl_family = {
.n_ops = ARRAY_SIZE(dp_flow_genl_ops),
.mcgrps = &ovs_dp_flow_multicast_group,
.n_mcgrps = 1,
+ .module = THIS_MODULE,
};
static size_t ovs_dp_cmd_msg_size(void)
@@ -1823,8 +1821,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
},
};
-static struct genl_family dp_datapath_genl_family = {
- .id = GENL_ID_GENERATE,
+static struct genl_family dp_datapath_genl_family __ro_after_init = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_DATAPATH_FAMILY,
.version = OVS_DATAPATH_VERSION,
@@ -1835,6 +1832,7 @@ static struct genl_family dp_datapath_genl_family = {
.n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
.mcgrps = &ovs_dp_datapath_multicast_group,
.n_mcgrps = 1,
+ .module = THIS_MODULE,
};
/* Called with ovs_mutex or RCU read lock. */
@@ -2245,8 +2243,7 @@ static const struct genl_ops dp_vport_genl_ops[] = {
},
};
-struct genl_family dp_vport_genl_family = {
- .id = GENL_ID_GENERATE,
+struct genl_family dp_vport_genl_family __ro_after_init = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_VPORT_FAMILY,
.version = OVS_VPORT_VERSION,
@@ -2257,6 +2254,7 @@ struct genl_family dp_vport_genl_family = {
.n_ops = ARRAY_SIZE(dp_vport_genl_ops),
.mcgrps = &ovs_dp_vport_multicast_group,
.n_mcgrps = 1,
+ .module = THIS_MODULE,
};
static struct genl_family * const dp_genl_families[] = {
@@ -2274,7 +2272,7 @@ static void dp_unregister_genl(int n_families)
genl_unregister_family(dp_genl_families[i]);
}
-static int dp_register_genl(void)
+static int __init dp_register_genl(void)
{
int err;
int i;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 634cc10d6dee..22087062bd10 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -343,7 +343,7 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
key->eth.cvlan.tci = 0;
key->eth.cvlan.tpid = 0;
- if (likely(skb_vlan_tag_present(skb))) {
+ if (skb_vlan_tag_present(skb)) {
key->eth.vlan.tci = htons(skb->vlan_tci);
key->eth.vlan.tpid = skb->vlan_proto;
} else {
@@ -633,12 +633,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
} else if (eth_p_mpls(key->eth.type)) {
size_t stack_len = MPLS_HLEN;
- /* In the presence of an MPLS label stack the end of the L2
- * header and the beginning of the L3 header differ.
- *
- * Advance network_header to the beginning of the L3
- * header. mac_len corresponds to the end of the L2 header.
- */
+ skb_set_inner_network_header(skb, skb->mac_len);
while (1) {
__be32 lse;
@@ -646,12 +641,12 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
if (unlikely(error))
return 0;
- memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
+ memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
if (stack_len == MPLS_HLEN)
memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
- skb_set_network_header(skb, skb->mac_len + stack_len);
+ skb_set_inner_network_header(skb, skb->mac_len + stack_len);
if (lse & htonl(MPLS_LS_S_MASK))
break;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 95c36147a6e1..d5d6caecd072 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -89,15 +89,6 @@ static const struct ethtool_ops internal_dev_ethtool_ops = {
.get_link = ethtool_op_get_link,
};
-static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
-{
- if (new_mtu < 68)
- return -EINVAL;
-
- netdev->mtu = new_mtu;
- return 0;
-}
-
static void internal_dev_destructor(struct net_device *dev)
{
struct vport *vport = ovs_internal_dev_get_vport(dev);
@@ -148,7 +139,6 @@ static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_stop = internal_dev_stop,
.ndo_start_xmit = internal_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
- .ndo_change_mtu = internal_dev_change_mtu,
.ndo_get_stats64 = internal_get_stats,
.ndo_set_rx_headroom = internal_set_rx_headroom,
};
@@ -176,7 +166,7 @@ static void do_setup(struct net_device *netdev)
netdev->vlan_features = netdev->features;
netdev->hw_enc_features = netdev->features;
- netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
eth_hw_addr_random(netdev);
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 4e3972344aa6..e825753de1e0 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -162,7 +162,6 @@ void ovs_netdev_detach_dev(struct vport *vport)
netdev_master_upper_dev_get(vport->dev));
dev_set_promiscuity(vport->dev, -1);
}
-EXPORT_SYMBOL_GPL(ovs_netdev_detach_dev);
static void netdev_destroy(struct vport *vport)
{
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 8f198437c724..9bb85b35a1fb 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -463,29 +463,13 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
ovs_dp_process_packet(skb, &key);
return 0;
}
-EXPORT_SYMBOL_GPL(ovs_vport_receive);
-
-static void free_vport_rcu(struct rcu_head *rcu)
-{
- struct vport *vport = container_of(rcu, struct vport, rcu);
-
- ovs_vport_free(vport);
-}
-
-void ovs_vport_deferred_free(struct vport *vport)
-{
- if (!vport)
- return;
-
- call_rcu(&vport->rcu, free_vport_rcu);
-}
-EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
static unsigned int packet_length(const struct sk_buff *skb)
{
unsigned int length = skb->len - ETH_HLEN;
- if (skb_vlan_tagged(skb))
+ if (!skb_vlan_tag_present(skb) &&
+ eth_type_vlan(skb->protocol))
length -= VLAN_HLEN;
/* Don't subtract for multiple VLAN tags. Most (all?) drivers allow
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index f01f28a567ad..46e5b69927c7 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -149,7 +149,6 @@ struct vport_ops {
struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
const struct vport_parms *);
void ovs_vport_free(struct vport *);
-void ovs_vport_deferred_free(struct vport *vport);
#define VPORT_ALIGN 8
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 33a4697d5539..11db0d619c00 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3952,6 +3952,7 @@ static int packet_notifier(struct notifier_block *this,
}
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
+ fanout_release(sk);
po->ifindex = -1;
if (po->prot_hook.dev)
dev_put(po->prot_hook.dev);
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index fa8237fdc57b..21c28b51be94 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -217,20 +217,10 @@ static netdev_tx_t gprs_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-static int gprs_set_mtu(struct net_device *dev, int new_mtu)
-{
- if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11)))
- return -EINVAL;
-
- dev->mtu = new_mtu;
- return 0;
-}
-
static const struct net_device_ops gprs_netdev_ops = {
.ndo_open = gprs_open,
.ndo_stop = gprs_close,
.ndo_start_xmit = gprs_xmit,
- .ndo_change_mtu = gprs_set_mtu,
};
static void gprs_setup(struct net_device *dev)
@@ -239,6 +229,8 @@ static void gprs_setup(struct net_device *dev)
dev->type = ARPHRD_PHONET_PIPE;
dev->flags = IFF_POINTOPOINT | IFF_NOARP;
dev->mtu = GPRS_DEFAULT_MTU;
+ dev->min_mtu = 576;
+ dev->max_mtu = (PHONET_MAX_MTU - 11);
dev->hard_header_len = 0;
dev->addr_len = 0;
dev->tx_queue_len = 10;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index f5058559bb08..13f459dad4ef 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -689,21 +689,6 @@ void rds_conn_connect_if_down(struct rds_connection *conn)
}
EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
-/*
- * An error occurred on the connection
- */
-void
-__rds_conn_error(struct rds_connection *conn, const char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- vprintk(fmt, ap);
- va_end(ap);
-
- rds_conn_drop(conn);
-}
-
void
__rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
{
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 7eaf887e46f8..5680d90b0b77 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -160,7 +160,7 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
rds_ibdev->dev = device;
- rds_ibdev->pd = ib_alloc_pd(device);
+ rds_ibdev->pd = ib_alloc_pd(device, 0);
if (IS_ERR(rds_ibdev->pd)) {
rds_ibdev->pd = NULL;
goto put_dev;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index fd0bccb2f9f9..25532a46602f 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -683,10 +683,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
struct rds_info_lengths *lens,
int (*visitor)(struct rds_connection *, void *),
size_t item_len);
-__printf(2, 3)
-void __rds_conn_error(struct rds_connection *conn, const char *, ...);
-#define rds_conn_error(conn, fmt...) \
- __rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
__printf(2, 3)
void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index e42df11bf30a..e36e333a0aa0 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -171,8 +171,7 @@ void rds_connect_worker(struct work_struct *work)
RDS_CONN_DOWN))
rds_queue_reconnect(cp);
else
- rds_conn_path_error(cp,
- "RDS: connect failed\n");
+ rds_conn_path_error(cp, "connect failed\n");
}
}
}
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 8dbf7bed2cc4..2d59c9be40e1 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -136,7 +136,8 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
struct sock *sk = sock->sk;
struct rxrpc_local *local;
- struct rxrpc_sock *rx = rxrpc_sk(sk), *prx;
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ u16 service_id = srx->srx_service;
int ret;
_enter("%p,%p,%d", rx, saddr, len);
@@ -160,15 +161,12 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
goto error_unlock;
}
- if (rx->srx.srx_service) {
+ if (service_id) {
write_lock(&local->services_lock);
- hlist_for_each_entry(prx, &local->services, listen_link) {
- if (prx->srx.srx_service == rx->srx.srx_service)
- goto service_in_use;
- }
-
+ if (rcu_access_pointer(local->service))
+ goto service_in_use;
rx->local = local;
- hlist_add_head_rcu(&rx->listen_link, &local->services);
+ rcu_assign_pointer(local->service, rx);
write_unlock(&local->services_lock);
rx->sk.sk_state = RXRPC_SERVER_BOUND;
@@ -599,7 +597,6 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
rx->family = protocol;
rx->calls = RB_ROOT;
- INIT_HLIST_NODE(&rx->listen_link);
spin_lock_init(&rx->incoming_lock);
INIT_LIST_HEAD(&rx->sock_calls);
INIT_LIST_HEAD(&rx->to_be_accepted);
@@ -681,11 +678,9 @@ static int rxrpc_release_sock(struct sock *sk)
sk->sk_state = RXRPC_CLOSE;
spin_unlock_bh(&sk->sk_receive_queue.lock);
- ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1);
-
- if (!hlist_unhashed(&rx->listen_link)) {
+ if (rx->local && rcu_access_pointer(rx->local->service) == rx) {
write_lock(&rx->local->services_lock);
- hlist_del_rcu(&rx->listen_link);
+ rcu_assign_pointer(rx->local->service, NULL);
write_unlock(&rx->local->services_lock);
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index ca96e547cb9a..f60e35576526 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -93,7 +93,6 @@ struct rxrpc_sock {
rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */
rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */
struct rxrpc_local *local; /* local endpoint */
- struct hlist_node listen_link; /* link in the local endpoint's listen list */
struct rxrpc_backlog *backlog; /* Preallocation for services */
spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */
struct list_head sock_calls; /* List of calls owned by this socket */
@@ -145,9 +144,7 @@ struct rxrpc_skb_priv {
u8 nr_jumbo; /* Number of jumbo subpackets */
};
union {
- unsigned int offset; /* offset into buffer of next read */
int remain; /* amount of space remaining for next write */
- u32 error; /* network error code */
};
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
@@ -216,7 +213,7 @@ struct rxrpc_local {
struct list_head link;
struct socket *socket; /* my UDP socket */
struct work_struct processor;
- struct hlist_head services; /* services listening on this endpoint */
+ struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
struct sk_buff_head reject_queue; /* packets awaiting rejection */
struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */
@@ -401,6 +398,7 @@ enum rxrpc_call_flag {
RXRPC_CALL_EXPOSED, /* The call was exposed to the world */
RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */
RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */
+ RXRPC_CALL_SEND_PING, /* A ping will need to be sent */
RXRPC_CALL_PINGING, /* Ping in process */
RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
};
@@ -413,6 +411,7 @@ enum rxrpc_call_event {
RXRPC_CALL_EV_ABORT, /* need to generate abort */
RXRPC_CALL_EV_TIMER, /* Timer expired */
RXRPC_CALL_EV_RESEND, /* Tx resend required */
+ RXRPC_CALL_EV_PING, /* Ping send required */
};
/*
@@ -467,9 +466,10 @@ struct rxrpc_call {
struct rxrpc_connection *conn; /* connection carrying call */
struct rxrpc_peer *peer; /* Peer record for remote address */
struct rxrpc_sock __rcu *socket; /* socket responsible */
- unsigned long ack_at; /* When deferred ACK needs to happen */
- unsigned long resend_at; /* When next resend needs to happen */
- unsigned long expire_at; /* When the call times out */
+ ktime_t ack_at; /* When deferred ACK needs to happen */
+ ktime_t resend_at; /* When next resend needs to happen */
+ ktime_t ping_at; /* When next to send a ping */
+ ktime_t expire_at; /* When the call times out */
struct timer_list timer; /* Combined event timer */
struct work_struct processor; /* Event processor */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
@@ -561,8 +561,10 @@ struct rxrpc_call {
rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */
rxrpc_seq_t ackr_seen; /* Highest packet shown seen */
- rxrpc_serial_t ackr_ping; /* Last ping sent */
- ktime_t ackr_ping_time; /* Time last ping sent */
+
+ /* ping management */
+ rxrpc_serial_t ping_serial; /* Last ping sent */
+ ktime_t ping_time; /* Time last ping sent */
/* transmission-phase ACK management */
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
@@ -603,7 +605,6 @@ enum rxrpc_skb_trace {
rxrpc_skb_tx_cleaned,
rxrpc_skb_tx_freed,
rxrpc_skb_tx_got,
- rxrpc_skb_tx_lost,
rxrpc_skb_tx_new,
rxrpc_skb_tx_rotated,
rxrpc_skb_tx_seen,
@@ -732,8 +733,10 @@ extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5];
enum rxrpc_timer_trace {
rxrpc_timer_begin,
rxrpc_timer_init_for_reply,
+ rxrpc_timer_init_for_send_reply,
rxrpc_timer_expired,
rxrpc_timer_set_for_ack,
+ rxrpc_timer_set_for_ping,
rxrpc_timer_set_for_resend,
rxrpc_timer_set_for_send,
rxrpc_timer__nr_trace
@@ -747,6 +750,7 @@ enum rxrpc_propose_ack_trace {
rxrpc_propose_ack_ping_for_lost_ack,
rxrpc_propose_ack_ping_for_lost_reply,
rxrpc_propose_ack_ping_for_params,
+ rxrpc_propose_ack_processing_op,
rxrpc_propose_ack_respond_to_ack,
rxrpc_propose_ack_respond_to_ping,
rxrpc_propose_ack_retry_tx,
@@ -781,7 +785,7 @@ extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10];
extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9];
extern const char *const rxrpc_pkts[];
-extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4];
+extern const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4];
#include <trace/events/rxrpc.h>
@@ -809,7 +813,8 @@ int rxrpc_reject_call(struct rxrpc_sock *);
/*
* call_event.c
*/
-void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace);
+void __rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
+void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
enum rxrpc_propose_ack_trace);
void rxrpc_process_call(struct work_struct *);
@@ -1072,8 +1077,9 @@ extern const s8 rxrpc_ack_priority[];
/*
* output.c
*/
-int rxrpc_send_call_packet(struct rxrpc_call *, u8);
-int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *);
+int rxrpc_send_ack_packet(struct rxrpc_call *, bool);
+int rxrpc_send_abort_packet(struct rxrpc_call *);
+int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool);
void rxrpc_reject_packets(struct rxrpc_local *);
/*
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index a8d39d7cf42c..832d854c2d5c 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -331,14 +331,14 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_sock *rx;
struct rxrpc_call *call;
+ u16 service_id = sp->hdr.serviceId;
_enter("");
/* Get the socket providing the service */
- hlist_for_each_entry_rcu_bh(rx, &local->services, listen_link) {
- if (rx->srx.srx_service == sp->hdr.serviceId)
- goto found_service;
- }
+ rx = rcu_dereference(local->service);
+ if (rx && service_id == rx->srx.srx_service)
+ goto found_service;
trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
RX_INVALID_OPERATION, EOPNOTSUPP);
@@ -565,7 +565,7 @@ out_discard:
write_unlock_bh(&call->state_lock);
write_unlock(&rx->call_lock);
if (abort) {
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
rxrpc_release_call(rx, call);
rxrpc_put_call(call, rxrpc_call_put);
}
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 0e8478012212..97a17ada4431 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -24,36 +24,99 @@
/*
* Set the timer
*/
-void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why)
+void __rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
+ ktime_t now)
{
- unsigned long t, now = jiffies;
-
- read_lock_bh(&call->state_lock);
+ unsigned long t_j, now_j = jiffies;
+ ktime_t t;
+ bool queue = false;
if (call->state < RXRPC_CALL_COMPLETE) {
t = call->expire_at;
- if (time_before_eq(t, now))
+ if (!ktime_after(t, now)) {
+ trace_rxrpc_timer(call, why, now, now_j);
+ queue = true;
goto out;
+ }
- if (time_after(call->resend_at, now) &&
- time_before(call->resend_at, t))
+ if (!ktime_after(call->resend_at, now)) {
+ call->resend_at = call->expire_at;
+ if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+ queue = true;
+ } else if (ktime_before(call->resend_at, t)) {
t = call->resend_at;
+ }
- if (time_after(call->ack_at, now) &&
- time_before(call->ack_at, t))
+ if (!ktime_after(call->ack_at, now)) {
+ call->ack_at = call->expire_at;
+ if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
+ queue = true;
+ } else if (ktime_before(call->ack_at, t)) {
t = call->ack_at;
+ }
- if (call->timer.expires != t || !timer_pending(&call->timer)) {
- mod_timer(&call->timer, t);
- trace_rxrpc_timer(call, why, now);
+ if (!ktime_after(call->ping_at, now)) {
+ call->ping_at = call->expire_at;
+ if (!test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
+ queue = true;
+ } else if (ktime_before(call->ping_at, t)) {
+ t = call->ping_at;
+ }
+
+ t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now)));
+ t_j += jiffies;
+
+ /* We have to make sure that the calculated jiffies value falls
+ * at or after the nsec value, or we may loop ceaselessly
+ * because the timer times out, but we haven't reached the nsec
+ * timeout yet.
+ */
+ t_j++;
+
+ if (call->timer.expires != t_j || !timer_pending(&call->timer)) {
+ mod_timer(&call->timer, t_j);
+ trace_rxrpc_timer(call, why, now, now_j);
}
}
out:
+ if (queue)
+ rxrpc_queue_call(call);
+}
+
+/*
+ * Set the timer
+ */
+void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
+ ktime_t now)
+{
+ read_lock_bh(&call->state_lock);
+ __rxrpc_set_timer(call, why, now);
read_unlock_bh(&call->state_lock);
}
/*
+ * Propose a PING ACK be sent.
+ */
+static void rxrpc_propose_ping(struct rxrpc_call *call,
+ bool immediate, bool background)
+{
+ if (immediate) {
+ if (background &&
+ !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events))
+ rxrpc_queue_call(call);
+ } else {
+ ktime_t now = ktime_get_real();
+ ktime_t ping_at = ktime_add_ms(now, rxrpc_idle_ack_delay);
+
+ if (ktime_before(ping_at, call->ping_at)) {
+ call->ping_at = ping_at;
+ rxrpc_set_timer(call, rxrpc_timer_set_for_ping, now);
+ }
+ }
+}
+
+/*
* propose an ACK be sent
*/
static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
@@ -62,9 +125,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
enum rxrpc_propose_ack_trace why)
{
enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use;
- unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay;
+ unsigned int expiry = rxrpc_soft_ack_delay;
+ ktime_t now, ack_at;
s8 prior = rxrpc_ack_priority[ack_reason];
+ /* Pings are handled specially because we don't want to accidentally
+ * lose a ping response by subsuming it into a ping.
+ */
+ if (ack_reason == RXRPC_ACK_PING) {
+ rxrpc_propose_ping(call, immediate, background);
+ goto trace;
+ }
+
/* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
* numbers, but we don't alter the timeout.
*/
@@ -100,7 +172,6 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
expiry = rxrpc_soft_ack_delay;
break;
- case RXRPC_ACK_PING:
case RXRPC_ACK_IDLE:
if (rxrpc_idle_ack_delay < expiry)
expiry = rxrpc_idle_ack_delay;
@@ -111,7 +182,6 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
break;
}
- now = jiffies;
if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) {
_debug("already scheduled");
} else if (immediate || expiry == 0) {
@@ -120,11 +190,11 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
background)
rxrpc_queue_call(call);
} else {
- ack_at = now + expiry;
- _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now);
- if (time_before(ack_at, call->ack_at)) {
+ now = ktime_get_real();
+ ack_at = ktime_add_ms(now, expiry);
+ if (ktime_before(ack_at, call->ack_at)) {
call->ack_at = ack_at;
- rxrpc_set_timer(call, rxrpc_timer_set_for_ack);
+ rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now);
}
}
@@ -157,12 +227,12 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
/*
* Perform retransmission of NAK'd and unack'd packets.
*/
-static void rxrpc_resend(struct rxrpc_call *call)
+static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
rxrpc_seq_t cursor, seq, top;
- ktime_t now = ktime_get_real(), max_age, oldest, resend_at, ack_ts;
+ ktime_t max_age, oldest, ack_ts;
int ix;
u8 annotation, anno_type, retrans = 0, unacked = 0;
@@ -212,14 +282,7 @@ static void rxrpc_resend(struct rxrpc_call *call)
ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
}
- resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout);
- call->resend_at = jiffies +
- nsecs_to_jiffies(ktime_to_ns(ktime_sub(resend_at, now))) +
- 1; /* We have to make sure that the calculated jiffies value
- * falls at or after the nsec value, or we shall loop
- * ceaselessly because the timer times out, but we haven't
- * reached the nsec timeout yet.
- */
+ call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout);
if (unacked)
rxrpc_congestion_timeout(call);
@@ -229,14 +292,14 @@ static void rxrpc_resend(struct rxrpc_call *call)
* retransmitting data.
*/
if (!retrans) {
- rxrpc_set_timer(call, rxrpc_timer_set_for_resend);
+ rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
spin_unlock_bh(&call->lock);
ack_ts = ktime_sub(now, call->acks_latest_ts);
if (ktime_to_ns(ack_ts) < call->peer->rtt)
goto out;
rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
rxrpc_propose_ack_ping_for_lost_ack);
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ rxrpc_send_ack_packet(call, true);
goto out;
}
@@ -256,7 +319,7 @@ static void rxrpc_resend(struct rxrpc_call *call)
rxrpc_get_skb(skb, rxrpc_skb_tx_got);
spin_unlock_bh(&call->lock);
- if (rxrpc_send_data_packet(call, skb) < 0) {
+ if (rxrpc_send_data_packet(call, skb, true) < 0) {
rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
return;
}
@@ -301,7 +364,7 @@ void rxrpc_process_call(struct work_struct *work)
{
struct rxrpc_call *call =
container_of(work, struct rxrpc_call, processor);
- unsigned long now;
+ ktime_t now;
rxrpc_see_call(call);
@@ -311,38 +374,41 @@ void rxrpc_process_call(struct work_struct *work)
recheck_state:
if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
goto recheck_state;
}
if (call->state == RXRPC_CALL_COMPLETE) {
del_timer_sync(&call->timer);
+ rxrpc_notify_socket(call);
goto out_put;
}
- now = jiffies;
- if (time_after_eq(now, call->expire_at)) {
+ now = ktime_get_real();
+ if (ktime_before(call->expire_at, now)) {
rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME);
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
}
- if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) ||
- time_after_eq(now, call->ack_at)) {
- call->ack_at = call->expire_at;
+ if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) {
if (call->ackr_reason) {
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ rxrpc_send_ack_packet(call, false);
goto recheck_state;
}
}
- if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) ||
- time_after_eq(now, call->resend_at)) {
- rxrpc_resend(call);
+ if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) {
+ rxrpc_send_ack_packet(call, true);
+ goto recheck_state;
+ }
+
+ if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) {
+ rxrpc_resend(call, now);
goto recheck_state;
}
- rxrpc_set_timer(call, rxrpc_timer_set_for_resend);
+ rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
/* other events may have been raised since we started checking */
if (call->events && call->state < RXRPC_CALL_COMPLETE) {
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index d4b3293b78fa..4353a29f3b57 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -19,11 +19,6 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-/*
- * Maximum lifetime of a call (in jiffies).
- */
-unsigned int rxrpc_max_call_lifetime = 60 * HZ;
-
const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
[RXRPC_CALL_UNINITIALISED] = "Uninit ",
[RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn",
@@ -76,10 +71,8 @@ static void rxrpc_call_timer_expired(unsigned long _call)
_enter("%d", call->debug_id);
- if (call->state < RXRPC_CALL_COMPLETE) {
- trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
- rxrpc_queue_call(call);
- }
+ if (call->state < RXRPC_CALL_COMPLETE)
+ rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real());
}
/*
@@ -207,14 +200,15 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
*/
static void rxrpc_start_call_timer(struct rxrpc_call *call)
{
- unsigned long expire_at;
+ ktime_t now = ktime_get_real(), expire_at;
- expire_at = jiffies + rxrpc_max_call_lifetime;
+ expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime);
call->expire_at = expire_at;
call->ack_at = expire_at;
+ call->ping_at = expire_at;
call->resend_at = expire_at;
- call->timer.expires = expire_at + 1;
- rxrpc_set_timer(call, rxrpc_timer_begin);
+ call->timer.expires = jiffies + LONG_MAX / 2;
+ rxrpc_set_timer(call, rxrpc_timer_begin, now);
}
/*
@@ -505,7 +499,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
struct rxrpc_call, sock_link);
rxrpc_get_call(call, rxrpc_call_got);
rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET);
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
rxrpc_release_call(rx, call);
rxrpc_put_call(call, rxrpc_call_put);
}
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index c76a125df891..60ef9605167e 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -200,7 +200,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
}
atomic_set(&conn->usage, 1);
- if (conn->params.exclusive)
+ if (cp->exclusive)
__set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
conn->params = *cp;
@@ -576,28 +576,42 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
}
/*
+ * Assign channels and callNumbers to waiting calls with channel_lock
+ * held by caller.
+ */
+static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn)
+{
+ u8 avail, mask;
+
+ switch (conn->cache_state) {
+ case RXRPC_CONN_CLIENT_ACTIVE:
+ mask = RXRPC_ACTIVE_CHANS_MASK;
+ break;
+ default:
+ return;
+ }
+
+ while (!list_empty(&conn->waiting_calls) &&
+ (avail = ~conn->active_chans,
+ avail &= mask,
+ avail != 0))
+ rxrpc_activate_one_channel(conn, __ffs(avail));
+}
+
+/*
* Assign channels and callNumbers to waiting calls.
*/
static void rxrpc_activate_channels(struct rxrpc_connection *conn)
{
- unsigned char mask;
-
_enter("%d", conn->debug_id);
trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans);
- if (conn->cache_state != RXRPC_CONN_CLIENT_ACTIVE ||
- conn->active_chans == RXRPC_ACTIVE_CHANS_MASK)
+ if (conn->active_chans == RXRPC_ACTIVE_CHANS_MASK)
return;
spin_lock(&conn->channel_lock);
-
- while (!list_empty(&conn->waiting_calls) &&
- (mask = ~conn->active_chans,
- mask &= RXRPC_ACTIVE_CHANS_MASK,
- mask != 0))
- rxrpc_activate_one_channel(conn, __ffs(mask));
-
+ rxrpc_activate_channels_locked(conn);
spin_unlock(&conn->channel_lock);
_leave("");
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 37609ce89f52..3f9d8d7ec632 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -276,7 +276,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
return 0;
case RXRPC_PACKET_TYPE_ABORT:
- if (skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) < 0)
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &wtmp, sizeof(wtmp)) < 0)
return -EPROTO;
abort_code = ntohl(wtmp);
_proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 094720dd1eaf..44fb8d893c7d 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -41,10 +41,10 @@ static void rxrpc_proto_abort(const char *why,
*/
static void rxrpc_congestion_management(struct rxrpc_call *call,
struct sk_buff *skb,
- struct rxrpc_ack_summary *summary)
+ struct rxrpc_ack_summary *summary,
+ rxrpc_serial_t acked_serial)
{
enum rxrpc_congest_change change = rxrpc_cong_no_change;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned int cumulative_acks = call->cong_cumul_acks;
unsigned int cwnd = call->cong_cwnd;
bool resend = false;
@@ -57,7 +57,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
call->cong_ssthresh = max_t(unsigned int,
summary->flight_size / 2, 2);
cwnd = 1;
- if (cwnd > call->cong_ssthresh &&
+ if (cwnd >= call->cong_ssthresh &&
call->cong_mode == RXRPC_CALL_SLOW_START) {
call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
call->cong_tstamp = skb->tstamp;
@@ -82,7 +82,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
goto packet_loss_detected;
if (summary->cumulative_acks > 0)
cwnd += 1;
- if (cwnd > call->cong_ssthresh) {
+ if (cwnd >= call->cong_ssthresh) {
call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
call->cong_tstamp = skb->tstamp;
}
@@ -161,7 +161,7 @@ resume_normality:
call->cong_dup_acks = 0;
call->cong_extra = 0;
call->cong_tstamp = skb->tstamp;
- if (cwnd <= call->cong_ssthresh)
+ if (cwnd < call->cong_ssthresh)
call->cong_mode = RXRPC_CALL_SLOW_START;
else
call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
@@ -172,7 +172,7 @@ out_no_clear_ca:
cwnd = RXRPC_RXTX_BUFF_SIZE - 1;
call->cong_cwnd = cwnd;
call->cong_cumul_acks = cumulative_acks;
- trace_rxrpc_congest(call, summary, sp->hdr.serial, change);
+ trace_rxrpc_congest(call, summary, acked_serial, change);
if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
rxrpc_queue_call(call);
return;
@@ -328,7 +328,8 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call)
call->resend_at = call->expire_at;
call->ack_at = call->expire_at;
spin_unlock_bh(&call->lock);
- rxrpc_set_timer(call, rxrpc_timer_init_for_reply);
+ rxrpc_set_timer(call, rxrpc_timer_init_for_reply,
+ ktime_get_real());
}
if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags))
@@ -358,7 +359,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call)
static bool rxrpc_validate_jumbo(struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- unsigned int offset = sp->offset;
+ unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int len = skb->len;
int nr_jumbo = 1;
u8 flags = sp->hdr.flags;
@@ -419,7 +420,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
u16 skew)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- unsigned int offset = sp->offset;
+ unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int ix;
rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
rxrpc_seq_t seq = sp->hdr.seq, hard_ack;
@@ -624,9 +625,9 @@ static void rxrpc_input_ping_response(struct rxrpc_call *call,
rxrpc_serial_t ping_serial;
ktime_t ping_time;
- ping_time = call->ackr_ping_time;
+ ping_time = call->ping_time;
smp_rmb();
- ping_serial = call->ackr_ping;
+ ping_serial = call->ping_serial;
if (!test_bit(RXRPC_CALL_PINGING, &call->flags) ||
before(orig_serial, ping_serial))
@@ -658,6 +659,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
rwind = RXRPC_RXTX_BUFF_SIZE - 1;
call->tx_winsize = rwind;
+ if (call->cong_ssthresh > rwind)
+ call->cong_ssthresh = rwind;
mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU));
@@ -744,15 +747,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
} buf;
rxrpc_serial_t acked_serial;
rxrpc_seq_t first_soft_ack, hard_ack;
- int nr_acks, offset;
+ int nr_acks, offset, ioffset;
_enter("");
- if (skb_copy_bits(skb, sp->offset, &buf.ack, sizeof(buf.ack)) < 0) {
+ offset = sizeof(struct rxrpc_wire_header);
+ if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) {
_debug("extraction failure");
return rxrpc_proto_abort("XAK", call, 0);
}
- sp->offset += sizeof(buf.ack);
+ offset += sizeof(buf.ack);
acked_serial = ntohl(buf.ack.serial);
first_soft_ack = ntohl(buf.ack.firstPacket);
@@ -790,9 +794,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
rxrpc_propose_ack_respond_to_ack);
}
- offset = sp->offset + nr_acks + 3;
- if (skb->len >= offset + sizeof(buf.info)) {
- if (skb_copy_bits(skb, offset, &buf.info, sizeof(buf.info)) < 0)
+ ioffset = offset + nr_acks + 3;
+ if (skb->len >= ioffset + sizeof(buf.info)) {
+ if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0)
return rxrpc_proto_abort("XAI", call, 0);
rxrpc_input_ackinfo(call, skb, &buf.info);
}
@@ -830,7 +834,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
rxrpc_rotate_tx_window(call, hard_ack, &summary);
if (nr_acks > 0) {
- if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0)
+ if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0)
return rxrpc_proto_abort("XSA", call, 0);
rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks,
&summary);
@@ -843,12 +847,13 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
RXRPC_TX_ANNO_LAST &&
- summary.nr_acks == call->tx_top - hard_ack)
+ summary.nr_acks == call->tx_top - hard_ack &&
+ rxrpc_is_client_call(call))
rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
false, true,
rxrpc_propose_ack_ping_for_lost_reply);
- return rxrpc_congestion_management(call, skb, &summary);
+ return rxrpc_congestion_management(call, skb, &summary, acked_serial);
}
/*
@@ -878,7 +883,8 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
_enter("");
if (skb->len >= 4 &&
- skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) >= 0)
+ skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &wtmp, sizeof(wtmp)) >= 0)
abort_code = ntohl(wtmp);
_proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
@@ -933,6 +939,33 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
}
/*
+ * Handle a new call on a channel implicitly completing the preceding call on
+ * that channel.
+ *
+ * TODO: If callNumber > call_id + 1, renegotiate security.
+ */
+static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
+ struct rxrpc_call *call)
+{
+ switch (call->state) {
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ rxrpc_call_completed(call);
+ break;
+ case RXRPC_CALL_COMPLETE:
+ break;
+ default:
+ if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) {
+ set_bit(RXRPC_CALL_EV_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+ break;
+ }
+
+ __rxrpc_disconnect_call(conn, call);
+ rxrpc_notify_socket(call);
+}
+
+/*
* post connection-level events to the connection
* - this includes challenges, responses, some aborts and call terminal packet
* retransmission.
@@ -994,7 +1027,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
sp->hdr.securityIndex = whdr.securityIndex;
sp->hdr._rsvd = ntohs(whdr._rsvd);
sp->hdr.serviceId = ntohs(whdr.serviceId);
- sp->offset = sizeof(whdr);
return 0;
}
@@ -1141,6 +1173,16 @@ void rxrpc_data_ready(struct sock *udp_sk)
}
call = rcu_dereference(chan->call);
+
+ if (sp->hdr.callNumber > chan->call_id) {
+ if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) {
+ rcu_read_unlock();
+ goto reject_packet;
+ }
+ if (call)
+ rxrpc_input_implicit_end_call(conn, call);
+ call = NULL;
+ }
} else {
skew = 0;
call = NULL;
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 190f68bd9e27..540d3955c1bc 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -95,7 +95,8 @@ void rxrpc_process_local_events(struct rxrpc_local *local)
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_VERSION:
- if (skb_copy_bits(skb, sp->offset, &v, 1) < 0)
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &v, 1) < 0)
return;
_proto("Rx VERSION { %02x }", v);
if (v == 0)
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index e3fad80b0795..ff4864d550b8 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -86,7 +86,6 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx)
atomic_set(&local->usage, 1);
INIT_LIST_HEAD(&local->link);
INIT_WORK(&local->processor, rxrpc_local_processor);
- INIT_HLIST_HEAD(&local->services);
init_rwsem(&local->defrag_sem);
skb_queue_head_init(&local->reject_queue);
skb_queue_head_init(&local->event_queue);
@@ -292,7 +291,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
mutex_unlock(&rxrpc_local_mutex);
ASSERT(RB_EMPTY_ROOT(&local->client_conns));
- ASSERT(hlist_empty(&local->services));
+ ASSERT(!local->service);
if (socket) {
local->socket = NULL;
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index aedb8978226d..6dee55fad2d3 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -21,28 +21,33 @@
unsigned int rxrpc_max_backlog __read_mostly = 10;
/*
+ * Maximum lifetime of a call (in mx).
+ */
+unsigned int rxrpc_max_call_lifetime = 60 * 1000;
+
+/*
* How long to wait before scheduling ACK generation after seeing a
- * packet with RXRPC_REQUEST_ACK set (in jiffies).
+ * packet with RXRPC_REQUEST_ACK set (in ms).
*/
unsigned int rxrpc_requested_ack_delay = 1;
/*
- * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
+ * How long to wait before scheduling an ACK with subtype DELAY (in ms).
*
* We use this when we've received new data packets. If those packets aren't
* all consumed within this time we will send a DELAY ACK if an ACK was not
* requested to let the sender know it doesn't need to resend.
*/
-unsigned int rxrpc_soft_ack_delay = 1 * HZ;
+unsigned int rxrpc_soft_ack_delay = 1 * 1000;
/*
- * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
+ * How long to wait before scheduling an ACK with subtype IDLE (in ms).
*
* We use this when we've consumed some previously soft-ACK'd packets when
* further packets aren't immediately received to decide when to send an IDLE
* ACK let the other end know that it can free up its Tx buffer space.
*/
-unsigned int rxrpc_idle_ack_delay = 0.5 * HZ;
+unsigned int rxrpc_idle_ack_delay = 0.5 * 1000;
/*
* Receive window size in packets. This indicates the maximum number of
@@ -88,10 +93,9 @@ const s8 rxrpc_ack_priority[] = {
[RXRPC_ACK_EXCEEDS_WINDOW] = 6,
[RXRPC_ACK_NOSPACE] = 7,
[RXRPC_ACK_PING_RESPONSE] = 8,
- [RXRPC_ACK_PING] = 9,
};
-const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = {
+const char rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = {
"---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY",
"IDL", "-?-"
};
@@ -108,7 +112,6 @@ const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = {
[rxrpc_skb_tx_cleaned] = "Tx CLN",
[rxrpc_skb_tx_freed] = "Tx FRE",
[rxrpc_skb_tx_got] = "Tx GOT",
- [rxrpc_skb_tx_lost] = "Tx *L*",
[rxrpc_skb_tx_new] = "Tx NEW",
[rxrpc_skb_tx_rotated] = "Tx ROT",
[rxrpc_skb_tx_seen] = "Tx SEE",
@@ -192,7 +195,9 @@ const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = {
[rxrpc_timer_begin] = "Begin ",
[rxrpc_timer_expired] = "*EXPR*",
[rxrpc_timer_init_for_reply] = "IniRpl",
+ [rxrpc_timer_init_for_send_reply] = "SndRpl",
[rxrpc_timer_set_for_ack] = "SetAck",
+ [rxrpc_timer_set_for_ping] = "SetPng",
[rxrpc_timer_set_for_send] = "SetTx ",
[rxrpc_timer_set_for_resend] = "SetRTx",
};
@@ -203,6 +208,7 @@ const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = {
[rxrpc_propose_ack_ping_for_lost_ack] = "LostAck",
[rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl",
[rxrpc_propose_ack_ping_for_params] = "Params ",
+ [rxrpc_propose_ack_processing_op] = "ProcOp ",
[rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack",
[rxrpc_propose_ack_respond_to_ping] = "Rsp2Png",
[rxrpc_propose_ack_retry_tx] = "RetryTx",
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index cf43a715685e..5dab1ff3a6c2 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -19,26 +19,27 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-struct rxrpc_pkt_buffer {
+struct rxrpc_ack_buffer {
struct rxrpc_wire_header whdr;
- union {
- struct {
- struct rxrpc_ackpacket ack;
- u8 acks[255];
- u8 pad[3];
- };
- __be32 abort_code;
- };
+ struct rxrpc_ackpacket ack;
+ u8 acks[255];
+ u8 pad[3];
struct rxrpc_ackinfo ackinfo;
};
+struct rxrpc_abort_buffer {
+ struct rxrpc_wire_header whdr;
+ __be32 abort_code;
+};
+
/*
* Fill out an ACK packet.
*/
static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
- struct rxrpc_pkt_buffer *pkt,
+ struct rxrpc_ack_buffer *pkt,
rxrpc_seq_t *_hard_ack,
- rxrpc_seq_t *_top)
+ rxrpc_seq_t *_top,
+ u8 reason)
{
rxrpc_serial_t serial;
rxrpc_seq_t hard_ack, top, seq;
@@ -58,10 +59,10 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
pkt->ack.firstPacket = htonl(hard_ack + 1);
pkt->ack.previousPacket = htonl(call->ackr_prev_seq);
pkt->ack.serial = htonl(serial);
- pkt->ack.reason = call->ackr_reason;
+ pkt->ack.reason = reason;
pkt->ack.nAcks = top - hard_ack;
- if (pkt->ack.reason == RXRPC_ACK_PING)
+ if (reason == RXRPC_ACK_PING)
pkt->whdr.flags |= RXRPC_REQUEST_ACK;
if (after(top, hard_ack)) {
@@ -91,22 +92,19 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
}
/*
- * Send an ACK or ABORT call packet.
+ * Send an ACK call packet.
*/
-int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
+int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping)
{
struct rxrpc_connection *conn = NULL;
- struct rxrpc_pkt_buffer *pkt;
+ struct rxrpc_ack_buffer *pkt;
struct msghdr msg;
struct kvec iov[2];
rxrpc_serial_t serial;
rxrpc_seq_t hard_ack, top;
size_t len, n;
- bool ping = false;
- int ioc, ret;
- u32 abort_code;
-
- _enter("%u,%s", call->debug_id, rxrpc_pkts[type]);
+ int ret;
+ u8 reason;
spin_lock_bh(&call->lock);
if (call->conn)
@@ -131,68 +129,44 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
pkt->whdr.cid = htonl(call->cid);
pkt->whdr.callNumber = htonl(call->call_id);
pkt->whdr.seq = 0;
- pkt->whdr.type = type;
- pkt->whdr.flags = conn->out_clientflag;
+ pkt->whdr.type = RXRPC_PACKET_TYPE_ACK;
+ pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag;
pkt->whdr.userStatus = 0;
pkt->whdr.securityIndex = call->security_ix;
pkt->whdr._rsvd = 0;
pkt->whdr.serviceId = htons(call->service_id);
- iov[0].iov_base = pkt;
- iov[0].iov_len = sizeof(pkt->whdr);
- len = sizeof(pkt->whdr);
-
- switch (type) {
- case RXRPC_PACKET_TYPE_ACK:
- spin_lock_bh(&call->lock);
+ spin_lock_bh(&call->lock);
+ if (ping) {
+ reason = RXRPC_ACK_PING;
+ } else {
+ reason = call->ackr_reason;
if (!call->ackr_reason) {
spin_unlock_bh(&call->lock);
ret = 0;
goto out;
}
- ping = (call->ackr_reason == RXRPC_ACK_PING);
- n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top);
call->ackr_reason = 0;
+ }
+ n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason);
- spin_unlock_bh(&call->lock);
-
-
- pkt->whdr.flags |= RXRPC_SLOW_START_OK;
-
- iov[0].iov_len += sizeof(pkt->ack) + n;
- iov[1].iov_base = &pkt->ackinfo;
- iov[1].iov_len = sizeof(pkt->ackinfo);
- len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo);
- ioc = 2;
- break;
-
- case RXRPC_PACKET_TYPE_ABORT:
- abort_code = call->abort_code;
- pkt->abort_code = htonl(abort_code);
- iov[0].iov_len += sizeof(pkt->abort_code);
- len += sizeof(pkt->abort_code);
- ioc = 1;
- break;
+ spin_unlock_bh(&call->lock);
- default:
- BUG();
- ret = -ENOANO;
- goto out;
- }
+ iov[0].iov_base = pkt;
+ iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
+ iov[1].iov_base = &pkt->ackinfo;
+ iov[1].iov_len = sizeof(pkt->ackinfo);
+ len = iov[0].iov_len + iov[1].iov_len;
serial = atomic_inc_return(&conn->serial);
pkt->whdr.serial = htonl(serial);
- switch (type) {
- case RXRPC_PACKET_TYPE_ACK:
- trace_rxrpc_tx_ack(call, serial,
- ntohl(pkt->ack.firstPacket),
- ntohl(pkt->ack.serial),
- pkt->ack.reason, pkt->ack.nAcks);
- break;
- }
+ trace_rxrpc_tx_ack(call, serial,
+ ntohl(pkt->ack.firstPacket),
+ ntohl(pkt->ack.serial),
+ pkt->ack.reason, pkt->ack.nAcks);
if (ping) {
- call->ackr_ping = serial;
+ call->ping_serial = serial;
smp_wmb();
/* We need to stick a time in before we send the packet in case
* the reply gets back before kernel_sendmsg() completes - but
@@ -201,19 +175,19 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
* the packet transmission is more likely to happen towards the
* end of the kernel_sendmsg() call.
*/
- call->ackr_ping_time = ktime_get_real();
+ call->ping_time = ktime_get_real();
set_bit(RXRPC_CALL_PINGING, &call->flags);
trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial);
}
- ret = kernel_sendmsg(conn->params.local->socket,
- &msg, iov, ioc, len);
+
+ ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
if (ping)
- call->ackr_ping_time = ktime_get_real();
+ call->ping_time = ktime_get_real();
- if (type == RXRPC_PACKET_TYPE_ACK &&
- call->state < RXRPC_CALL_COMPLETE) {
+ if (call->state < RXRPC_CALL_COMPLETE) {
if (ret < 0) {
- clear_bit(RXRPC_CALL_PINGING, &call->flags);
+ if (ping)
+ clear_bit(RXRPC_CALL_PINGING, &call->flags);
rxrpc_propose_ACK(call, pkt->ack.reason,
ntohs(pkt->ack.maxSkew),
ntohl(pkt->ack.serial),
@@ -236,9 +210,60 @@ out:
}
/*
+ * Send an ABORT call packet.
+ */
+int rxrpc_send_abort_packet(struct rxrpc_call *call)
+{
+ struct rxrpc_connection *conn = NULL;
+ struct rxrpc_abort_buffer pkt;
+ struct msghdr msg;
+ struct kvec iov[1];
+ rxrpc_serial_t serial;
+ int ret;
+
+ spin_lock_bh(&call->lock);
+ if (call->conn)
+ conn = rxrpc_get_connection_maybe(call->conn);
+ spin_unlock_bh(&call->lock);
+ if (!conn)
+ return -ECONNRESET;
+
+ msg.msg_name = &call->peer->srx.transport;
+ msg.msg_namelen = call->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ pkt.whdr.epoch = htonl(conn->proto.epoch);
+ pkt.whdr.cid = htonl(call->cid);
+ pkt.whdr.callNumber = htonl(call->call_id);
+ pkt.whdr.seq = 0;
+ pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT;
+ pkt.whdr.flags = conn->out_clientflag;
+ pkt.whdr.userStatus = 0;
+ pkt.whdr.securityIndex = call->security_ix;
+ pkt.whdr._rsvd = 0;
+ pkt.whdr.serviceId = htons(call->service_id);
+ pkt.abort_code = htonl(call->abort_code);
+
+ iov[0].iov_base = &pkt;
+ iov[0].iov_len = sizeof(pkt);
+
+ serial = atomic_inc_return(&conn->serial);
+ pkt.whdr.serial = htonl(serial);
+
+ ret = kernel_sendmsg(conn->params.local->socket,
+ &msg, iov, 1, sizeof(pkt));
+
+ rxrpc_put_connection(conn);
+ return ret;
+}
+
+/*
* send a packet through the transport endpoint
*/
-int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb)
+int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
+ bool retrans)
{
struct rxrpc_connection *conn = call->conn;
struct rxrpc_wire_header whdr;
@@ -247,6 +272,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb)
struct kvec iov[2];
rxrpc_serial_t serial;
size_t len;
+ bool lost = false;
int ret, opt;
_enter(",{%d}", skb->len);
@@ -281,20 +307,20 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb)
/* If our RTT cache needs working on, request an ACK. Also request
* ACKs if a DATA packet appears to have been lost.
*/
- if (call->cong_mode == RXRPC_CALL_FAST_RETRANSMIT ||
- (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
- ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
- ktime_get_real()))
+ if (!(sp->hdr.flags & RXRPC_LAST_PACKET) &&
+ (retrans ||
+ call->cong_mode == RXRPC_CALL_SLOW_START ||
+ (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
+ ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ ktime_get_real())))
whdr.flags |= RXRPC_REQUEST_ACK;
if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
static int lose;
if ((lose++ & 7) == 7) {
- trace_rxrpc_tx_data(call, sp->hdr.seq, serial,
- whdr.flags, true);
- rxrpc_lose_skb(skb, rxrpc_skb_tx_lost);
- _leave(" = 0 [lose]");
- return 0;
+ ret = 0;
+ lost = true;
+ goto done;
}
}
@@ -319,7 +345,8 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb)
goto send_fragmentable;
done:
- trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, false);
+ trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
+ retrans, lost);
if (ret >= 0) {
ktime_t now = ktime_get_real();
skb->tstamp = now;
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 038ae62ddb4d..c29362d50a92 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -143,7 +143,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false,
rxrpc_propose_ack_terminal_ack);
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ rxrpc_send_ack_packet(call, false);
}
write_lock_bh(&call->state_lock);
@@ -151,17 +151,21 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
switch (call->state) {
case RXRPC_CALL_CLIENT_RECV_REPLY:
__rxrpc_call_completed(call);
+ write_unlock_bh(&call->state_lock);
break;
case RXRPC_CALL_SERVER_RECV_REQUEST:
call->tx_phase = true;
call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
+ call->ack_at = call->expire_at;
+ write_unlock_bh(&call->state_lock);
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, false, true,
+ rxrpc_propose_ack_processing_op);
break;
default:
+ write_unlock_bh(&call->state_lock);
break;
}
-
- write_unlock_bh(&call->state_lock);
}
/*
@@ -212,7 +216,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
true, false,
rxrpc_propose_ack_rotate_rx);
if (call->ackr_reason)
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ rxrpc_send_ack_packet(call, false);
}
}
@@ -261,15 +265,13 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
u8 *_annotation,
unsigned int *_offset, unsigned int *_len)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- unsigned int offset = *_offset;
+ unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int len = *_len;
int ret;
u8 annotation = *_annotation;
/* Locate the subpacket */
- offset = sp->offset;
- len = skb->len - sp->offset;
+ len = skb->len - offset;
if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) {
offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) *
RXRPC_JUMBO_SUBPKTLEN);
@@ -654,7 +656,7 @@ excess_data:
goto out;
call_complete:
*_abort = call->abort_code;
- ret = call->error;
+ ret = -call->error;
if (call->completion == RXRPC_CALL_SUCCEEDED) {
ret = 1;
if (size > 0)
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 88d080a1a3de..4374e7b9c7bf 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -381,7 +381,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
return 0;
protocol_error:
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
_leave(" = -EPROTO");
return -EPROTO;
@@ -471,7 +471,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
return 0;
protocol_error:
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
_leave(" = -EPROTO");
return -EPROTO;
@@ -523,7 +523,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
if (cksum != expected_cksum) {
rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO);
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
_leave(" = -EPROTO [csum failed]");
return -EPROTO;
}
@@ -771,7 +771,8 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
}
abort_code = RXKADPACKETSHORT;
- if (skb_copy_bits(skb, sp->offset, &challenge, sizeof(challenge)) < 0)
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &challenge, sizeof(challenge)) < 0)
goto protocol_error;
version = ntohl(challenge.version);
@@ -1028,7 +1029,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
abort_code = RXKADPACKETSHORT;
- if (skb_copy_bits(skb, sp->offset, &response, sizeof(response)) < 0)
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &response, sizeof(response)) < 0)
goto protocol_error;
if (!pskb_pull(skb, sizeof(response)))
BUG();
@@ -1057,7 +1059,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
return -ENOMEM;
abort_code = RXKADPACKETSHORT;
- if (skb_copy_bits(skb, sp->offset, ticket, ticket_len) < 0)
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ ticket, ticket_len) < 0)
goto protocol_error_free;
ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 82d8134e9287..7d921e56e715 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -131,10 +131,10 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
/* find the service */
read_lock(&local->services_lock);
- hlist_for_each_entry(rx, &local->services, listen_link) {
- if (rx->srx.srx_service == conn->params.service_id)
- goto found_service;
- }
+ rx = rcu_dereference_protected(local->service,
+ lockdep_is_held(&local->services_lock));
+ if (rx && rx->srx.srx_service == conn->params.service_id)
+ goto found_service;
/* the service appears to have died */
read_unlock(&local->services_lock);
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 1f8040d82395..b214a4d4a641 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -130,6 +130,11 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
break;
case RXRPC_CALL_SERVER_ACK_REQUEST:
call->state = RXRPC_CALL_SERVER_SEND_REPLY;
+ call->ack_at = call->expire_at;
+ if (call->ackr_reason == RXRPC_ACK_DELAY)
+ call->ackr_reason = 0;
+ __rxrpc_set_timer(call, rxrpc_timer_init_for_send_reply,
+ ktime_get_real());
if (!last)
break;
case RXRPC_CALL_SERVER_SEND_REPLY:
@@ -144,18 +149,18 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
if (seq == 1 && rxrpc_is_client_call(call))
rxrpc_expose_client_call(call);
- ret = rxrpc_send_data_packet(call, skb);
+ ret = rxrpc_send_data_packet(call, skb, false);
if (ret < 0) {
_debug("need instant resend %d", ret);
rxrpc_instant_resend(call, ix);
} else {
- unsigned long resend_at;
+ ktime_t now = ktime_get_real(), resend_at;
- resend_at = jiffies + msecs_to_jiffies(rxrpc_resend_timeout);
+ resend_at = ktime_add_ms(now, rxrpc_resend_timeout);
- if (time_before(resend_at, call->resend_at)) {
+ if (ktime_before(resend_at, call->resend_at)) {
call->resend_at = resend_at;
- rxrpc_set_timer(call, rxrpc_timer_set_for_send);
+ rxrpc_set_timer(call, rxrpc_timer_set_for_send, now);
}
}
@@ -197,7 +202,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
do {
/* Check to see if there's a ping ACK to reply to. */
if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ rxrpc_send_ack_packet(call, false);
if (!skb) {
size_t size, chunk, max, space;
@@ -514,8 +519,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
} else if (cmd == RXRPC_CMD_SEND_ABORT) {
ret = 0;
if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED))
- ret = rxrpc_send_call_packet(call,
- RXRPC_PACKET_TYPE_ABORT);
+ ret = rxrpc_send_abort_packet(call);
} else if (cmd != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
} else if (rxrpc_is_client_call(call) &&
@@ -597,7 +601,7 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
lock_sock(sock->sk);
if (rxrpc_abort_call(why, call, 0, abort_code, error))
- rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_send_abort_packet(call);
release_sock(sock->sk);
_leave("");
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 5154cbf7e540..67b02c45271b 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -77,14 +77,9 @@ void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
if (skb) {
int n;
CHECK_SLAB_OKAY(&skb->users);
- if (op == rxrpc_skb_tx_lost) {
- n = atomic_read(select_skb_count(op));
- trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
- } else {
- n = atomic_dec_return(select_skb_count(op));
- trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
- kfree_skb(skb);
- }
+ n = atomic_dec_return(select_skb_count(op));
+ trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+ kfree_skb(skb);
}
}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 13d1df03ebac..34c706d2f79c 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -35,7 +35,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.data = &rxrpc_requested_ack_delay,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
+ .proc_handler = proc_dointvec,
.extra1 = (void *)&zero,
},
{
@@ -43,7 +43,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.data = &rxrpc_soft_ack_delay,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
+ .proc_handler = proc_dointvec,
.extra1 = (void *)&one,
},
{
@@ -51,7 +51,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.data = &rxrpc_idle_ack_delay,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
+ .proc_handler = proc_dointvec,
.extra1 = (void *)&one,
},
{
@@ -85,7 +85,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.data = &rxrpc_max_call_lifetime,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
+ .proc_handler = proc_dointvec,
.extra1 = (void *)&one,
},
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index c9102172ce3b..a512b18c0088 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -341,22 +341,25 @@ int tcf_register_action(struct tc_action_ops *act,
if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup)
return -EINVAL;
+ /* We have to register pernet ops before making the action ops visible,
+ * otherwise tcf_action_init_1() could get a partially initialized
+ * netns.
+ */
+ ret = register_pernet_subsys(ops);
+ if (ret)
+ return ret;
+
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
+ unregister_pernet_subsys(ops);
return -EEXIST;
}
}
list_add_tail(&act->head, &act_base);
write_unlock(&act_mod_lock);
- ret = register_pernet_subsys(ops);
- if (ret) {
- tcf_unregister_action(act, ops);
- return ret;
- }
-
return 0;
}
EXPORT_SYMBOL(tcf_register_action);
@@ -367,8 +370,6 @@ int tcf_unregister_action(struct tc_action_ops *act,
struct tc_action_ops *a;
int err = -ENOENT;
- unregister_pernet_subsys(ops);
-
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (a == act) {
@@ -378,6 +379,8 @@ int tcf_unregister_action(struct tc_action_ops *act,
}
}
write_unlock(&act_mod_lock);
+ if (!err)
+ unregister_pernet_subsys(ops);
return err;
}
EXPORT_SYMBOL(tcf_unregister_action);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index ccf7b4b655fe..95c463cbb9a6 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -53,7 +53,7 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
u32 *tlv = (u32 *)(skbdata);
u16 totlen = nla_total_size(dlen); /*alignment + hdr */
char *dptr = (char *)tlv + NLA_HDRLEN;
- u32 htlv = attrtype << 16 | dlen;
+ u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN);
*tlv = htonl(htlv);
memset(dptr, 0, totlen - NLA_HDRLEN);
@@ -653,7 +653,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data;
- u16 ifehdrln = ifehdr->metalen;
+ int ifehdrln = (int)ifehdr->metalen;
struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data);
spin_lock(&ife->tcf_lock);
@@ -766,8 +766,6 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
return TC_ACT_SHOT;
}
- iethh = eth_hdr(skb);
-
err = skb_cow_head(skb, hdrm);
if (unlikely(err)) {
ife->tcf_qstats.drops++;
@@ -778,6 +776,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
if (!(at & AT_EGRESS))
skb_push(skb, skb->dev->hard_header_len);
+ iethh = (struct ethhdr *)skb->data;
__skb_push(skb, hdrm);
memcpy(skb->data, iethh, skb->mac_len);
skb_reset_mac_header(skb);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 667dc382df82..2d93be6717e5 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -33,6 +33,25 @@
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
+static bool tcf_mirred_is_act_redirect(int action)
+{
+ return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
+}
+
+static u32 tcf_mirred_act_direction(int action)
+{
+ switch (action) {
+ case TCA_EGRESS_REDIR:
+ case TCA_EGRESS_MIRROR:
+ return AT_EGRESS;
+ case TCA_INGRESS_REDIR:
+ case TCA_INGRESS_MIRROR:
+ return AT_INGRESS;
+ default:
+ BUG();
+ }
+}
+
static void tcf_mirred_release(struct tc_action *a, int bind)
{
struct tcf_mirred *m = to_mirred(a);
@@ -54,17 +73,32 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
static int mirred_net_id;
static struct tc_action_ops act_mirred_ops;
+static bool dev_is_mac_header_xmit(const struct net_device *dev)
+{
+ switch (dev->type) {
+ case ARPHRD_TUNNEL:
+ case ARPHRD_TUNNEL6:
+ case ARPHRD_SIT:
+ case ARPHRD_IPGRE:
+ case ARPHRD_VOID:
+ case ARPHRD_NONE:
+ return false;
+ }
+ return true;
+}
+
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
int bind)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
+ bool mac_header_xmit = false;
struct tc_mirred *parm;
struct tcf_mirred *m;
struct net_device *dev;
- int ret, ok_push = 0;
bool exists = false;
+ int ret;
if (nla == NULL)
return -EINVAL;
@@ -82,6 +116,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
switch (parm->eaction) {
case TCA_EGRESS_MIRROR:
case TCA_EGRESS_REDIR:
+ case TCA_INGRESS_REDIR:
+ case TCA_INGRESS_MIRROR:
break;
default:
if (exists)
@@ -95,19 +131,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
tcf_hash_release(*a, bind);
return -ENODEV;
}
- switch (dev->type) {
- case ARPHRD_TUNNEL:
- case ARPHRD_TUNNEL6:
- case ARPHRD_SIT:
- case ARPHRD_IPGRE:
- case ARPHRD_VOID:
- case ARPHRD_NONE:
- ok_push = 0;
- break;
- default:
- ok_push = 1;
- break;
- }
+ mac_header_xmit = dev_is_mac_header_xmit(dev);
} else {
dev = NULL;
}
@@ -136,7 +160,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
rcu_assign_pointer(m->tcfm_dev, dev);
- m->tcfm_ok_push = ok_push;
+ m->tcfm_mac_header_xmit = mac_header_xmit;
}
if (ret == ACT_P_CREATED) {
@@ -153,15 +177,20 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_mirred *m = to_mirred(a);
+ bool m_mac_header_xmit;
struct net_device *dev;
struct sk_buff *skb2;
- int retval, err;
+ int retval, err = 0;
+ int m_eaction;
+ int mac_len;
u32 at;
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
rcu_read_lock();
+ m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
+ m_eaction = READ_ONCE(m->tcfm_eaction);
retval = READ_ONCE(m->tcf_action);
dev = rcu_dereference(m->tcfm_dev);
if (unlikely(!dev)) {
@@ -180,23 +209,36 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
if (!skb2)
goto out;
- if (!(at & AT_EGRESS)) {
- if (m->tcfm_ok_push)
+ /* If action's target direction differs than filter's direction,
+ * and devices expect a mac header on xmit, then mac push/pull is
+ * needed.
+ */
+ if (at != tcf_mirred_act_direction(m_eaction) && m_mac_header_xmit) {
+ if (at & AT_EGRESS) {
+ /* caught at egress, act ingress: pull mac */
+ mac_len = skb_network_header(skb) - skb_mac_header(skb);
+ skb_pull_rcsum(skb2, mac_len);
+ } else {
+ /* caught at ingress, act egress: push mac */
skb_push_rcsum(skb2, skb->mac_len);
+ }
}
/* mirror is always swallowed */
- if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
+ if (tcf_mirred_is_act_redirect(m_eaction))
skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
- err = dev_queue_xmit(skb2);
+ if (tcf_mirred_act_direction(m_eaction) & AT_EGRESS)
+ err = dev_queue_xmit(skb2);
+ else
+ err = netif_receive_skb(skb2);
if (err) {
out:
qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
- if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
+ if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
rcu_read_unlock();
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index a133dcb82132..024f3a3afeff 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -46,8 +46,10 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
skb->dev->real_num_tx_queues > d->queue_mapping)
skb_set_queue_mapping(skb, d->queue_mapping);
- if (d->flags & SKBEDIT_F_MARK)
- skb->mark = d->mark;
+ if (d->flags & SKBEDIT_F_MARK) {
+ skb->mark &= ~d->mask;
+ skb->mark |= d->mark & d->mask;
+ }
if (d->flags & SKBEDIT_F_PTYPE)
skb->pkt_type = d->ptype;
@@ -61,6 +63,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
[TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) },
[TCA_SKBEDIT_MARK] = { .len = sizeof(u32) },
[TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) },
+ [TCA_SKBEDIT_MASK] = { .len = sizeof(u32) },
};
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -71,7 +74,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
struct tc_skbedit *parm;
struct tcf_skbedit *d;
- u32 flags = 0, *priority = NULL, *mark = NULL;
+ u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
u16 *queue_mapping = NULL, *ptype = NULL;
bool exists = false;
int ret = 0, err;
@@ -108,6 +111,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
mark = nla_data(tb[TCA_SKBEDIT_MARK]);
}
+ if (tb[TCA_SKBEDIT_MASK] != NULL) {
+ flags |= SKBEDIT_F_MASK;
+ mask = nla_data(tb[TCA_SKBEDIT_MASK]);
+ }
+
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
exists = tcf_hash_check(tn, parm->index, a, bind);
@@ -145,6 +153,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
d->mark = *mark;
if (flags & SKBEDIT_F_PTYPE)
d->ptype = *ptype;
+ /* default behaviour is to use all the bits */
+ d->mask = 0xffffffff;
+ if (flags & SKBEDIT_F_MASK)
+ d->mask = *mask;
d->tcf_action = parm->action;
@@ -182,6 +194,9 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
if ((d->flags & SKBEDIT_F_PTYPE) &&
nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
goto nla_put_failure;
+ if ((d->flags & SKBEDIT_F_MASK) &&
+ nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask))
+ goto nla_put_failure;
tcf_tm_dump(&t, &d->tcf_tm);
if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index a95c00b119da..b57fcbcefea1 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -37,6 +37,12 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
bstats_update(&v->tcf_bstats, skb);
action = v->tcf_action;
+ /* Ensure 'data' points at mac_header prior calling vlan manipulating
+ * functions.
+ */
+ if (skb_at_tc_ingress(skb))
+ skb_push_rcsum(skb, skb->mac_len);
+
switch (v->tcfv_action) {
case TCA_VLAN_ACT_POP:
err = skb_vlan_pop(skb);
@@ -83,6 +89,9 @@ drop:
action = TC_ACT_SHOT;
v->tcf_qstats.drops++;
unlock:
+ if (skb_at_tc_ingress(skb))
+ skb_pull_rcsum(skb, skb->mac_len);
+
spin_unlock(&v->tcf_lock);
return action;
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 11da7da0b7c4..2ee29a3375f6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -101,7 +101,7 @@ EXPORT_SYMBOL(unregister_tcf_proto_ops);
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
- unsigned long fh, int event);
+ unsigned long fh, int event, bool unicast);
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n,
@@ -112,7 +112,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL;
it_chain = &tp->next)
- tfilter_notify(net, oskb, n, tp, 0, event);
+ tfilter_notify(net, oskb, n, tp, 0, event, false);
}
/* Select new prio value from the range, managed by kernel. */
@@ -319,7 +319,8 @@ replay:
RCU_INIT_POINTER(*back, next);
- tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
+ tfilter_notify(net, skb, n, tp, fh,
+ RTM_DELTFILTER, false);
tcf_destroy(tp, true);
err = 0;
goto errout;
@@ -345,14 +346,14 @@ replay:
struct tcf_proto *next = rtnl_dereference(tp->next);
tfilter_notify(net, skb, n, tp, fh,
- RTM_DELTFILTER);
+ RTM_DELTFILTER, false);
if (tcf_destroy(tp, false))
RCU_INIT_POINTER(*back, next);
}
goto errout;
case RTM_GETTFILTER:
err = tfilter_notify(net, skb, n, tp, fh,
- RTM_NEWTFILTER);
+ RTM_NEWTFILTER, true);
goto errout;
default:
err = -EINVAL;
@@ -367,7 +368,7 @@ replay:
RCU_INIT_POINTER(tp->next, rtnl_dereference(*back));
rcu_assign_pointer(*back, tp);
}
- tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
+ tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
} else {
if (tp_created)
tcf_destroy(tp, true);
@@ -419,7 +420,7 @@ nla_put_failure:
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
- unsigned long fh, int event)
+ unsigned long fh, int event, bool unicast)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -433,6 +434,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
return -EINVAL;
}
+ if (unicast)
+ return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
}
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index a309a07ccb35..41c80b6c3906 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -176,11 +176,12 @@ META_COLLECTOR(int_vlan_tag)
{
unsigned short tag;
- tag = skb_vlan_tag_get(skb);
- if (!tag && __vlan_get_tag(skb, &tag))
- *err = -1;
- else
+ if (skb_vlan_tag_present(skb))
+ dst->value = skb_vlan_tag_get(skb);
+ else if (!__vlan_get_tag(skb, &tag))
dst->value = tag;
+ else
+ *err = -1;
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c798d0de8a9d..9926fe4f3b6f 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1145,7 +1145,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
return -1;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f27ffee106f6..ca0516e6f743 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1153,6 +1153,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
if (!skb)
return NULL;
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
qdisc_bstats_update(sch, skb);
@@ -1256,6 +1257,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
bstats_update(&cl->bstats, skb);
+ qdisc_qstats_backlog_inc(sch, skb);
++sch->q.qlen;
agg = cl->agg;
@@ -1476,6 +1478,7 @@ static void qfq_reset_qdisc(struct Qdisc *sch)
qdisc_reset(cl->qdisc);
}
}
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index add3cc7d37ec..20a350bd1b1d 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -400,6 +400,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
enqueue:
ret = qdisc_enqueue(skb, child, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
increment_qlen(skb, q);
} else if (net_xmit_drop_count(ret)) {
@@ -428,6 +429,7 @@ static struct sk_buff *sfb_dequeue(struct Qdisc *sch)
if (skb) {
qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
decrement_qlen(skb, q);
}
@@ -450,6 +452,7 @@ static void sfb_reset(struct Qdisc *sch)
struct sfb_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
q->slot = 0;
q->double_buffering = false;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 2cd9b4478b92..b0196366d58d 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -418,9 +418,6 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
struct teql_master *m = netdev_priv(dev);
struct Qdisc *q;
- if (new_mtu < 68)
- return -EINVAL;
-
q = m->slaves;
if (q) {
do {
@@ -460,6 +457,8 @@ static __init void teql_master_setup(struct net_device *dev)
dev->netdev_ops = &teql_netdev_ops;
dev->type = ARPHRD_VOID;
dev->mtu = 1500;
+ dev->min_mtu = 68;
+ dev->max_mtu = 65535;
dev->tx_queue_len = 100;
dev->flags = IFF_NOARP;
dev->hard_header_len = LL_MAX_HEADER;
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 8afe2e90d003..615f0ddd41df 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -52,7 +52,6 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
atomic_set(&msg->refcnt, 1);
msg->send_failed = 0;
msg->send_error = 0;
- msg->can_abandon = 0;
msg->can_delay = 1;
msg->expires_at = 0;
INIT_LIST_HEAD(&msg->chunks);
@@ -182,15 +181,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
/* Note: Calculate this outside of the loop, so that all fragments
* have the same expiration.
*/
- if (sinfo->sinfo_timetolive) {
- /* sinfo_timetolive is in milliseconds */
+ if (asoc->peer.prsctp_capable && sinfo->sinfo_timetolive &&
+ (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags) ||
+ !SCTP_PR_POLICY(sinfo->sinfo_flags)))
msg->expires_at = jiffies +
msecs_to_jiffies(sinfo->sinfo_timetolive);
- msg->can_abandon = 1;
-
- pr_debug("%s: msg:%p expires_at:%ld jiffies:%ld\n", __func__,
- msg, msg->expires_at, jiffies);
- }
/* This is the biggest possible DATA chunk that can fit into
* the packet
@@ -349,30 +344,24 @@ errout:
/* Check whether this message has expired. */
int sctp_chunk_abandoned(struct sctp_chunk *chunk)
{
- if (!chunk->asoc->prsctp_enable ||
- !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) {
- struct sctp_datamsg *msg = chunk->msg;
-
- if (!msg->can_abandon)
- return 0;
-
- if (time_after(jiffies, msg->expires_at))
- return 1;
-
+ if (!chunk->asoc->peer.prsctp_capable)
return 0;
- }
if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
- time_after(jiffies, chunk->prsctp_param)) {
+ time_after(jiffies, chunk->msg->expires_at)) {
if (chunk->sent_count)
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
else
chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
return 1;
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
- chunk->sent_count > chunk->prsctp_param) {
+ chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
+ } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
+ chunk->msg->expires_at &&
+ time_after(jiffies, chunk->msg->expires_at)) {
+ return 1;
}
/* PRIO policy is processed by sendmsg, not here */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 2a5c1896d18f..4282b488985b 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -552,7 +552,8 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
* for a given destination transport address.
*/
- if (!chunk->resent && !tp->rto_pending) {
+ if (!sctp_chunk_retransmitted(chunk) &&
+ !tp->rto_pending) {
chunk->rtt_in_progress = 1;
tp->rto_pending = 1;
}
@@ -865,9 +866,6 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
rwnd = 0;
asoc->peer.rwnd = rwnd;
- /* Has been accepted for transmission. */
- if (!asoc->peer.prsctp_capable)
- chunk->msg->can_abandon = 0;
sctp_chunk_assign_tsn(chunk);
sctp_chunk_assign_ssn(chunk);
}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 3ec6da8bbb53..e54082699520 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -304,7 +304,7 @@ void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
"illegal chunk");
sctp_outq_tail_data(q, chunk);
- if (chunk->asoc->prsctp_enable &&
+ if (chunk->asoc->peer.prsctp_capable &&
SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
chunk->asoc->sent_cnt_removable++;
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
@@ -354,7 +354,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
- chk->prsctp_param <= sinfo->sinfo_timetolive)
+ chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
list_del_init(&chk->transmitted_list);
@@ -389,7 +389,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
list_for_each_entry_safe(chk, temp, queue, list) {
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
- chk->prsctp_param <= sinfo->sinfo_timetolive)
+ chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
list_del_init(&chk->list);
@@ -413,7 +413,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc,
{
struct sctp_transport *transport;
- if (!asoc->prsctp_enable || !asoc->sent_cnt_removable)
+ if (!asoc->peer.prsctp_capable || !asoc->sent_cnt_removable)
return;
msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
@@ -507,8 +507,6 @@ void sctp_retransmit_mark(struct sctp_outq *q,
transport->rto_pending = 0;
}
- chunk->resent = 1;
-
/* Move the chunk to the retransmit queue. The chunks
* on the retransmit queue are always kept in order.
*/
@@ -1026,7 +1024,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
/* Mark as failed send. */
sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
- if (asoc->prsctp_enable &&
+ if (asoc->peer.prsctp_capable &&
SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
asoc->sent_cnt_removable--;
sctp_chunk_free(chunk);
@@ -1319,7 +1317,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
tsn = ntohl(tchunk->subh.data_hdr->tsn);
if (TSN_lte(tsn, ctsn)) {
list_del_init(&tchunk->transmitted_list);
- if (asoc->prsctp_enable &&
+ if (asoc->peer.prsctp_capable &&
SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
asoc->sent_cnt_removable--;
sctp_chunk_free(tchunk);
@@ -1439,7 +1437,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
* instance).
*/
if (!tchunk->tsn_gap_acked &&
- !tchunk->resent &&
+ !sctp_chunk_retransmitted(tchunk) &&
tchunk->rtt_in_progress) {
tchunk->rtt_in_progress = 0;
rtt = jiffies - tchunk->sent_at;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index ef8ba77a5bea..206377fe91ec 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -73,13 +73,17 @@ static const struct snmp_mib sctp_snmp_list[] = {
/* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */
static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
{
+ unsigned long buff[SCTP_MIB_MAX];
struct net *net = seq->private;
int i;
- for (i = 0; sctp_snmp_list[i].name != NULL; i++)
+ memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX);
+
+ snmp_get_cpu_field_batch(buff, sctp_snmp_list,
+ net->sctp.sctp_statistics);
+ for (i = 0; sctp_snmp_list[i].name; i++)
seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
- snmp_fold_field(net->sctp.sctp_statistics,
- sctp_snmp_list[i].entry));
+ buff[i]);
return 0;
}
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 807158e32f5f..048954eee984 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -276,28 +276,17 @@ out:
return err;
}
-static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
+static int sctp_sock_dump(struct sock *sk, void *p)
{
- struct sctp_endpoint *ep = tsp->asoc->ep;
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_comm_param *commp = p;
- struct sock *sk = ep->base.sk;
struct sk_buff *skb = commp->skb;
struct netlink_callback *cb = commp->cb;
const struct inet_diag_req_v2 *r = commp->r;
- struct sctp_association *assoc =
- list_entry(ep->asocs.next, struct sctp_association, asocs);
+ struct sctp_association *assoc;
int err = 0;
- /* find the ep only once through the transports by this condition */
- if (tsp->asoc != assoc)
- goto out;
-
- if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
- goto out;
-
lock_sock(sk);
- if (sk != assoc->base.sk)
- goto release;
list_for_each_entry(assoc, &ep->asocs, asocs) {
if (cb->args[4] < cb->args[1])
goto next;
@@ -317,7 +306,7 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
NLM_F_MULTI, cb->nlh,
commp->net_admin) < 0) {
cb->args[3] = 1;
- err = 2;
+ err = 1;
goto release;
}
cb->args[3] = 1;
@@ -327,7 +316,7 @@ static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0, cb->nlh,
commp->net_admin) < 0) {
- err = 2;
+ err = 1;
goto release;
}
next:
@@ -339,10 +328,35 @@ next:
cb->args[4] = 0;
release:
release_sock(sk);
+ sock_put(sk);
return err;
+}
+
+static int sctp_get_sock(struct sctp_transport *tsp, void *p)
+{
+ struct sctp_endpoint *ep = tsp->asoc->ep;
+ struct sctp_comm_param *commp = p;
+ struct sock *sk = ep->base.sk;
+ struct netlink_callback *cb = commp->cb;
+ const struct inet_diag_req_v2 *r = commp->r;
+ struct sctp_association *assoc =
+ list_entry(ep->asocs.next, struct sctp_association, asocs);
+
+ /* find the ep only once through the transports by this condition */
+ if (tsp->asoc != assoc)
+ goto out;
+
+ if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
+ goto out;
+
+ sock_hold(sk);
+ cb->args[5] = (long)sk;
+
+ return 1;
+
out:
cb->args[2]++;
- return err;
+ return 0;
}
static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
@@ -480,10 +494,18 @@ skip:
* 2 : to record the transport pos of this time's traversal
* 3 : to mark if we have dumped the ep info of the current asoc
* 4 : to work as a temporary variable to traversal list
+ * 5 : to save the sk we get from travelsing the tsp list.
*/
if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
goto done;
- sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp);
+
+next:
+ cb->args[5] = 0;
+ sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp);
+
+ if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp))
+ goto next;
+
done:
cb->args[1] = cb->args[4];
cb->args[4] = 0;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 79dd66079dd7..9e9690b7afe1 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -706,20 +706,6 @@ nodata:
return retval;
}
-static void sctp_set_prsctp_policy(struct sctp_chunk *chunk,
- const struct sctp_sndrcvinfo *sinfo)
-{
- if (!chunk->asoc->prsctp_enable)
- return;
-
- if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
- chunk->prsctp_param =
- jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
- else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) ||
- SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags))
- chunk->prsctp_param = sinfo->sinfo_timetolive;
-}
-
/* Make a DATA chunk for the given association from the provided
* parameters. However, do not populate the data payload.
*/
@@ -753,7 +739,6 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
- sctp_set_prsctp_policy(retval, sinfo);
nodata:
return retval;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6cdc61c21438..fb02c7033307 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4473,17 +4473,21 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
const union sctp_addr *paddr, void *p)
{
struct sctp_transport *transport;
- int err = 0;
+ int err = -ENOENT;
rcu_read_lock();
transport = sctp_addrs_lookup_transport(net, laddr, paddr);
if (!transport || !sctp_transport_hold(transport))
goto out;
- err = cb(transport, p);
+
+ sctp_association_hold(transport->asoc);
sctp_transport_put(transport);
-out:
rcu_read_unlock();
+ err = cb(transport, p);
+ sctp_association_put(transport->asoc);
+
+out:
return err;
}
EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
diff --git a/net/socket.c b/net/socket.c
index a1bd16106625..5a9bf5ee2464 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -320,11 +320,38 @@ static const struct dentry_operations sockfs_dentry_operations = {
.d_dname = sockfs_dname,
};
+static int sockfs_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *suffix, void *value, size_t size)
+{
+ if (value) {
+ if (dentry->d_name.len + 1 > size)
+ return -ERANGE;
+ memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
+ }
+ return dentry->d_name.len + 1;
+}
+
+#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
+#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
+#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
+
+static const struct xattr_handler sockfs_xattr_handler = {
+ .name = XATTR_NAME_SOCKPROTONAME,
+ .get = sockfs_xattr_get,
+};
+
+static const struct xattr_handler *sockfs_xattr_handlers[] = {
+ &sockfs_xattr_handler,
+ NULL
+};
+
static struct dentry *sockfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_pseudo(fs_type, "socket:", &sockfs_ops,
- &sockfs_dentry_operations, SOCKFS_MAGIC);
+ return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
+ sockfs_xattr_handlers,
+ &sockfs_dentry_operations, SOCKFS_MAGIC);
}
static struct vfsmount *sock_mnt __read_mostly;
@@ -463,35 +490,6 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
return NULL;
}
-#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
-#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
-#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
-static ssize_t sockfs_getxattr(struct dentry *dentry, struct inode *inode,
- const char *name, void *value, size_t size)
-{
- const char *proto_name;
- size_t proto_size;
- int error;
-
- error = -ENODATA;
- if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
- proto_name = dentry->d_name.name;
- proto_size = strlen(proto_name);
-
- if (value) {
- error = -ERANGE;
- if (proto_size + 1 > size)
- goto out;
-
- strncpy(value, proto_name, proto_size + 1);
- }
- error = proto_size + 1;
- }
-
-out:
- return error;
-}
-
static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
size_t size)
{
@@ -521,7 +519,6 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
}
static const struct inode_operations sockfs_inode_ops = {
- .getxattr = sockfs_getxattr,
.listxattr = sockfs_listxattr,
};
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 5c7549b5b92c..41adf362936d 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -246,7 +246,7 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
} else {
strp->rx_interrupted = 1;
}
- strp_parser_err(strp, err, desc);
+ strp_parser_err(strp, len, desc);
break;
} else if (len > strp->sk->sk_rcvbuf) {
/* Message length exceeds maximum allowed */
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index a7e42f9a405c..2bff63a73cf8 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -551,7 +551,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
*entry, *new;
unsigned int nr;
- nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits);
+ nr = auth->au_ops->hash_cred(acred, cache->hashbits);
rcu_read_lock();
hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 168219535a34..f1df9837f1ac 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -78,6 +78,14 @@ static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
return auth->au_ops->lookup_cred(auth, acred, lookupflags);
}
+static int
+generic_hash_cred(struct auth_cred *acred, unsigned int hashbits)
+{
+ return hash_64(from_kgid(&init_user_ns, acred->gid) |
+ ((u64)from_kuid(&init_user_ns, acred->uid) <<
+ (sizeof(gid_t) * 8)), hashbits);
+}
+
/*
* Lookup generic creds for current process
*/
@@ -176,8 +184,8 @@ generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
if (gcred->acred.group_info->ngroups != acred->group_info->ngroups)
goto out_nomatch;
for (i = 0; i < gcred->acred.group_info->ngroups; i++) {
- if (!gid_eq(GROUP_AT(gcred->acred.group_info, i),
- GROUP_AT(acred->group_info, i)))
+ if (!gid_eq(gcred->acred.group_info->gid[i],
+ acred->group_info->gid[i]))
goto out_nomatch;
}
out_match:
@@ -258,6 +266,7 @@ generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
static const struct rpc_authops generic_auth_ops = {
.owner = THIS_MODULE,
.au_name = "Generic",
+ .hash_cred = generic_hash_cred,
.lookup_cred = generic_lookup_cred,
.crcreate = generic_create_cred,
.key_timeout = generic_key_timeout,
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 976c7812bbd5..d8bd97a5a7c9 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1298,6 +1298,12 @@ gss_destroy_cred(struct rpc_cred *cred)
gss_destroy_nullcred(cred);
}
+static int
+gss_hash_cred(struct auth_cred *acred, unsigned int hashbits)
+{
+ return hash_64(from_kuid(&init_user_ns, acred->uid), hashbits);
+}
+
/*
* Lookup RPCSEC_GSS cred for the current process
*/
@@ -1982,6 +1988,7 @@ static const struct rpc_authops authgss_ops = {
.au_name = "RPCSEC_GSS",
.create = gss_create,
.destroy = gss_destroy,
+ .hash_cred = gss_hash_cred,
.lookup_cred = gss_lookup_cred,
.crcreate = gss_create_cred,
.list_pseudoflavors = gss_mech_list_pseudoflavors,
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index eeeba5adee6d..dc6fb79a361f 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -229,7 +229,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
kgid = make_kgid(&init_user_ns, tmp);
if (!gid_valid(kgid))
goto out_free_groups;
- GROUP_AT(creds->cr_group_info, i) = kgid;
+ creds->cr_group_info->gid[i] = kgid;
}
return 0;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index d8582028b346..d67f7e1bc82d 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -479,7 +479,7 @@ static int rsc_parse(struct cache_detail *cd,
kgid = make_kgid(&init_user_ns, id);
if (!gid_valid(kgid))
goto out;
- GROUP_AT(rsci.cred.cr_group_info, i) = kgid;
+ rsci.cred.cr_group_info->gid[i] = kgid;
}
/* mech name */
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index a99278c984e8..306fc0f54596 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -46,6 +46,14 @@ unx_destroy(struct rpc_auth *auth)
rpcauth_clear_credcache(auth->au_credcache);
}
+static int
+unx_hash_cred(struct auth_cred *acred, unsigned int hashbits)
+{
+ return hash_64(from_kgid(&init_user_ns, acred->gid) |
+ ((u64)from_kuid(&init_user_ns, acred->uid) <<
+ (sizeof(gid_t) * 8)), hashbits);
+}
+
/*
* Lookup AUTH_UNIX creds for current process
*/
@@ -79,7 +87,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
cred->uc_gid = acred->gid;
for (i = 0; i < groups; i++)
- cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
+ cred->uc_gids[i] = acred->group_info->gid[i];
if (i < NFS_NGROUPS)
cred->uc_gids[i] = INVALID_GID;
@@ -127,7 +135,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
if (groups > NFS_NGROUPS)
groups = NFS_NGROUPS;
for (i = 0; i < groups ; i++)
- if (!gid_eq(cred->uc_gids[i], GROUP_AT(acred->group_info, i)))
+ if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i]))
return 0;
if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups]))
return 0;
@@ -220,6 +228,7 @@ const struct rpc_authops authunix_ops = {
.au_name = "UNIX",
.create = unx_create,
.destroy = unx_destroy,
+ .hash_cred = unx_hash_cred,
.lookup_cred = unx_lookup_cred,
.crcreate = unx_create_cred,
};
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 229956bf8457..ac701c28f44f 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -76,13 +76,7 @@ static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
page = alloc_page(gfp_flags);
if (page == NULL)
return -ENOMEM;
- buf->head[0].iov_base = page_address(page);
- buf->head[0].iov_len = PAGE_SIZE;
- buf->tail[0].iov_base = NULL;
- buf->tail[0].iov_len = 0;
- buf->page_len = 0;
- buf->len = 0;
- buf->buflen = PAGE_SIZE;
+ xdr_buf_init(buf, page_address(page), PAGE_SIZE);
return 0;
}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4d8e11f94a35..8aabe12201f8 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -353,7 +353,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
spin_unlock(&cache_list_lock);
/* start the cleaning process */
- schedule_delayed_work(&cache_cleaner, 0);
+ queue_delayed_work(system_power_efficient_wq, &cache_cleaner, 0);
}
EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail);
@@ -476,7 +476,8 @@ static void do_cache_clean(struct work_struct *work)
delay = 0;
if (delay)
- schedule_delayed_work(&cache_cleaner, delay);
+ queue_delayed_work(system_power_efficient_wq,
+ &cache_cleaner, delay);
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 66f23b376fa0..34dd7b26ee5f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -184,7 +184,6 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
struct super_block *sb)
{
struct dentry *dentry;
- int err = 0;
switch (event) {
case RPC_PIPEFS_MOUNT:
@@ -201,7 +200,7 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
printk(KERN_ERR "%s: unknown event: %ld\n", __func__, event);
return -ENOTSUPP;
}
- return err;
+ return 0;
}
static int __rpc_pipefs_event(struct rpc_clnt *clnt, unsigned long event,
@@ -988,7 +987,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
{
if (clnt != NULL) {
- rpc_task_release_client(task);
if (task->tk_xprt == NULL)
task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
task->tk_client = clnt;
@@ -1693,6 +1691,7 @@ call_allocate(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
+ int status;
dprint_status(task);
@@ -1718,11 +1717,14 @@ call_allocate(struct rpc_task *task)
req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen;
req->rq_rcvsize <<= 2;
- req->rq_buffer = xprt->ops->buf_alloc(task,
- req->rq_callsize + req->rq_rcvsize);
- if (req->rq_buffer != NULL)
- return;
+ status = xprt->ops->buf_alloc(task);
xprt_inject_disconnect(xprt);
+ if (status == 0)
+ return;
+ if (status != -ENOMEM) {
+ rpc_exit(task, status);
+ return;
+ }
dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
@@ -1748,18 +1750,6 @@ rpc_task_force_reencode(struct rpc_task *task)
task->tk_rqstp->rq_bytes_sent = 0;
}
-static inline void
-rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
-{
- buf->head[0].iov_base = start;
- buf->head[0].iov_len = len;
- buf->tail[0].iov_len = 0;
- buf->page_len = 0;
- buf->flags = 0;
- buf->len = 0;
- buf->buflen = len;
-}
-
/*
* 3. Encode arguments of an RPC call
*/
@@ -1772,12 +1762,12 @@ rpc_xdr_encode(struct rpc_task *task)
dprint_status(task);
- rpc_xdr_buf_init(&req->rq_snd_buf,
- req->rq_buffer,
- req->rq_callsize);
- rpc_xdr_buf_init(&req->rq_rcv_buf,
- (char *)req->rq_buffer + req->rq_callsize,
- req->rq_rcvsize);
+ xdr_buf_init(&req->rq_snd_buf,
+ req->rq_buffer,
+ req->rq_callsize);
+ xdr_buf_init(&req->rq_rcv_buf,
+ req->rq_rbuffer,
+ req->rq_rcvsize);
p = rpc_encode_header(task);
if (p == NULL) {
@@ -2616,6 +2606,70 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
/**
+ * rpc_clnt_setup_test_and_add_xprt()
+ *
+ * This is an rpc_clnt_add_xprt setup() function which returns 1 so:
+ * 1) caller of the test function must dereference the rpc_xprt_switch
+ * and the rpc_xprt.
+ * 2) test function must call rpc_xprt_switch_add_xprt, usually in
+ * the rpc_call_done routine.
+ *
+ * Upon success (return of 1), the test function adds the new
+ * transport to the rpc_clnt xprt switch
+ *
+ * @clnt: struct rpc_clnt to get the new transport
+ * @xps: the rpc_xprt_switch to hold the new transport
+ * @xprt: the rpc_xprt to test
+ * @data: a struct rpc_add_xprt_test pointer that holds the test function
+ * and test function call data
+ */
+int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
+ struct rpc_xprt_switch *xps,
+ struct rpc_xprt *xprt,
+ void *data)
+{
+ struct rpc_cred *cred;
+ struct rpc_task *task;
+ struct rpc_add_xprt_test *xtest = (struct rpc_add_xprt_test *)data;
+ int status = -EADDRINUSE;
+
+ xprt = xprt_get(xprt);
+ xprt_switch_get(xps);
+
+ if (rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr))
+ goto out_err;
+
+ /* Test the connection */
+ cred = authnull_ops.lookup_cred(NULL, NULL, 0);
+ task = rpc_call_null_helper(clnt, xprt, cred,
+ RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+ NULL, NULL);
+ put_rpccred(cred);
+ if (IS_ERR(task)) {
+ status = PTR_ERR(task);
+ goto out_err;
+ }
+ status = task->tk_status;
+ rpc_put_task(task);
+
+ if (status < 0)
+ goto out_err;
+
+ /* rpc_xprt_switch and rpc_xprt are deferrenced by add_xprt_test() */
+ xtest->add_xprt_test(clnt, xprt, xtest->data);
+
+ /* so that rpc_clnt_add_xprt does not call rpc_xprt_switch_add_xprt */
+ return 1;
+out_err:
+ xprt_put(xprt);
+ xprt_switch_put(xps);
+ pr_info("RPC: rpc_clnt_test_xprt failed: %d addr %s not added\n",
+ status, xprt->address_strings[RPC_DISPLAY_ADDR]);
+ return status;
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_setup_test_and_add_xprt);
+
+/**
* rpc_clnt_add_xprt - Add a new transport to a rpc_clnt
* @clnt: pointer to struct rpc_clnt
* @xprtargs: pointer to struct xprt_create
@@ -2697,6 +2751,34 @@ rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo)
}
EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
+void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
+{
+ xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put);
+
+void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
+{
+ rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
+ xprt);
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);
+
+bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
+ const struct sockaddr *sap)
+{
+ struct rpc_xprt_switch *xps;
+ bool ret;
+
+ xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
+
+ rcu_read_lock();
+ ret = rpc_xprt_switch_has_addr(xps, sap);
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_has_addr);
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static void rpc_show_header(void)
{
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 84f98cbe31c3..61a504fb1ae2 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -477,7 +477,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
return NULL;
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 9ae588511aaf..5db68b371db2 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -849,14 +849,17 @@ static void rpc_async_schedule(struct work_struct *work)
}
/**
- * rpc_malloc - allocate an RPC buffer
- * @task: RPC task that will use this buffer
- * @size: requested byte size
+ * rpc_malloc - allocate RPC buffer resources
+ * @task: RPC task
+ *
+ * A single memory region is allocated, which is split between the
+ * RPC call and RPC reply that this task is being used for. When
+ * this RPC is retired, the memory is released by calling rpc_free.
*
* To prevent rpciod from hanging, this allocator never sleeps,
- * returning NULL and suppressing warning if the request cannot be serviced
- * immediately.
- * The caller can arrange to sleep in a way that is safe for rpciod.
+ * returning -ENOMEM and suppressing warning if the request cannot
+ * be serviced immediately. The caller can arrange to sleep in a
+ * way that is safe for rpciod.
*
* Most requests are 'small' (under 2KiB) and can be serviced from a
* mempool, ensuring that NFS reads and writes can always proceed,
@@ -865,8 +868,10 @@ static void rpc_async_schedule(struct work_struct *work)
* In order to avoid memory starvation triggering more writebacks of
* NFS requests, we avoid using GFP_KERNEL.
*/
-void *rpc_malloc(struct rpc_task *task, size_t size)
+int rpc_malloc(struct rpc_task *task)
{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ size_t size = rqst->rq_callsize + rqst->rq_rcvsize;
struct rpc_buffer *buf;
gfp_t gfp = GFP_NOIO | __GFP_NOWARN;
@@ -880,28 +885,28 @@ void *rpc_malloc(struct rpc_task *task, size_t size)
buf = kmalloc(size, gfp);
if (!buf)
- return NULL;
+ return -ENOMEM;
buf->len = size;
dprintk("RPC: %5u allocated buffer of size %zu at %p\n",
task->tk_pid, size, buf);
- return &buf->data;
+ rqst->rq_buffer = buf->data;
+ rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize;
+ return 0;
}
EXPORT_SYMBOL_GPL(rpc_malloc);
/**
- * rpc_free - free buffer allocated via rpc_malloc
- * @buffer: buffer to free
+ * rpc_free - free RPC buffer resources allocated via rpc_malloc
+ * @task: RPC task
*
*/
-void rpc_free(void *buffer)
+void rpc_free(struct rpc_task *task)
{
+ void *buffer = task->tk_rqstp->rq_buffer;
size_t size;
struct rpc_buffer *buf;
- if (!buffer)
- return;
-
buf = container_of(buffer, struct rpc_buffer, data);
size = buf->len;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index c5b0cb4f4056..7c8070ec93c8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -401,6 +401,21 @@ int svc_bind(struct svc_serv *serv, struct net *net)
}
EXPORT_SYMBOL_GPL(svc_bind);
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static void
+__svc_init_bc(struct svc_serv *serv)
+{
+ INIT_LIST_HEAD(&serv->sv_cb_list);
+ spin_lock_init(&serv->sv_cb_lock);
+ init_waitqueue_head(&serv->sv_cb_waitq);
+}
+#else
+static void
+__svc_init_bc(struct svc_serv *serv)
+{
+}
+#endif
+
/*
* Create an RPC service
*/
@@ -443,6 +458,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
init_timer(&serv->sv_temptimer);
spin_lock_init(&serv->sv_lock);
+ __svc_init_bc(serv);
+
serv->sv_nrpools = npools;
serv->sv_pools =
kcalloc(serv->sv_nrpools, sizeof(struct svc_pool),
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index dfacdc95b3f5..64af4f034de6 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -517,7 +517,7 @@ static int unix_gid_parse(struct cache_detail *cd,
kgid = make_kgid(&init_user_ns, gid);
if (!gid_valid(kgid))
goto out;
- GROUP_AT(ug.gi, i) = kgid;
+ ug.gi->gid[i] = kgid;
}
ugp = unix_gid_lookup(cd, uid);
@@ -564,7 +564,7 @@ static int unix_gid_show(struct seq_file *m,
seq_printf(m, "%u %d:", from_kuid_munged(user_ns, ug->uid), glen);
for (i = 0; i < glen; i++)
- seq_printf(m, " %d", from_kgid_munged(user_ns, GROUP_AT(ug->gi, i)));
+ seq_printf(m, " %d", from_kgid_munged(user_ns, ug->gi->gid[i]));
seq_printf(m, "\n");
return 0;
}
@@ -817,7 +817,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
return SVC_CLOSE;
for (i = 0; i < slen; i++) {
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
- GROUP_AT(cred->cr_group_info, i) = kgid;
+ cred->cr_group_info->gid[i] = kgid;
}
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 57625f64efd5..e2a55dc787e6 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -39,6 +39,7 @@
#include <net/checksum.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/udp.h>
#include <net/tcp.h>
#include <net/tcp_states.h>
#include <asm/uaccess.h>
@@ -129,6 +130,18 @@ static void svc_release_skb(struct svc_rqst *rqstp)
}
}
+static void svc_release_udp_skb(struct svc_rqst *rqstp)
+{
+ struct sk_buff *skb = rqstp->rq_xprt_ctxt;
+
+ if (skb) {
+ rqstp->rq_xprt_ctxt = NULL;
+
+ dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
+ consume_skb(skb);
+ }
+}
+
union svc_pktinfo_u {
struct in_pktinfo pkti;
struct in6_pktinfo pkti6;
@@ -575,7 +588,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
goto out_free;
}
local_bh_enable();
- skb_free_datagram_locked(svsk->sk_sk, skb);
+ consume_skb(skb);
} else {
/* we can use it in-place */
rqstp->rq_arg.head[0].iov_base = skb->data;
@@ -602,8 +615,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
return len;
out_free:
- trace_kfree_skb(skb, svc_udp_recvfrom);
- skb_free_datagram_locked(svsk->sk_sk, skb);
+ kfree_skb(skb);
return 0;
}
@@ -660,7 +672,7 @@ static struct svc_xprt_ops svc_udp_ops = {
.xpo_create = svc_udp_create,
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
- .xpo_release_rqst = svc_release_skb,
+ .xpo_release_rqst = svc_release_udp_skb,
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index c4f3cc0c0775..7f1071e103ca 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -767,7 +767,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr)
newbase -= xdr->buf->page_base;
if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len);
+ xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2);
}
static bool xdr_set_next_buffer(struct xdr_stream *xdr)
@@ -776,7 +776,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr)
xdr_set_next_page(xdr);
else if (xdr->iov == xdr->buf->head) {
if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len);
+ xdr_set_iov(xdr, xdr->buf->tail, xdr->nwords << 2);
}
return xdr->p != xdr->end;
}
@@ -859,12 +859,15 @@ EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
{
__be32 *p;
- void *cpdest = xdr->scratch.iov_base;
+ char *cpdest = xdr->scratch.iov_base;
size_t cplen = (char *)xdr->end - (char *)xdr->p;
if (nbytes > xdr->scratch.iov_len)
return NULL;
- memcpy(cpdest, xdr->p, cplen);
+ p = __xdr_inline_decode(xdr, cplen);
+ if (p == NULL)
+ return NULL;
+ memcpy(cpdest, p, cplen);
cpdest += cplen;
nbytes -= cplen;
if (!xdr_set_next_buffer(xdr))
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ea244b29138b..685e6d225414 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1295,7 +1295,7 @@ void xprt_release(struct rpc_task *task)
xprt_schedule_autodisconnect(xprt);
spin_unlock_bh(&xprt->transport_lock);
if (req->rq_buffer)
- xprt->ops->buf_free(req->rq_buffer);
+ xprt->ops->buf_free(task);
xprt_inject_disconnect(xprt);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 66c9d63f4797..ae92a9e9ba52 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -15,6 +15,7 @@
#include <asm/cmpxchg.h>
#include <linux/spinlock.h>
#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/xprtmultipath.h>
typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head,
@@ -49,7 +50,8 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
if (xprt == NULL)
return;
spin_lock(&xps->xps_lock);
- if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
+ if ((xps->xps_net == xprt->xprt_net || xps->xps_net == NULL) &&
+ !rpc_xprt_switch_has_addr(xps, (struct sockaddr *)&xprt->addr))
xprt_switch_add_xprt_locked(xps, xprt);
spin_unlock(&xps->xps_lock);
}
@@ -232,6 +234,26 @@ struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
return xprt_switch_find_current_entry(head, xpi->xpi_cursor);
}
+bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
+ const struct sockaddr *sap)
+{
+ struct list_head *head;
+ struct rpc_xprt *pos;
+
+ if (xps == NULL || sap == NULL)
+ return false;
+
+ head = &xps->xps_xprt_list;
+ list_for_each_entry_rcu(pos, head, xprt_switch) {
+ if (rpc_cmp_addr_port(sap, (struct sockaddr *)&pos->addr)) {
+ pr_info("RPC: addr %s already in xprt switch\n",
+ pos->address_strings[RPC_DISPLAY_ADDR]);
+ return true;
+ }
+ }
+ return false;
+}
+
static
struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
const struct rpc_xprt *cur)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 87762d976b63..2c472e1b4827 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -27,7 +27,7 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
list_del(&req->rl_all);
spin_unlock(&buf->rb_reqslock);
- rpcrdma_destroy_req(&r_xprt->rx_ia, req);
+ rpcrdma_destroy_req(req);
kfree(rqst);
}
@@ -35,10 +35,8 @@ static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
{
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_regbuf *rb;
struct rpcrdma_req *req;
- struct xdr_buf *buf;
size_t size;
req = rpcrdma_create_req(r_xprt);
@@ -46,30 +44,19 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
return PTR_ERR(req);
req->rl_backchannel = true;
- size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
- rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
+ rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
+ DMA_TO_DEVICE, GFP_KERNEL);
if (IS_ERR(rb))
goto out_fail;
req->rl_rdmabuf = rb;
- size += RPCRDMA_INLINE_READ_THRESHOLD(rqst);
- rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
+ size = r_xprt->rx_data.inline_rsize;
+ rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
if (IS_ERR(rb))
goto out_fail;
- rb->rg_owner = req;
req->rl_sendbuf = rb;
- /* so that rpcr_to_rdmar works when receiving a request */
- rqst->rq_buffer = (void *)req->rl_sendbuf->rg_base;
-
- buf = &rqst->rq_snd_buf;
- buf->head[0].iov_base = rqst->rq_buffer;
- buf->head[0].iov_len = 0;
- buf->tail[0].iov_base = NULL;
- buf->tail[0].iov_len = 0;
- buf->page_len = 0;
- buf->len = 0;
- buf->buflen = size;
-
+ xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, size);
+ rpcrdma_set_xprtdata(rqst, req);
return 0;
out_fail:
@@ -219,7 +206,6 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_msg *headerp;
- size_t rpclen;
headerp = rdmab_to_msg(req->rl_rdmabuf);
headerp->rm_xid = rqst->rq_xid;
@@ -231,26 +217,9 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
headerp->rm_body.rm_chunks[1] = xdr_zero;
headerp->rm_body.rm_chunks[2] = xdr_zero;
- rpclen = rqst->rq_svec[0].iov_len;
-
-#ifdef RPCRDMA_BACKCHANNEL_DEBUG
- pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
- __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
- pr_info("RPC: %s: RPC/RDMA: %*ph\n",
- __func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
- pr_info("RPC: %s: RPC: %*ph\n",
- __func__, (int)rpclen, rqst->rq_svec[0].iov_base);
-#endif
-
- req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
- req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
- req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf);
-
- req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf);
- req->rl_send_iov[1].length = rpclen;
- req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf);
-
- req->rl_niovs = 2;
+ if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN,
+ &rqst->rq_snd_buf, rpcrdma_noch))
+ return -EIO;
return 0;
}
@@ -402,7 +371,7 @@ out_overflow:
out_short:
pr_warn("RPC/RDMA short backward direction call\n");
- if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
+ if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
xprt_disconnect_done(xprt);
else
pr_warn("RPC: %s: reposting rep %p\n",
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 21cb3b150b37..1ebb09e1ac4f 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -160,9 +160,8 @@ static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
- rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
- RPCRDMA_MAX_DATA_SEGS /
- RPCRDMA_MAX_FMR_SGES));
+ ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
+ RPCRDMA_MAX_FMR_SGES);
return 0;
}
@@ -274,6 +273,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
*/
list_for_each_entry(mw, &req->rl_registered, mw_list)
list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
+ r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list);
if (rc)
goto out_reset;
@@ -331,4 +331,5 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_init_mr = fmr_op_init_mr,
.ro_release_mr = fmr_op_release_mr,
.ro_displayname = "fmr",
+ .ro_send_w_inv_ok = 0,
};
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 892b5e1d9b09..210949562786 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -67,6 +67,8 @@
* pending send queue WRs before the transport is reconnected.
*/
+#include <linux/sunrpc/rpc_rdma.h>
+
#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -161,7 +163,7 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
return PTR_ERR(f->fr_mr);
}
- dprintk("RPC: %s: recovered FRMR %p\n", __func__, r);
+ dprintk("RPC: %s: recovered FRMR %p\n", __func__, f);
f->fr_state = FRMR_IS_INVALID;
return 0;
}
@@ -242,9 +244,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
depth;
}
- rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
- RPCRDMA_MAX_DATA_SEGS /
- ia->ri_max_frmr_depth));
+ ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
+ ia->ri_max_frmr_depth);
return 0;
}
@@ -329,7 +330,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
if (wc->status != IB_WC_SUCCESS)
__frwr_sendcompletion_flush(wc, frmr, "localinv");
- complete_all(&frmr->fr_linv_done);
+ complete(&frmr->fr_linv_done);
}
/* Post a REG_MR Work Request to register a memory region
@@ -396,7 +397,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
goto out_mapmr_err;
dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
- __func__, mw, mw->mw_nents, mr->length);
+ __func__, frmr, mw->mw_nents, mr->length);
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
@@ -449,6 +450,8 @@ __frwr_prepare_linv_wr(struct rpcrdma_mw *mw)
struct rpcrdma_frmr *f = &mw->frmr;
struct ib_send_wr *invalidate_wr;
+ dprintk("RPC: %s: invalidating frmr %p\n", __func__, f);
+
f->fr_state = FRMR_IS_INVALID;
invalidate_wr = &f->fr_invwr;
@@ -472,6 +475,7 @@ static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
+ struct rpcrdma_rep *rep = req->rl_reply;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mw *mw, *tmp;
struct rpcrdma_frmr *f;
@@ -487,6 +491,12 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
f = NULL;
invalidate_wrs = pos = prev = NULL;
list_for_each_entry(mw, &req->rl_registered, mw_list) {
+ if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
+ (mw->mw_handle == rep->rr_inv_rkey)) {
+ mw->frmr.fr_state = FRMR_IS_INVALID;
+ continue;
+ }
+
pos = __frwr_prepare_linv_wr(mw);
if (!invalidate_wrs)
@@ -496,6 +506,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
prev = pos;
f = &mw->frmr;
}
+ if (!f)
+ goto unmap;
/* Strong send queue ordering guarantees that when the
* last WR in the chain completes, all WRs in the chain
@@ -510,6 +522,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* replaces the QP. The RPC reply handler won't call us
* unless ri_id->qp is a valid pointer.
*/
+ r_xprt->rx_stats.local_inv_needed++;
rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
if (rc)
goto reset_mrs;
@@ -521,6 +534,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
*/
unmap:
list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
+ dprintk("RPC: %s: unmapping frmr %p\n",
+ __func__, &mw->frmr);
list_del_init(&mw->mw_list);
ib_dma_unmap_sg(ia->ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
@@ -576,4 +591,5 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_init_mr = frwr_op_init_mr,
.ro_release_mr = frwr_op_release_mr,
.ro_displayname = "frwr",
+ .ro_send_w_inv_ok = RPCRDMA_CMP_F_SND_W_INV_OK,
};
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a47f170b20ef..d987c2d3dd6e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -53,14 +53,6 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-enum rpcrdma_chunktype {
- rpcrdma_noch = 0,
- rpcrdma_readch,
- rpcrdma_areadch,
- rpcrdma_writech,
- rpcrdma_replych
-};
-
static const char transfertypes[][12] = {
"inline", /* no chunks */
"read list", /* some argument via rdma read */
@@ -118,10 +110,12 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
return size;
}
-void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *ia,
- struct rpcrdma_create_data_internal *cdata,
- unsigned int maxsegs)
+void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
{
+ struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ unsigned int maxsegs = ia->ri_max_segs;
+
ia->ri_max_inline_write = cdata->inline_wsize -
rpcrdma_max_call_header_size(maxsegs);
ia->ri_max_inline_read = cdata->inline_rsize -
@@ -155,42 +149,6 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
}
-static int
-rpcrdma_tail_pullup(struct xdr_buf *buf)
-{
- size_t tlen = buf->tail[0].iov_len;
- size_t skip = tlen & 3;
-
- /* Do not include the tail if it is only an XDR pad */
- if (tlen < 4)
- return 0;
-
- /* xdr_write_pages() adds a pad at the beginning of the tail
- * if the content in "buf->pages" is unaligned. Force the
- * tail's actual content to land at the next XDR position
- * after the head instead.
- */
- if (skip) {
- unsigned char *src, *dst;
- unsigned int count;
-
- src = buf->tail[0].iov_base;
- dst = buf->head[0].iov_base;
- dst += buf->head[0].iov_len;
-
- src += skip;
- tlen -= skip;
-
- dprintk("RPC: %s: skip=%zu, memmove(%p, %p, %zu)\n",
- __func__, skip, dst, src, tlen);
-
- for (count = tlen; count; count--)
- *dst++ = *src++;
- }
-
- return tlen;
-}
-
/* Split "vec" on page boundaries into segments. FMR registers pages,
* not a byte range. Other modes coalesce these segments into a single
* MR when they can.
@@ -229,7 +187,8 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
static int
rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
- enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg)
+ enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
+ bool reminv_expected)
{
int len, n, p, page_base;
struct page **ppages;
@@ -271,6 +230,13 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
if (type == rpcrdma_readch)
return n;
+ /* When encoding the Write list, some servers need to see an extra
+ * segment for odd-length Write chunks. The upper layer provides
+ * space in the tail iovec for this purpose.
+ */
+ if (type == rpcrdma_writech && reminv_expected)
+ return n;
+
if (xdrbuf->tail[0].iov_len) {
/* the rpcrdma protocol allows us to omit any trailing
* xdr pad bytes, saving the server an RDMA operation. */
@@ -327,7 +293,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
if (rtype == rpcrdma_areadch)
pos = 0;
seg = req->rl_segments;
- nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg);
+ nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
if (nsegs < 0)
return ERR_PTR(nsegs);
@@ -391,7 +357,8 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
rqst->rq_rcv_buf.head[0].iov_len,
- wtype, seg);
+ wtype, seg,
+ r_xprt->rx_ia.ri_reminv_expected);
if (nsegs < 0)
return ERR_PTR(nsegs);
@@ -456,7 +423,8 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
}
seg = req->rl_segments;
- nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg);
+ nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
+ r_xprt->rx_ia.ri_reminv_expected);
if (nsegs < 0)
return ERR_PTR(nsegs);
@@ -491,74 +459,184 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
return iptr;
}
-/*
- * Copy write data inline.
- * This function is used for "small" requests. Data which is passed
- * to RPC via iovecs (or page list) is copied directly into the
- * pre-registered memory buffer for this request. For small amounts
- * of data, this is efficient. The cutoff value is tunable.
+/* Prepare the RPC-over-RDMA header SGE.
*/
-static void rpcrdma_inline_pullup(struct rpc_rqst *rqst)
+static bool
+rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
+ u32 len)
{
- int i, npages, curlen;
- int copy_len;
- unsigned char *srcp, *destp;
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
- int page_base;
- struct page **ppages;
+ struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
+ struct ib_sge *sge = &req->rl_send_sge[0];
+
+ if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) {
+ if (!__rpcrdma_dma_map_regbuf(ia, rb))
+ return false;
+ sge->addr = rdmab_addr(rb);
+ sge->lkey = rdmab_lkey(rb);
+ }
+ sge->length = len;
+
+ ib_dma_sync_single_for_device(ia->ri_device, sge->addr,
+ sge->length, DMA_TO_DEVICE);
+ req->rl_send_wr.num_sge++;
+ return true;
+}
- destp = rqst->rq_svec[0].iov_base;
- curlen = rqst->rq_svec[0].iov_len;
- destp += curlen;
+/* Prepare the Send SGEs. The head and tail iovec, and each entry
+ * in the page list, gets its own SGE.
+ */
+static bool
+rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
+ struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
+{
+ unsigned int sge_no, page_base, len, remaining;
+ struct rpcrdma_regbuf *rb = req->rl_sendbuf;
+ struct ib_device *device = ia->ri_device;
+ struct ib_sge *sge = req->rl_send_sge;
+ u32 lkey = ia->ri_pd->local_dma_lkey;
+ struct page *page, **ppages;
+
+ /* The head iovec is straightforward, as it is already
+ * DMA-mapped. Sync the content that has changed.
+ */
+ if (!rpcrdma_dma_map_regbuf(ia, rb))
+ return false;
+ sge_no = 1;
+ sge[sge_no].addr = rdmab_addr(rb);
+ sge[sge_no].length = xdr->head[0].iov_len;
+ sge[sge_no].lkey = rdmab_lkey(rb);
+ ib_dma_sync_single_for_device(device, sge[sge_no].addr,
+ sge[sge_no].length, DMA_TO_DEVICE);
+
+ /* If there is a Read chunk, the page list is being handled
+ * via explicit RDMA, and thus is skipped here. However, the
+ * tail iovec may include an XDR pad for the page list, as
+ * well as additional content, and may not reside in the
+ * same page as the head iovec.
+ */
+ if (rtype == rpcrdma_readch) {
+ len = xdr->tail[0].iov_len;
- dprintk("RPC: %s: destp 0x%p len %d hdrlen %d\n",
- __func__, destp, rqst->rq_slen, curlen);
+ /* Do not include the tail if it is only an XDR pad */
+ if (len < 4)
+ goto out;
- copy_len = rqst->rq_snd_buf.page_len;
+ page = virt_to_page(xdr->tail[0].iov_base);
+ page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
- if (rqst->rq_snd_buf.tail[0].iov_len) {
- curlen = rqst->rq_snd_buf.tail[0].iov_len;
- if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) {
- memmove(destp + copy_len,
- rqst->rq_snd_buf.tail[0].iov_base, curlen);
- r_xprt->rx_stats.pullup_copy_count += curlen;
+ /* If the content in the page list is an odd length,
+ * xdr_write_pages() has added a pad at the beginning
+ * of the tail iovec. Force the tail's non-pad content
+ * to land at the next XDR position in the Send message.
+ */
+ page_base += len & 3;
+ len -= len & 3;
+ goto map_tail;
+ }
+
+ /* If there is a page list present, temporarily DMA map
+ * and prepare an SGE for each page to be sent.
+ */
+ if (xdr->page_len) {
+ ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ page_base = xdr->page_base & ~PAGE_MASK;
+ remaining = xdr->page_len;
+ while (remaining) {
+ sge_no++;
+ if (sge_no > RPCRDMA_MAX_SEND_SGES - 2)
+ goto out_mapping_overflow;
+
+ len = min_t(u32, PAGE_SIZE - page_base, remaining);
+ sge[sge_no].addr = ib_dma_map_page(device, *ppages,
+ page_base, len,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(device, sge[sge_no].addr))
+ goto out_mapping_err;
+ sge[sge_no].length = len;
+ sge[sge_no].lkey = lkey;
+
+ req->rl_mapped_sges++;
+ ppages++;
+ remaining -= len;
+ page_base = 0;
}
- dprintk("RPC: %s: tail destp 0x%p len %d\n",
- __func__, destp + copy_len, curlen);
- rqst->rq_svec[0].iov_len += curlen;
}
- r_xprt->rx_stats.pullup_copy_count += copy_len;
- page_base = rqst->rq_snd_buf.page_base;
- ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT);
- page_base &= ~PAGE_MASK;
- npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT;
- for (i = 0; copy_len && i < npages; i++) {
- curlen = PAGE_SIZE - page_base;
- if (curlen > copy_len)
- curlen = copy_len;
- dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n",
- __func__, i, destp, copy_len, curlen);
- srcp = kmap_atomic(ppages[i]);
- memcpy(destp, srcp+page_base, curlen);
- kunmap_atomic(srcp);
- rqst->rq_svec[0].iov_len += curlen;
- destp += curlen;
- copy_len -= curlen;
- page_base = 0;
+ /* The tail iovec is not always constructed in the same
+ * page where the head iovec resides (see, for example,
+ * gss_wrap_req_priv). To neatly accommodate that case,
+ * DMA map it separately.
+ */
+ if (xdr->tail[0].iov_len) {
+ page = virt_to_page(xdr->tail[0].iov_base);
+ page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+ len = xdr->tail[0].iov_len;
+
+map_tail:
+ sge_no++;
+ sge[sge_no].addr = ib_dma_map_page(device, page,
+ page_base, len,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(device, sge[sge_no].addr))
+ goto out_mapping_err;
+ sge[sge_no].length = len;
+ sge[sge_no].lkey = lkey;
+ req->rl_mapped_sges++;
}
- /* header now contains entire send message */
+
+out:
+ req->rl_send_wr.num_sge = sge_no + 1;
+ return true;
+
+out_mapping_overflow:
+ pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
+ return false;
+
+out_mapping_err:
+ pr_err("rpcrdma: Send mapping error\n");
+ return false;
+}
+
+bool
+rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
+ u32 hdrlen, struct xdr_buf *xdr,
+ enum rpcrdma_chunktype rtype)
+{
+ req->rl_send_wr.num_sge = 0;
+ req->rl_mapped_sges = 0;
+
+ if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen))
+ goto out_map;
+
+ if (rtype != rpcrdma_areadch)
+ if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype))
+ goto out_map;
+
+ return true;
+
+out_map:
+ pr_err("rpcrdma: failed to DMA map a Send buffer\n");
+ return false;
+}
+
+void
+rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+{
+ struct ib_device *device = ia->ri_device;
+ struct ib_sge *sge;
+ int count;
+
+ sge = &req->rl_send_sge[2];
+ for (count = req->rl_mapped_sges; count--; sge++)
+ ib_dma_unmap_page(device, sge->addr, sge->length,
+ DMA_TO_DEVICE);
+ req->rl_mapped_sges = 0;
}
/*
* Marshal a request: the primary job of this routine is to choose
* the transfer modes. See comments below.
*
- * Prepares up to two IOVs per Call message:
- *
- * [0] -- RPC RDMA header
- * [1] -- the RPC header/data
- *
* Returns zero on success, otherwise a negative errno.
*/
@@ -626,12 +704,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
*/
if (rpcrdma_args_inline(r_xprt, rqst)) {
rtype = rpcrdma_noch;
- rpcrdma_inline_pullup(rqst);
- rpclen = rqst->rq_svec[0].iov_len;
+ rpclen = rqst->rq_snd_buf.len;
} else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) {
rtype = rpcrdma_readch;
- rpclen = rqst->rq_svec[0].iov_len;
- rpclen += rpcrdma_tail_pullup(&rqst->rq_snd_buf);
+ rpclen = rqst->rq_snd_buf.head[0].iov_len +
+ rqst->rq_snd_buf.tail[0].iov_len;
} else {
r_xprt->rx_stats.nomsg_call_count++;
headerp->rm_type = htonl(RDMA_NOMSG);
@@ -673,34 +750,18 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
goto out_unmap;
hdrlen = (unsigned char *)iptr - (unsigned char *)headerp;
- if (hdrlen + rpclen > RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
- goto out_overflow;
-
dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n",
rqst->rq_task->tk_pid, __func__,
transfertypes[rtype], transfertypes[wtype],
hdrlen, rpclen);
- req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
- req->rl_send_iov[0].length = hdrlen;
- req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf);
-
- req->rl_niovs = 1;
- if (rtype == rpcrdma_areadch)
- return 0;
-
- req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf);
- req->rl_send_iov[1].length = rpclen;
- req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf);
-
- req->rl_niovs = 2;
+ if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen,
+ &rqst->rq_snd_buf, rtype)) {
+ iptr = ERR_PTR(-EIO);
+ goto out_unmap;
+ }
return 0;
-out_overflow:
- pr_err("rpcrdma: send overflow: hdrlen %zd rpclen %zu %s/%s\n",
- hdrlen, rpclen, transfertypes[rtype], transfertypes[wtype]);
- iptr = ERR_PTR(-EIO);
-
out_unmap:
r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
return PTR_ERR(iptr);
@@ -916,8 +977,10 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
* allowed to timeout, to discover the errors at that time.
*/
void
-rpcrdma_reply_handler(struct rpcrdma_rep *rep)
+rpcrdma_reply_handler(struct work_struct *work)
{
+ struct rpcrdma_rep *rep =
+ container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_msg *headerp;
struct rpcrdma_req *req;
struct rpc_rqst *rqst;
@@ -1132,6 +1195,6 @@ out_duplicate:
repost:
r_xprt->rx_stats.bad_reply_count++;
- if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
+ if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))
rpcrdma_recv_buffer_put(rep);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index a2a7519b0f23..2d8545c34095 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -129,7 +129,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
ret = -EIO;
goto out_unmap;
}
- atomic_inc(&rdma->sc_dma_used);
+ svc_rdma_count_mappings(rdma, ctxt);
memset(&send_wr, 0, sizeof(send_wr));
ctxt->cqe.done = svc_rdma_wc_send;
@@ -159,33 +159,34 @@ out_unmap:
/* Server-side transport endpoint wants a whole page for its send
* buffer. The client RPC code constructs the RPC header in this
* buffer before it invokes ->send_request.
- *
- * Returns NULL if there was a temporary allocation failure.
*/
-static void *
-xprt_rdma_bc_allocate(struct rpc_task *task, size_t size)
+static int
+xprt_rdma_bc_allocate(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+ size_t size = rqst->rq_callsize;
struct svcxprt_rdma *rdma;
struct page *page;
rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
- /* Prevent an infinite loop: try to make this case work */
- if (size > PAGE_SIZE)
+ if (size > PAGE_SIZE) {
WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
size);
+ return -EINVAL;
+ }
page = alloc_page(RPCRDMA_DEF_GFP);
if (!page)
- return NULL;
+ return -ENOMEM;
- return page_address(page);
+ rqst->rq_buffer = page_address(page);
+ return 0;
}
static void
-xprt_rdma_bc_free(void *buffer)
+xprt_rdma_bc_free(struct rpc_task *task)
{
/* No-op: ctxt and page have already been freed. */
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 2c25606f2561..ad1df979b3f0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -159,7 +159,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
ctxt->sge[pno].addr);
if (ret)
goto err;
- atomic_inc(&xprt->sc_dma_used);
+ svc_rdma_count_mappings(xprt, ctxt);
ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->sge[pno].length = len;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 54d533300620..f5a91edcd233 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -225,6 +225,48 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
return rp_ary;
}
+/* RPC-over-RDMA Version One private extension: Remote Invalidation.
+ * Responder's choice: requester signals it can handle Send With
+ * Invalidate, and responder chooses one rkey to invalidate.
+ *
+ * Find a candidate rkey to invalidate when sending a reply. Picks the
+ * first rkey it finds in the chunks lists.
+ *
+ * Returns zero if RPC's chunk lists are empty.
+ */
+static u32 svc_rdma_get_inv_rkey(struct rpcrdma_msg *rdma_argp,
+ struct rpcrdma_write_array *wr_ary,
+ struct rpcrdma_write_array *rp_ary)
+{
+ struct rpcrdma_read_chunk *rd_ary;
+ struct rpcrdma_segment *arg_ch;
+ u32 inv_rkey;
+
+ inv_rkey = 0;
+
+ rd_ary = svc_rdma_get_read_chunk(rdma_argp);
+ if (rd_ary) {
+ inv_rkey = be32_to_cpu(rd_ary->rc_target.rs_handle);
+ goto out;
+ }
+
+ if (wr_ary && be32_to_cpu(wr_ary->wc_nchunks)) {
+ arg_ch = &wr_ary->wc_array[0].wc_target;
+ inv_rkey = be32_to_cpu(arg_ch->rs_handle);
+ goto out;
+ }
+
+ if (rp_ary && be32_to_cpu(rp_ary->wc_nchunks)) {
+ arg_ch = &rp_ary->wc_array[0].wc_target;
+ inv_rkey = be32_to_cpu(arg_ch->rs_handle);
+ goto out;
+ }
+
+out:
+ dprintk("svcrdma: Send With Invalidate rkey=%08x\n", inv_rkey);
+ return inv_rkey;
+}
+
/* Assumptions:
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
@@ -280,7 +322,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
sge[sge_no].addr))
goto err;
- atomic_inc(&xprt->sc_dma_used);
+ svc_rdma_count_mappings(xprt, ctxt);
sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->count++;
sge_off = 0;
@@ -464,7 +506,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
struct page *page,
struct rpcrdma_msg *rdma_resp,
struct svc_rdma_req_map *vec,
- int byte_count)
+ int byte_count,
+ u32 inv_rkey)
{
struct svc_rdma_op_ctxt *ctxt;
struct ib_send_wr send_wr;
@@ -489,7 +532,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[0].length, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
goto err;
- atomic_inc(&rdma->sc_dma_used);
+ svc_rdma_count_mappings(rdma, ctxt);
ctxt->direction = DMA_TO_DEVICE;
@@ -505,7 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
ctxt->sge[sge_no].addr))
goto err;
- atomic_inc(&rdma->sc_dma_used);
+ svc_rdma_count_mappings(rdma, ctxt);
ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
@@ -523,23 +566,9 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
ctxt->count++;
rqstp->rq_respages[page_no] = NULL;
- /*
- * If there are more pages than SGE, terminate SGE
- * list so that svc_rdma_unmap_dma doesn't attempt to
- * unmap garbage.
- */
- if (page_no+1 >= sge_no)
- ctxt->sge[page_no+1].length = 0;
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
- /* The loop above bumps sc_dma_used for each sge. The
- * xdr_buf.tail gets a separate sge, but resides in the
- * same page as xdr_buf.head. Don't count it twice.
- */
- if (sge_no > ctxt->count)
- atomic_dec(&rdma->sc_dma_used);
-
if (sge_no > rdma->sc_max_sge) {
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
goto err;
@@ -549,7 +578,11 @@ static int send_reply(struct svcxprt_rdma *rdma,
send_wr.wr_cqe = &ctxt->cqe;
send_wr.sg_list = ctxt->sge;
send_wr.num_sge = sge_no;
- send_wr.opcode = IB_WR_SEND;
+ if (inv_rkey) {
+ send_wr.opcode = IB_WR_SEND_WITH_INV;
+ send_wr.ex.invalidate_rkey = inv_rkey;
+ } else
+ send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
ret = svc_rdma_send(rdma, &send_wr);
@@ -581,6 +614,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
int inline_bytes;
struct page *res_page;
struct svc_rdma_req_map *vec;
+ u32 inv_rkey;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
@@ -591,6 +625,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
wr_ary = svc_rdma_get_write_array(rdma_argp);
rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
+ inv_rkey = 0;
+ if (rdma->sc_snd_w_inv)
+ inv_rkey = svc_rdma_get_inv_rkey(rdma_argp, wr_ary, rp_ary);
+
/* Build an req vec for the XDR */
vec = svc_rdma_get_req_map(rdma);
ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
@@ -633,9 +671,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
goto err1;
ret = send_reply(rdma, rqstp, res_page, rdma_resp, vec,
- inline_bytes);
+ inline_bytes, inv_rkey);
if (ret < 0)
- goto err1;
+ goto err0;
svc_rdma_put_req_map(rdma, vec);
dprintk("svcrdma: send_reply returns %d\n", ret);
@@ -692,7 +730,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
svc_rdma_put_context(ctxt, 1);
return;
}
- atomic_inc(&xprt->sc_dma_used);
+ svc_rdma_count_mappings(xprt, ctxt);
/* Prepare SEND WR */
memset(&err_wr, 0, sizeof(err_wr));
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index dd9440137834..6864fb967038 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -198,6 +198,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
out:
ctxt->count = 0;
+ ctxt->mapped_sges = 0;
ctxt->frmr = NULL;
return ctxt;
@@ -221,22 +222,27 @@ out_empty:
void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
{
struct svcxprt_rdma *xprt = ctxt->xprt;
- int i;
- for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
+ struct ib_device *device = xprt->sc_cm_id->device;
+ u32 lkey = xprt->sc_pd->local_dma_lkey;
+ unsigned int i, count;
+
+ for (count = 0, i = 0; i < ctxt->mapped_sges; i++) {
/*
* Unmap the DMA addr in the SGE if the lkey matches
* the local_dma_lkey, otherwise, ignore it since it is
* an FRMR lkey and will be unmapped later when the
* last WR that uses it completes.
*/
- if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
- atomic_dec(&xprt->sc_dma_used);
- ib_dma_unmap_page(xprt->sc_cm_id->device,
+ if (ctxt->sge[i].lkey == lkey) {
+ count++;
+ ib_dma_unmap_page(device,
ctxt->sge[i].addr,
ctxt->sge[i].length,
ctxt->direction);
}
}
+ ctxt->mapped_sges = 0;
+ atomic_sub(count, &xprt->sc_dma_used);
}
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
@@ -600,7 +606,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
goto err_put_ctxt;
- atomic_inc(&xprt->sc_dma_used);
+ svc_rdma_count_mappings(xprt, ctxt);
ctxt->sge[sge_no].addr = pa;
ctxt->sge[sge_no].length = PAGE_SIZE;
ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
@@ -642,6 +648,26 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
return ret;
}
+static void
+svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
+ struct rdma_conn_param *param)
+{
+ const struct rpcrdma_connect_private *pmsg = param->private_data;
+
+ if (pmsg &&
+ pmsg->cp_magic == rpcrdma_cmp_magic &&
+ pmsg->cp_version == RPCRDMA_CMP_VERSION) {
+ newxprt->sc_snd_w_inv = pmsg->cp_flags &
+ RPCRDMA_CMP_F_SND_W_INV_OK;
+
+ dprintk("svcrdma: client send_size %u, recv_size %u "
+ "remote inv %ssupported\n",
+ rpcrdma_decode_buffer_size(pmsg->cp_send_size),
+ rpcrdma_decode_buffer_size(pmsg->cp_recv_size),
+ newxprt->sc_snd_w_inv ? "" : "un");
+ }
+}
+
/*
* This function handles the CONNECT_REQUEST event on a listening
* endpoint. It is passed the cma_id for the _new_ connection. The context in
@@ -653,7 +679,8 @@ int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
* will call the recvfrom method on the listen xprt which will accept the new
* connection.
*/
-static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
+static void handle_connect_req(struct rdma_cm_id *new_cma_id,
+ struct rdma_conn_param *param)
{
struct svcxprt_rdma *listen_xprt = new_cma_id->context;
struct svcxprt_rdma *newxprt;
@@ -669,9 +696,10 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
new_cma_id->context = newxprt;
dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
newxprt, newxprt->sc_cm_id, listen_xprt);
+ svc_rdma_parse_connect_private(newxprt, param);
/* Save client advertised inbound read limit for use later in accept. */
- newxprt->sc_ord = client_ird;
+ newxprt->sc_ord = param->initiator_depth;
/* Set the local and remote addresses in the transport */
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
@@ -706,8 +734,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
"event = %s (%d)\n", cma_id, cma_id->context,
rdma_event_msg(event->event), event->event);
- handle_connect_req(cma_id,
- event->param.conn.initiator_depth);
+ handle_connect_req(cma_id, &event->param.conn);
break;
case RDMA_CM_EVENT_ESTABLISHED:
@@ -941,6 +968,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct svcxprt_rdma *listen_rdma;
struct svcxprt_rdma *newxprt = NULL;
struct rdma_conn_param conn_param;
+ struct rpcrdma_connect_private pmsg;
struct ib_qp_init_attr qp_attr;
struct ib_device *dev;
unsigned int i;
@@ -993,7 +1021,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
- newxprt->sc_pd = ib_alloc_pd(dev);
+ newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n");
goto errout;
@@ -1070,7 +1098,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dev->attrs.max_fast_reg_page_list_len;
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
newxprt->sc_reader = rdma_read_chunk_frmr;
- }
+ } else
+ newxprt->sc_snd_w_inv = false;
/*
* Determine if a DMA MR is required and if so, what privs are required
@@ -1094,11 +1123,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/* Swap out the handler */
newxprt->sc_cm_id->event_handler = rdma_cma_handler;
+ /* Construct RDMA-CM private message */
+ pmsg.cp_magic = rpcrdma_cmp_magic;
+ pmsg.cp_version = RPCRDMA_CMP_VERSION;
+ pmsg.cp_flags = 0;
+ pmsg.cp_send_size = pmsg.cp_recv_size =
+ rpcrdma_encode_buffer_size(newxprt->sc_max_req_size);
+
/* Accept Connection */
set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
memset(&conn_param, 0, sizeof conn_param);
conn_param.responder_resources = 0;
conn_param.initiator_depth = newxprt->sc_ord;
+ conn_param.private_data = &pmsg;
+ conn_param.private_data_len = sizeof(pmsg);
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
if (ret) {
dprintk("svcrdma: failed to accept new connection, ret=%d\n",
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 81f0e879f019..ed5e285fd2ea 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -97,7 +97,7 @@ static struct ctl_table xr_tunables_table[] = {
.data = &xprt_rdma_max_inline_read,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &min_inline_size,
.extra2 = &max_inline_size,
},
@@ -106,7 +106,7 @@ static struct ctl_table xr_tunables_table[] = {
.data = &xprt_rdma_max_inline_write,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &min_inline_size,
.extra2 = &max_inline_size,
},
@@ -477,115 +477,152 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
}
}
-/*
- * The RDMA allocate/free functions need the task structure as a place
- * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
- * sequence.
+/* Allocate a fixed-size buffer in which to construct and send the
+ * RPC-over-RDMA header for this request.
+ */
+static bool
+rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+ gfp_t flags)
+{
+ size_t size = RPCRDMA_HDRBUF_SIZE;
+ struct rpcrdma_regbuf *rb;
+
+ if (req->rl_rdmabuf)
+ return true;
+
+ rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
+ if (IS_ERR(rb))
+ return false;
+
+ r_xprt->rx_stats.hardway_register_count += size;
+ req->rl_rdmabuf = rb;
+ return true;
+}
+
+static bool
+rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+ size_t size, gfp_t flags)
+{
+ struct rpcrdma_regbuf *rb;
+
+ if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size)
+ return true;
+
+ rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
+ if (IS_ERR(rb))
+ return false;
+
+ rpcrdma_free_regbuf(req->rl_sendbuf);
+ r_xprt->rx_stats.hardway_register_count += size;
+ req->rl_sendbuf = rb;
+ return true;
+}
+
+/* The rq_rcv_buf is used only if a Reply chunk is necessary.
+ * The decision to use a Reply chunk is made later in
+ * rpcrdma_marshal_req. This buffer is registered at that time.
*
- * The RPC layer allocates both send and receive buffers in the same call
- * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer).
- * We may register rq_rcv_buf when using reply chunks.
+ * Otherwise, the associated RPC Reply arrives in a separate
+ * Receive buffer, arbitrarily chosen by the HCA. The buffer
+ * allocated here for the RPC Reply is not utilized in that
+ * case. See rpcrdma_inline_fixup.
+ *
+ * A regbuf is used here to remember the buffer size.
*/
-static void *
-xprt_rdma_allocate(struct rpc_task *task, size_t size)
+static bool
+rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
+ size_t size, gfp_t flags)
{
- struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_regbuf *rb;
+
+ if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size)
+ return true;
+
+ rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags);
+ if (IS_ERR(rb))
+ return false;
+
+ rpcrdma_free_regbuf(req->rl_recvbuf);
+ r_xprt->rx_stats.hardway_register_count += size;
+ req->rl_recvbuf = rb;
+ return true;
+}
+
+/**
+ * xprt_rdma_allocate - allocate transport resources for an RPC
+ * @task: RPC task
+ *
+ * Return values:
+ * 0: Success; rq_buffer points to RPC buffer to use
+ * ENOMEM: Out of memory, call again later
+ * EIO: A permanent error occurred, do not retry
+ *
+ * The RDMA allocate/free functions need the task structure as a place
+ * to hide the struct rpcrdma_req, which is necessary for the actual
+ * send/recv sequence.
+ *
+ * xprt_rdma_allocate provides buffers that are already mapped for
+ * DMA, and a local DMA lkey is provided for each.
+ */
+static int
+xprt_rdma_allocate(struct rpc_task *task)
+{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req;
- size_t min_size;
gfp_t flags;
req = rpcrdma_buffer_get(&r_xprt->rx_buf);
if (req == NULL)
- return NULL;
+ return -ENOMEM;
flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
- if (req->rl_rdmabuf == NULL)
- goto out_rdmabuf;
- if (req->rl_sendbuf == NULL)
- goto out_sendbuf;
- if (size > req->rl_sendbuf->rg_size)
- goto out_sendbuf;
-
-out:
- dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
- req->rl_connect_cookie = 0; /* our reserved value */
- req->rl_task = task;
- return req->rl_sendbuf->rg_base;
-
-out_rdmabuf:
- min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
- rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags);
- if (IS_ERR(rb))
+ if (!rpcrdma_get_rdmabuf(r_xprt, req, flags))
goto out_fail;
- req->rl_rdmabuf = rb;
-
-out_sendbuf:
- /* XDR encoding and RPC/RDMA marshaling of this request has not
- * yet occurred. Thus a lower bound is needed to prevent buffer
- * overrun during marshaling.
- *
- * RPC/RDMA marshaling may choose to send payload bearing ops
- * inline, if the result is smaller than the inline threshold.
- * The value of the "size" argument accounts for header
- * requirements but not for the payload in these cases.
- *
- * Likewise, allocate enough space to receive a reply up to the
- * size of the inline threshold.
- *
- * It's unlikely that both the send header and the received
- * reply will be large, but slush is provided here to allow
- * flexibility when marshaling.
- */
- min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp);
- min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
- if (size < min_size)
- size = min_size;
-
- rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags);
- if (IS_ERR(rb))
+ if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
+ goto out_fail;
+ if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail;
- rb->rg_owner = req;
- r_xprt->rx_stats.hardway_register_count += size;
- rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf);
- req->rl_sendbuf = rb;
- goto out;
+ dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n",
+ task->tk_pid, __func__, rqst->rq_callsize,
+ rqst->rq_rcvsize, req);
+
+ req->rl_connect_cookie = 0; /* our reserved value */
+ rpcrdma_set_xprtdata(rqst, req);
+ rqst->rq_buffer = req->rl_sendbuf->rg_base;
+ rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
+ return 0;
out_fail:
rpcrdma_buffer_put(req);
- return NULL;
+ return -ENOMEM;
}
-/*
- * This function returns all RDMA resources to the pool.
+/**
+ * xprt_rdma_free - release resources allocated by xprt_rdma_allocate
+ * @task: RPC task
+ *
+ * Caller guarantees rqst->rq_buffer is non-NULL.
*/
static void
-xprt_rdma_free(void *buffer)
+xprt_rdma_free(struct rpc_task *task)
{
- struct rpcrdma_req *req;
- struct rpcrdma_xprt *r_xprt;
- struct rpcrdma_regbuf *rb;
-
- if (buffer == NULL)
- return;
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
- req = rb->rg_owner;
if (req->rl_backchannel)
return;
- r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
-
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
- r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req,
- !RPC_IS_ASYNC(req->rl_task));
-
+ ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
+ rpcrdma_unmap_sges(ia, req);
rpcrdma_buffer_put(req);
}
@@ -685,10 +722,11 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
r_xprt->rx_stats.failed_marshal_count,
r_xprt->rx_stats.bad_reply_count,
r_xprt->rx_stats.nomsg_call_count);
- seq_printf(seq, "%lu %lu %lu\n",
+ seq_printf(seq, "%lu %lu %lu %lu\n",
r_xprt->rx_stats.mrs_recovered,
r_xprt->rx_stats.mrs_orphaned,
- r_xprt->rx_stats.mrs_allocated);
+ r_xprt->rx_stats.mrs_allocated,
+ r_xprt->rx_stats.local_inv_needed);
}
static int
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 799cce6cbe45..ec74289af7ec 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -129,15 +129,6 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
wc->status, wc->vendor_err);
}
-static void
-rpcrdma_receive_worker(struct work_struct *work)
-{
- struct rpcrdma_rep *rep =
- container_of(work, struct rpcrdma_rep, rr_work);
-
- rpcrdma_reply_handler(rep);
-}
-
/* Perform basic sanity checking to avoid using garbage
* to update the credit grant value.
*/
@@ -161,13 +152,13 @@ rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
}
/**
- * rpcrdma_receive_wc - Invoked by RDMA provider for each polled Receive WC
+ * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
* @cq: completion queue (ignored)
* @wc: completed WR
*
*/
static void
-rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc)
+rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
@@ -185,6 +176,9 @@ rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc)
__func__, rep, wc->byte_len);
rep->rr_len = wc->byte_len;
+ rep->rr_wc_flags = wc->wc_flags;
+ rep->rr_inv_rkey = wc->ex.invalidate_rkey;
+
ib_dma_sync_single_for_cpu(rep->rr_device,
rdmab_addr(rep->rr_rdmabuf),
rep->rr_len, DMA_FROM_DEVICE);
@@ -204,6 +198,36 @@ out_fail:
goto out_schedule;
}
+static void
+rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
+ struct rdma_conn_param *param)
+{
+ struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+ const struct rpcrdma_connect_private *pmsg = param->private_data;
+ unsigned int rsize, wsize;
+
+ /* Default settings for RPC-over-RDMA Version One */
+ r_xprt->rx_ia.ri_reminv_expected = false;
+ rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
+ wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
+
+ if (pmsg &&
+ pmsg->cp_magic == rpcrdma_cmp_magic &&
+ pmsg->cp_version == RPCRDMA_CMP_VERSION) {
+ r_xprt->rx_ia.ri_reminv_expected = true;
+ rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
+ wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
+ }
+
+ if (rsize < cdata->inline_rsize)
+ cdata->inline_rsize = rsize;
+ if (wsize < cdata->inline_wsize)
+ cdata->inline_wsize = wsize;
+ pr_info("rpcrdma: max send %u, max recv %u\n",
+ cdata->inline_wsize, cdata->inline_rsize);
+ rpcrdma_set_max_header_sizes(r_xprt);
+}
+
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
@@ -244,6 +268,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
" (%d initiator)\n",
__func__, attr->max_dest_rd_atomic,
attr->max_rd_atomic);
+ rpcrdma_update_connect_private(xprt, &event->param.conn);
goto connected;
case RDMA_CM_EVENT_CONNECT_ERROR:
connstate = -ENOTCONN;
@@ -387,7 +412,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
}
ia->ri_device = ia->ri_id->device;
- ia->ri_pd = ib_alloc_pd(ia->ri_device);
+ ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
if (IS_ERR(ia->ri_pd)) {
rc = PTR_ERR(ia->ri_pd);
pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
@@ -454,11 +479,12 @@ int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
{
+ struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
struct ib_cq *sendcq, *recvcq;
unsigned int max_qp_wr;
int rc;
- if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
+ if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
dprintk("RPC: %s: insufficient sge's available\n",
__func__);
return -ENOMEM;
@@ -487,7 +513,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
- ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS;
+ ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES;
ep->rep_attr.cap.max_recv_sge = 1;
ep->rep_attr.cap.max_inline_data = 0;
ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -536,9 +562,14 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Initialize cma parameters */
memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma));
- /* RPC/RDMA does not use private data */
- ep->rep_remote_cma.private_data = NULL;
- ep->rep_remote_cma.private_data_len = 0;
+ /* Prepare RDMA-CM private message */
+ pmsg->cp_magic = rpcrdma_cmp_magic;
+ pmsg->cp_version = RPCRDMA_CMP_VERSION;
+ pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok;
+ pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
+ pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
+ ep->rep_remote_cma.private_data = pmsg;
+ ep->rep_remote_cma.private_data_len = sizeof(*pmsg);
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
@@ -849,6 +880,10 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
req->rl_cqe.done = rpcrdma_wc_send;
req->rl_buffer = &r_xprt->rx_buf;
INIT_LIST_HEAD(&req->rl_registered);
+ req->rl_send_wr.next = NULL;
+ req->rl_send_wr.wr_cqe = &req->rl_cqe;
+ req->rl_send_wr.sg_list = req->rl_send_sge;
+ req->rl_send_wr.opcode = IB_WR_SEND;
return req;
}
@@ -865,17 +900,21 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
if (rep == NULL)
goto out;
- rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
- GFP_KERNEL);
+ rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize,
+ DMA_FROM_DEVICE, GFP_KERNEL);
if (IS_ERR(rep->rr_rdmabuf)) {
rc = PTR_ERR(rep->rr_rdmabuf);
goto out_free;
}
rep->rr_device = ia->ri_device;
- rep->rr_cqe.done = rpcrdma_receive_wc;
+ rep->rr_cqe.done = rpcrdma_wc_receive;
rep->rr_rxprt = r_xprt;
- INIT_WORK(&rep->rr_work, rpcrdma_receive_worker);
+ INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
+ rep->rr_recv_wr.next = NULL;
+ rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
+ rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
+ rep->rr_recv_wr.num_sge = 1;
return rep;
out_free:
@@ -966,17 +1005,18 @@ rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf)
}
static void
-rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
+rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
{
- rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
+ rpcrdma_free_regbuf(rep->rr_rdmabuf);
kfree(rep);
}
void
-rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+rpcrdma_destroy_req(struct rpcrdma_req *req)
{
- rpcrdma_free_regbuf(ia, req->rl_sendbuf);
- rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
+ rpcrdma_free_regbuf(req->rl_recvbuf);
+ rpcrdma_free_regbuf(req->rl_sendbuf);
+ rpcrdma_free_regbuf(req->rl_rdmabuf);
kfree(req);
}
@@ -1009,15 +1049,13 @@ rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf)
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
- struct rpcrdma_ia *ia = rdmab_to_ia(buf);
-
cancel_delayed_work_sync(&buf->rb_recovery_worker);
while (!list_empty(&buf->rb_recv_bufs)) {
struct rpcrdma_rep *rep;
rep = rpcrdma_buffer_get_rep_locked(buf);
- rpcrdma_destroy_rep(ia, rep);
+ rpcrdma_destroy_rep(rep);
}
buf->rb_send_count = 0;
@@ -1030,7 +1068,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
list_del(&req->rl_all);
spin_unlock(&buf->rb_reqslock);
- rpcrdma_destroy_req(ia, req);
+ rpcrdma_destroy_req(req);
spin_lock(&buf->rb_reqslock);
}
spin_unlock(&buf->rb_reqslock);
@@ -1129,7 +1167,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
struct rpcrdma_buffer *buffers = req->rl_buffer;
struct rpcrdma_rep *rep = req->rl_reply;
- req->rl_niovs = 0;
+ req->rl_send_wr.num_sge = 0;
req->rl_reply = NULL;
spin_lock(&buffers->rb_lock);
@@ -1171,70 +1209,81 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
spin_unlock(&buffers->rb_lock);
}
-/*
- * Wrappers for internal-use kmalloc memory registration, used by buffer code.
- */
-
/**
- * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
- * @ia: controlling rpcrdma_ia
+ * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers
* @size: size of buffer to be allocated, in bytes
+ * @direction: direction of data movement
* @flags: GFP flags
*
- * Returns pointer to private header of an area of internally
- * registered memory, or an ERR_PTR. The registered buffer follows
- * the end of the private header.
+ * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
+ * can be persistently DMA-mapped for I/O.
*
* xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
- * receiving the payload of RDMA RECV operations. regbufs are not
- * used for RDMA READ/WRITE operations, thus are registered only for
- * LOCAL access.
+ * receiving the payload of RDMA RECV operations. During Long Calls
+ * or Replies they may be registered externally via ro_map.
*/
struct rpcrdma_regbuf *
-rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
+rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
+ gfp_t flags)
{
struct rpcrdma_regbuf *rb;
- struct ib_sge *iov;
rb = kmalloc(sizeof(*rb) + size, flags);
if (rb == NULL)
- goto out;
+ return ERR_PTR(-ENOMEM);
- iov = &rb->rg_iov;
- iov->addr = ib_dma_map_single(ia->ri_device,
- (void *)rb->rg_base, size,
- DMA_BIDIRECTIONAL);
- if (ib_dma_mapping_error(ia->ri_device, iov->addr))
- goto out_free;
+ rb->rg_device = NULL;
+ rb->rg_direction = direction;
+ rb->rg_iov.length = size;
- iov->length = size;
- iov->lkey = ia->ri_pd->local_dma_lkey;
- rb->rg_size = size;
- rb->rg_owner = NULL;
return rb;
+}
-out_free:
- kfree(rb);
-out:
- return ERR_PTR(-ENOMEM);
+/**
+ * __rpcrdma_map_regbuf - DMA-map a regbuf
+ * @ia: controlling rpcrdma_ia
+ * @rb: regbuf to be mapped
+ */
+bool
+__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+{
+ if (rb->rg_direction == DMA_NONE)
+ return false;
+
+ rb->rg_iov.addr = ib_dma_map_single(ia->ri_device,
+ (void *)rb->rg_base,
+ rdmab_length(rb),
+ rb->rg_direction);
+ if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb)))
+ return false;
+
+ rb->rg_device = ia->ri_device;
+ rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
+ return true;
+}
+
+static void
+rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
+{
+ if (!rpcrdma_regbuf_is_mapped(rb))
+ return;
+
+ ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb),
+ rdmab_length(rb), rb->rg_direction);
+ rb->rg_device = NULL;
}
/**
* rpcrdma_free_regbuf - deregister and free registered buffer
- * @ia: controlling rpcrdma_ia
* @rb: regbuf to be deregistered and freed
*/
void
-rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
{
- struct ib_sge *iov;
-
if (!rb)
return;
- iov = &rb->rg_iov;
- ib_dma_unmap_single(ia->ri_device,
- iov->addr, iov->length, DMA_BIDIRECTIONAL);
+ rpcrdma_dma_unmap_regbuf(rb);
kfree(rb);
}
@@ -1248,39 +1297,28 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
struct rpcrdma_ep *ep,
struct rpcrdma_req *req)
{
- struct ib_device *device = ia->ri_device;
- struct ib_send_wr send_wr, *send_wr_fail;
- struct rpcrdma_rep *rep = req->rl_reply;
- struct ib_sge *iov = req->rl_send_iov;
- int i, rc;
+ struct ib_send_wr *send_wr = &req->rl_send_wr;
+ struct ib_send_wr *send_wr_fail;
+ int rc;
- if (rep) {
- rc = rpcrdma_ep_post_recv(ia, ep, rep);
+ if (req->rl_reply) {
+ rc = rpcrdma_ep_post_recv(ia, req->rl_reply);
if (rc)
return rc;
req->rl_reply = NULL;
}
- send_wr.next = NULL;
- send_wr.wr_cqe = &req->rl_cqe;
- send_wr.sg_list = iov;
- send_wr.num_sge = req->rl_niovs;
- send_wr.opcode = IB_WR_SEND;
-
- for (i = 0; i < send_wr.num_sge; i++)
- ib_dma_sync_single_for_device(device, iov[i].addr,
- iov[i].length, DMA_TO_DEVICE);
dprintk("RPC: %s: posting %d s/g entries\n",
- __func__, send_wr.num_sge);
+ __func__, send_wr->num_sge);
if (DECR_CQCOUNT(ep) > 0)
- send_wr.send_flags = 0;
+ send_wr->send_flags = 0;
else { /* Provider must take a send completion every now and then */
INIT_CQCOUNT(ep);
- send_wr.send_flags = IB_SEND_SIGNALED;
+ send_wr->send_flags = IB_SEND_SIGNALED;
}
- rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
+ rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
if (rc)
goto out_postsend_err;
return 0;
@@ -1290,32 +1328,24 @@ out_postsend_err:
return -ENOTCONN;
}
-/*
- * (Re)post a receive buffer.
- */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
- struct rpcrdma_ep *ep,
struct rpcrdma_rep *rep)
{
- struct ib_recv_wr recv_wr, *recv_wr_fail;
+ struct ib_recv_wr *recv_wr_fail;
int rc;
- recv_wr.next = NULL;
- recv_wr.wr_cqe = &rep->rr_cqe;
- recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
- recv_wr.num_sge = 1;
-
- ib_dma_sync_single_for_cpu(ia->ri_device,
- rdmab_addr(rep->rr_rdmabuf),
- rdmab_length(rep->rr_rdmabuf),
- DMA_BIDIRECTIONAL);
-
- rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
+ if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
+ goto out_map;
+ rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
if (rc)
goto out_postrecv;
return 0;
+out_map:
+ pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
+ return -EIO;
+
out_postrecv:
pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
return -ENOTCONN;
@@ -1333,7 +1363,6 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
{
struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_rep *rep;
int rc;
@@ -1344,7 +1373,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
rep = rpcrdma_buffer_get_rep_locked(buffers);
spin_unlock(&buffers->rb_lock);
- rc = rpcrdma_ep_post_recv(ia, ep, rep);
+ rc = rpcrdma_ep_post_recv(ia, rep);
if (rc)
goto out_rc;
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index a71b0f5897d8..0d35b761c883 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -70,9 +70,11 @@ struct rpcrdma_ia {
struct ib_pd *ri_pd;
struct completion ri_done;
int ri_async_rc;
+ unsigned int ri_max_segs;
unsigned int ri_max_frmr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
+ bool ri_reminv_expected;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
};
@@ -87,6 +89,7 @@ struct rpcrdma_ep {
int rep_connected;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
+ struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker;
@@ -112,9 +115,9 @@ struct rpcrdma_ep {
*/
struct rpcrdma_regbuf {
- size_t rg_size;
- struct rpcrdma_req *rg_owner;
struct ib_sge rg_iov;
+ struct ib_device *rg_device;
+ enum dma_data_direction rg_direction;
__be32 rg_base[0] __attribute__ ((aligned(256)));
};
@@ -162,7 +165,10 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
* The smallest inline threshold is 1024 bytes, ensuring that
* at least 750 bytes are available for RPC messages.
*/
-#define RPCRDMA_MAX_HDR_SEGS (8)
+enum {
+ RPCRDMA_MAX_HDR_SEGS = 8,
+ RPCRDMA_HDRBUF_SIZE = 256,
+};
/*
* struct rpcrdma_rep -- this structure encapsulates state required to recv
@@ -182,10 +188,13 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
struct rpcrdma_rep {
struct ib_cqe rr_cqe;
unsigned int rr_len;
+ int rr_wc_flags;
+ u32 rr_inv_rkey;
struct ib_device *rr_device;
struct rpcrdma_xprt *rr_rxprt;
struct work_struct rr_work;
struct list_head rr_list;
+ struct ib_recv_wr rr_recv_wr;
struct rpcrdma_regbuf *rr_rdmabuf;
};
@@ -276,19 +285,30 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
char *mr_offset; /* kva if no page, else offset */
};
-#define RPCRDMA_MAX_IOVS (2)
+/* Reserve enough Send SGEs to send a maximum size inline request:
+ * - RPC-over-RDMA header
+ * - xdr_buf head iovec
+ * - RPCRDMA_MAX_INLINE bytes, possibly unaligned, in pages
+ * - xdr_buf tail iovec
+ */
+enum {
+ RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1,
+ RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1,
+ RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
+};
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_free;
- unsigned int rl_niovs;
+ unsigned int rl_mapped_sges;
unsigned int rl_connect_cookie;
- struct rpc_task *rl_task;
struct rpcrdma_buffer *rl_buffer;
- struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
- struct ib_sge rl_send_iov[RPCRDMA_MAX_IOVS];
- struct rpcrdma_regbuf *rl_rdmabuf;
- struct rpcrdma_regbuf *rl_sendbuf;
+ struct rpcrdma_rep *rl_reply;
+ struct ib_send_wr rl_send_wr;
+ struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES];
+ struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */
+ struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */
+ struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */
struct ib_cqe rl_cqe;
struct list_head rl_all;
@@ -298,14 +318,16 @@ struct rpcrdma_req {
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
};
+static inline void
+rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
+{
+ rqst->rq_xprtdata = req;
+}
+
static inline struct rpcrdma_req *
rpcr_to_rdmar(struct rpc_rqst *rqst)
{
- void *buffer = rqst->rq_buffer;
- struct rpcrdma_regbuf *rb;
-
- rb = container_of(buffer, struct rpcrdma_regbuf, rg_base);
- return rb->rg_owner;
+ return rqst->rq_xprtdata;
}
/*
@@ -356,15 +378,6 @@ struct rpcrdma_create_data_internal {
unsigned int padding; /* non-rdma write header padding */
};
-#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \
- (rpcx_to_rdmad(rq->rq_xprt).inline_rsize)
-
-#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\
- (rpcx_to_rdmad(rq->rq_xprt).inline_wsize)
-
-#define RPCRDMA_INLINE_PAD_VALUE(rq)\
- rpcx_to_rdmad(rq->rq_xprt).padding
-
/*
* Statistics for RPCRDMA
*/
@@ -386,6 +399,7 @@ struct rpcrdma_stats {
unsigned long mrs_recovered;
unsigned long mrs_orphaned;
unsigned long mrs_allocated;
+ unsigned long local_inv_needed;
};
/*
@@ -409,6 +423,7 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mw *);
void (*ro_release_mr)(struct rpcrdma_mw *);
const char *ro_displayname;
+ const int ro_send_w_inv_ok;
};
extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
@@ -461,15 +476,14 @@ void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_req *);
-int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
- struct rpcrdma_rep *);
+int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
/*
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
-void rpcrdma_destroy_req(struct rpcrdma_ia *, struct rpcrdma_req *);
+void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
@@ -482,10 +496,24 @@ void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
-struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
- size_t, gfp_t);
-void rpcrdma_free_regbuf(struct rpcrdma_ia *,
- struct rpcrdma_regbuf *);
+struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
+ gfp_t);
+bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
+void rpcrdma_free_regbuf(struct rpcrdma_regbuf *);
+
+static inline bool
+rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
+{
+ return rb->rg_device != NULL;
+}
+
+static inline bool
+rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+{
+ if (likely(rpcrdma_regbuf_is_mapped(rb)))
+ return true;
+ return __rpcrdma_dma_map_regbuf(ia, rb);
+}
int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);
@@ -507,15 +535,25 @@ rpcrdma_data_dir(bool writing)
*/
void rpcrdma_connect_worker(struct work_struct *);
void rpcrdma_conn_func(struct rpcrdma_ep *);
-void rpcrdma_reply_handler(struct rpcrdma_rep *);
+void rpcrdma_reply_handler(struct work_struct *);
/*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
*/
+
+enum rpcrdma_chunktype {
+ rpcrdma_noch = 0,
+ rpcrdma_readch,
+ rpcrdma_areadch,
+ rpcrdma_writech,
+ rpcrdma_replych
+};
+
+bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *,
+ u32, struct xdr_buf *, enum rpcrdma_chunktype);
+void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *);
int rpcrdma_marshal_req(struct rpc_rqst *);
-void rpcrdma_set_max_header_sizes(struct rpcrdma_ia *,
- struct rpcrdma_create_data_internal *,
- unsigned int);
+void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
/* RPC/RDMA module init - xprtrdma/transport.c
*/
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index bf168838a029..1758665d609c 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -473,7 +473,16 @@ static int xs_nospace(struct rpc_task *task)
spin_unlock_bh(&xprt->transport_lock);
/* Race breaker in case memory is freed before above code is called */
- sk->sk_write_space(sk);
+ if (ret == -EAGAIN) {
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
+ rcu_read_unlock();
+
+ sk->sk_write_space(sk);
+ }
return ret;
}
@@ -1074,7 +1083,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
skb = skb_recv_datagram(sk, 0, 1, &err);
if (skb != NULL) {
xs_udp_data_read_skb(&transport->xprt, sk, skb);
- skb_free_datagram_locked(sk, skb);
+ consume_skb(skb);
continue;
}
if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
@@ -2533,35 +2542,38 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
* we allocate pages instead doing a kmalloc like rpc_malloc is because we want
* to use the server side send routines.
*/
-static void *bc_malloc(struct rpc_task *task, size_t size)
+static int bc_malloc(struct rpc_task *task)
{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ size_t size = rqst->rq_callsize;
struct page *page;
struct rpc_buffer *buf;
- WARN_ON_ONCE(size > PAGE_SIZE - sizeof(struct rpc_buffer));
- if (size > PAGE_SIZE - sizeof(struct rpc_buffer))
- return NULL;
+ if (size > PAGE_SIZE - sizeof(struct rpc_buffer)) {
+ WARN_ONCE(1, "xprtsock: large bc buffer request (size %zu)\n",
+ size);
+ return -EINVAL;
+ }
page = alloc_page(GFP_KERNEL);
if (!page)
- return NULL;
+ return -ENOMEM;
buf = page_address(page);
buf->len = PAGE_SIZE;
- return buf->data;
+ rqst->rq_buffer = buf->data;
+ return 0;
}
/*
* Free the space allocated in the bc_alloc routine
*/
-static void bc_free(void *buffer)
+static void bc_free(struct rpc_task *task)
{
+ void *buffer = task->tk_rqstp->rq_buffer;
struct rpc_buffer *buf;
- if (!buffer)
- return;
-
buf = container_of(buffer, struct rpc_buffer, data);
free_page((unsigned long)buf);
}
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 5bc1a3d57401..919981324171 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -27,9 +27,9 @@
#endif
static struct ctl_table_set *
-net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
+net_ctl_header_lookup(struct ctl_table_root *root)
{
- return &namespaces->net_ns->sysctls;
+ return &current->nsproxy->net_ns->sysctls;
}
static int is_seen(struct ctl_table_set *set)
@@ -44,7 +44,7 @@ static int net_ctl_permissions(struct ctl_table_header *head,
struct net *net = container_of(head->set, struct net, sysctls);
/* Allow network administrator to have same access as root. */
- if (ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN)) {
int mode = (table->mode >> 6) & 7;
return (mode << 6) | (mode << 3) | mode;
}
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 3200059d14b2..26ca8dd64ded 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -135,15 +135,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
/* Users of the legacy API (tipc-config) can't handle that we add operations,
* so we have a separate genl handling for the new API.
*/
-struct genl_family tipc_genl_family = {
- .id = GENL_ID_GENERATE,
- .name = TIPC_GENL_V2_NAME,
- .version = TIPC_GENL_V2_VERSION,
- .hdrsize = 0,
- .maxattr = TIPC_NLA_MAX,
- .netnsok = true,
-};
-
static const struct genl_ops tipc_genl_v2_ops[] = {
{
.cmd = TIPC_NL_BEARER_DISABLE,
@@ -258,23 +249,33 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
#endif
};
+struct genl_family tipc_genl_family __ro_after_init = {
+ .name = TIPC_GENL_V2_NAME,
+ .version = TIPC_GENL_V2_VERSION,
+ .hdrsize = 0,
+ .maxattr = TIPC_NLA_MAX,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = tipc_genl_v2_ops,
+ .n_ops = ARRAY_SIZE(tipc_genl_v2_ops),
+};
+
int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
{
u32 maxattr = tipc_genl_family.maxattr;
- *attr = tipc_genl_family.attrbuf;
+ *attr = genl_family_attrbuf(&tipc_genl_family);
if (!*attr)
return -EOPNOTSUPP;
return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy);
}
-int tipc_netlink_start(void)
+int __init tipc_netlink_start(void)
{
int res;
- res = genl_register_family_with_ops(&tipc_genl_family,
- tipc_genl_v2_ops);
+ res = genl_register_family(&tipc_genl_family);
if (res) {
pr_err("Failed to register netlink interface\n");
return res;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 1fd464764765..e1ae8a8a2b8e 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1215,15 +1215,6 @@ send:
return err;
}
-static struct genl_family tipc_genl_compat_family = {
- .id = GENL_ID_GENERATE,
- .name = TIPC_GENL_NAME,
- .version = TIPC_GENL_VERSION,
- .hdrsize = TIPC_GENL_HDRLEN,
- .maxattr = 0,
- .netnsok = true,
-};
-
static struct genl_ops tipc_genl_compat_ops[] = {
{
.cmd = TIPC_GENL_CMD,
@@ -1231,12 +1222,22 @@ static struct genl_ops tipc_genl_compat_ops[] = {
},
};
-int tipc_netlink_compat_start(void)
+static struct genl_family tipc_genl_compat_family __ro_after_init = {
+ .name = TIPC_GENL_NAME,
+ .version = TIPC_GENL_VERSION,
+ .hdrsize = TIPC_GENL_HDRLEN,
+ .maxattr = 0,
+ .netnsok = true,
+ .module = THIS_MODULE,
+ .ops = tipc_genl_compat_ops,
+ .n_ops = ARRAY_SIZE(tipc_genl_compat_ops),
+};
+
+int __init tipc_netlink_compat_start(void)
{
int res;
- res = genl_register_family_with_ops(&tipc_genl_compat_family,
- tipc_genl_compat_ops);
+ res = genl_register_family(&tipc_genl_compat_family);
if (res) {
pr_err("Failed to register legacy compat interface\n");
return res;
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index d80cd3f7503f..78cab9c5a445 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -407,6 +407,7 @@ static int __tipc_nl_add_udp_addr(struct sk_buff *skb,
if (ntohs(addr->proto) == ETH_P_IP) {
struct sockaddr_in ip4;
+ memset(&ip4, 0, sizeof(ip4));
ip4.sin_family = AF_INET;
ip4.sin_port = addr->port;
ip4.sin_addr.s_addr = addr->ipv4.s_addr;
@@ -417,6 +418,7 @@ static int __tipc_nl_add_udp_addr(struct sk_buff *skb,
} else if (ntohs(addr->proto) == ETH_P_IPV6) {
struct sockaddr_in6 ip6;
+ memset(&ip6, 0, sizeof(ip6));
ip6.sin6_family = AF_INET6;
ip6.sin6_port = addr->port;
memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr));
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 8309687a56b0..145082e2ba36 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2475,28 +2475,13 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
return unix_stream_read_generic(&state);
}
-static ssize_t skb_unix_socket_splice(struct sock *sk,
- struct pipe_inode_info *pipe,
- struct splice_pipe_desc *spd)
-{
- int ret;
- struct unix_sock *u = unix_sk(sk);
-
- mutex_unlock(&u->iolock);
- ret = splice_to_pipe(pipe, spd);
- mutex_lock(&u->iolock);
-
- return ret;
-}
-
static int unix_stream_splice_actor(struct sk_buff *skb,
int skip, int chunk,
struct unix_stream_read_state *state)
{
return skb_splice_bits(skb, state->socket->sk,
UNIXCB(skb).consumed + skip,
- state->pipe, chunk, state->splice_flags,
- skb_unix_socket_splice);
+ state->pipe, chunk, state->splice_flags);
}
static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 17dbbe64cd73..8a398b3fb532 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -465,6 +465,8 @@ void vsock_pending_work(struct work_struct *work)
if (vsock_is_pending(sk)) {
vsock_remove_pending(listener, sk);
+
+ listener->sk_ack_backlog--;
} else if (!vsk->rejected) {
/* We are not on the pending list and accept() did not reject
* us, so we must have been accepted by our user process. We
@@ -475,8 +477,6 @@ void vsock_pending_work(struct work_struct *work)
goto out;
}
- listener->sk_ack_backlog--;
-
/* We need to remove ourself from the global connected sockets list so
* incoming packets can't find this socket, and to reduce the reference
* count.
@@ -2010,5 +2010,5 @@ EXPORT_SYMBOL_GPL(vsock_core_get_transport);
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
-MODULE_VERSION("1.0.1.0-k");
+MODULE_VERSION("1.0.2.0-k");
MODULE_LICENSE("GPL v2");
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 3f816e2971ee..5db731512014 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -572,16 +572,20 @@ struct d_level D_LEVEL[] = {
size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
-struct genl_family wimax_gnl_family = {
- .id = GENL_ID_GENERATE,
+static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
+ { .name = "msg", },
+};
+
+struct genl_family wimax_gnl_family __ro_after_init = {
.name = "WiMAX",
.version = WIMAX_GNL_VERSION,
.hdrsize = 0,
.maxattr = WIMAX_GNL_ATTR_MAX,
-};
-
-static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
- { .name = "msg", },
+ .module = THIS_MODULE,
+ .ops = wimax_gnl_ops,
+ .n_ops = ARRAY_SIZE(wimax_gnl_ops),
+ .mcgrps = wimax_gnl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(wimax_gnl_mcgrps),
};
@@ -596,11 +600,7 @@ int __init wimax_subsys_init(void)
d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params,
"wimax.debug");
- snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name),
- "WiMAX");
- result = genl_register_family_with_ops_groups(&wimax_gnl_family,
- wimax_gnl_ops,
- wimax_gnl_mcgrps);
+ result = genl_register_family(&wimax_gnl_family);
if (unlikely(result < 0)) {
pr_err("cannot register generic netlink family: %d\n", result);
goto error_register_family;
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 0f506220a3bd..5497d022fada 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -372,6 +372,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_P2P_DEVICE:
+ case NL80211_IFTYPE_NAN:
break;
case NL80211_IFTYPE_UNSPECIFIED:
case NUM_NL80211_IFTYPES:
@@ -946,6 +947,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_P2P_DEVICE:
+ case NL80211_IFTYPE_NAN:
/* these interface types don't really have a channel */
return;
case NL80211_IFTYPE_UNSPECIFIED:
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 4911cd997b9a..8201e6d7449e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -225,6 +225,23 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
}
}
+void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
+{
+ ASSERT_RTNL();
+
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN))
+ return;
+
+ if (!wdev->nan_started)
+ return;
+
+ rdev_stop_nan(rdev, wdev);
+ wdev->nan_started = false;
+
+ rdev->opencount--;
+}
+
void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -242,6 +259,9 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy)
case NL80211_IFTYPE_P2P_DEVICE:
cfg80211_stop_p2p_device(rdev, wdev);
break;
+ case NL80211_IFTYPE_NAN:
+ cfg80211_stop_nan(rdev, wdev);
+ break;
default:
break;
}
@@ -537,6 +557,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
c->limits[j].max > 1))
return -EINVAL;
+ /* Only a single NAN can be allowed */
+ if (WARN_ON(types & BIT(NL80211_IFTYPE_NAN) &&
+ c->limits[j].max > 1))
+ return -EINVAL;
+
cnt += c->limits[j].max;
/*
* Don't advertise an unsupported type
@@ -579,6 +604,11 @@ int wiphy_register(struct wiphy *wiphy)
!rdev->ops->tdls_cancel_channel_switch)))
return -EINVAL;
+ if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) &&
+ (!rdev->ops->start_nan || !rdev->ops->stop_nan ||
+ !rdev->ops->add_nan_func || !rdev->ops->del_nan_func)))
+ return -EINVAL;
+
/*
* if a wiphy has unsupported modes for regulatory channel enforcement,
* opt-out of enforcement checking
@@ -589,6 +619,7 @@ int wiphy_register(struct wiphy *wiphy)
BIT(NL80211_IFTYPE_P2P_GO) |
BIT(NL80211_IFTYPE_ADHOC) |
BIT(NL80211_IFTYPE_P2P_DEVICE) |
+ BIT(NL80211_IFTYPE_NAN) |
BIT(NL80211_IFTYPE_AP_VLAN) |
BIT(NL80211_IFTYPE_MONITOR)))
wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF;
@@ -916,6 +947,9 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
cfg80211_mlme_purge_registrations(wdev);
cfg80211_stop_p2p_device(rdev, wdev);
break;
+ case NL80211_IFTYPE_NAN:
+ cfg80211_stop_nan(rdev, wdev);
+ break;
default:
WARN_ON_ONCE(1);
break;
@@ -979,6 +1013,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev,
/* must be handled by mac80211/driver, has no APIs */
break;
case NL80211_IFTYPE_P2P_DEVICE:
+ case NL80211_IFTYPE_NAN:
/* cannot happen, has no netdev */
break;
case NL80211_IFTYPE_AP_VLAN:
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 5555e3c13ae9..08d2e948c9ad 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -249,8 +249,8 @@ struct cfg80211_event {
};
struct cfg80211_cached_keys {
- struct key_params params[4];
- u8 data[4][WLAN_KEY_LEN_WEP104];
+ struct key_params params[CFG80211_MAX_WEP_KEYS];
+ u8 data[CFG80211_MAX_WEP_KEYS][WLAN_KEY_LEN_WEP104];
int def;
};
@@ -488,6 +488,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
+void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev);
+
#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
#ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index eafdfa5798ae..364f900a3dc4 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -43,7 +43,8 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
cfg80211_hold_bss(bss_from_pub(bss));
wdev->current_bss = bss_from_pub(bss);
- cfg80211_upload_connect_keys(wdev);
+ if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
+ cfg80211_upload_connect_keys(wdev);
nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid,
GFP_KERNEL);
@@ -296,7 +297,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL);
if (!ck)
return -ENOMEM;
- for (i = 0; i < 4; i++)
+ for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
ck->params[i].key = ck->data[i];
}
err = __cfg80211_join_ibss(rdev, wdev->netdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d6abb0704db5..cbb48e26a871 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -634,6 +634,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
* fall through, P2P device only supports
* public action frames
*/
+ case NL80211_IFTYPE_NAN:
default:
err = -EOPNOTSUPP;
break;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fd111e2b559d..271707dacfea 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -32,22 +32,8 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
struct cfg80211_crypto_settings *settings,
int cipher_limit);
-static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
- struct genl_info *info);
-static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
- struct genl_info *info);
-
/* the netlink family */
-static struct genl_family nl80211_fam = {
- .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */
- .name = NL80211_GENL_NAME, /* have users key off the name instead */
- .hdrsize = 0, /* no private header */
- .version = 1, /* no particular meaning now */
- .maxattr = NL80211_ATTR_MAX,
- .netnsok = true,
- .pre_doit = nl80211_pre_doit,
- .post_doit = nl80211_post_doit,
-};
+static struct genl_family nl80211_fam;
/* multicast groups */
enum nl80211_multicast_groups {
@@ -56,6 +42,7 @@ enum nl80211_multicast_groups {
NL80211_MCGRP_REGULATORY,
NL80211_MCGRP_MLME,
NL80211_MCGRP_VENDOR,
+ NL80211_MCGRP_NAN,
NL80211_MCGRP_TESTMODE /* keep last - ifdef! */
};
@@ -65,6 +52,7 @@ static const struct genl_multicast_group nl80211_mcgrps[] = {
[NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG },
[NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME },
[NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR },
+ [NL80211_MCGRP_NAN] = { .name = NL80211_MULTICAST_GROUP_NAN },
#ifdef CONFIG_NL80211_TESTMODE
[NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE }
#endif
@@ -409,6 +397,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
.len = VHT_MUMIMO_GROUPS_DATA_LEN
},
[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN },
+ [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 },
+ [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 },
+ [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED },
};
/* policy for the key attributes */
@@ -502,6 +493,39 @@ nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = {
},
};
+/* policy for NAN function attributes */
+static const struct nla_policy
+nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = {
+ [NL80211_NAN_FUNC_TYPE] = { .type = NLA_U8 },
+ [NL80211_NAN_FUNC_SERVICE_ID] = { .type = NLA_BINARY,
+ .len = NL80211_NAN_FUNC_SERVICE_ID_LEN },
+ [NL80211_NAN_FUNC_PUBLISH_TYPE] = { .type = NLA_U8 },
+ [NL80211_NAN_FUNC_PUBLISH_BCAST] = { .type = NLA_FLAG },
+ [NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG },
+ [NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 },
+ [NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 },
+ [NL80211_NAN_FUNC_FOLLOW_UP_DEST] = { .len = ETH_ALEN },
+ [NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG },
+ [NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 },
+ [NL80211_NAN_FUNC_SERVICE_INFO] = { .type = NLA_BINARY,
+ .len = NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN },
+ [NL80211_NAN_FUNC_SRF] = { .type = NLA_NESTED },
+ [NL80211_NAN_FUNC_RX_MATCH_FILTER] = { .type = NLA_NESTED },
+ [NL80211_NAN_FUNC_TX_MATCH_FILTER] = { .type = NLA_NESTED },
+ [NL80211_NAN_FUNC_INSTANCE_ID] = { .type = NLA_U8 },
+ [NL80211_NAN_FUNC_TERM_REASON] = { .type = NLA_U8 },
+};
+
+/* policy for Service Response Filter attributes */
+static const struct nla_policy
+nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = {
+ [NL80211_NAN_SRF_INCLUDE] = { .type = NLA_FLAG },
+ [NL80211_NAN_SRF_BF] = { .type = NLA_BINARY,
+ .len = NL80211_NAN_FUNC_SRF_MAX_LEN },
+ [NL80211_NAN_SRF_BF_IDX] = { .type = NLA_U8 },
+ [NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED },
+};
+
static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct cfg80211_registered_device **rdev,
@@ -513,13 +537,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
if (!cb->args[0]) {
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- nl80211_fam.attrbuf, nl80211_fam.maxattr,
- nl80211_policy);
+ genl_family_attrbuf(&nl80211_fam),
+ nl80211_fam.maxattr, nl80211_policy);
if (err)
goto out_unlock;
- *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk),
- nl80211_fam.attrbuf);
+ *wdev = __cfg80211_wdev_from_attrs(
+ sock_net(skb->sk),
+ genl_family_attrbuf(&nl80211_fam));
if (IS_ERR(*wdev)) {
err = PTR_ERR(*wdev);
goto out_unlock;
@@ -934,6 +959,7 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
case NL80211_IFTYPE_UNSPECIFIED:
case NL80211_IFTYPE_OCB:
case NL80211_IFTYPE_MONITOR:
+ case NL80211_IFTYPE_NAN:
case NL80211_IFTYPE_P2P_DEVICE:
case NL80211_IFTYPE_WDS:
case NUM_NL80211_IFTYPES:
@@ -1842,7 +1868,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb,
struct netlink_callback *cb,
struct nl80211_dump_wiphy_state *state)
{
- struct nlattr **tb = nl80211_fam.attrbuf;
+ struct nlattr **tb = genl_family_attrbuf(&nl80211_fam);
int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
tb, nl80211_fam.maxattr, nl80211_policy);
/* ignore parse errors for backward compatibility */
@@ -2819,7 +2845,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
!(rdev->wiphy.interface_modes & (1 << type)))
return -EOPNOTSUPP;
- if ((type == NL80211_IFTYPE_P2P_DEVICE ||
+ if ((type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN ||
rdev->wiphy.features & NL80211_FEATURE_MAC_ON_CREATE) &&
info->attrs[NL80211_ATTR_MAC]) {
nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC],
@@ -2875,9 +2901,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
wdev->mesh_id_up_len);
wdev_unlock(wdev);
break;
+ case NL80211_IFTYPE_NAN:
case NL80211_IFTYPE_P2P_DEVICE:
/*
- * P2P Device doesn't have a netdev, so doesn't go
+ * P2P Device and NAN do not have a netdev, so don't go
* through the netdev notifier and must be added here
*/
mutex_init(&wdev->mtx);
@@ -3340,6 +3367,291 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info)
return err;
}
+static u32 rateset_to_mask(struct ieee80211_supported_band *sband,
+ u8 *rates, u8 rates_len)
+{
+ u8 i;
+ u32 mask = 0;
+
+ for (i = 0; i < rates_len; i++) {
+ int rate = (rates[i] & 0x7f) * 5;
+ int ridx;
+
+ for (ridx = 0; ridx < sband->n_bitrates; ridx++) {
+ struct ieee80211_rate *srate =
+ &sband->bitrates[ridx];
+ if (rate == srate->bitrate) {
+ mask |= 1 << ridx;
+ break;
+ }
+ }
+ if (ridx == sband->n_bitrates)
+ return 0; /* rate not found */
+ }
+
+ return mask;
+}
+
+static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband,
+ u8 *rates, u8 rates_len,
+ u8 mcs[IEEE80211_HT_MCS_MASK_LEN])
+{
+ u8 i;
+
+ memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN);
+
+ for (i = 0; i < rates_len; i++) {
+ int ridx, rbit;
+
+ ridx = rates[i] / 8;
+ rbit = BIT(rates[i] % 8);
+
+ /* check validity */
+ if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN))
+ return false;
+
+ /* check availability */
+ if (sband->ht_cap.mcs.rx_mask[ridx] & rbit)
+ mcs[ridx] |= rbit;
+ else
+ return false;
+ }
+
+ return true;
+}
+
+static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map)
+{
+ u16 mcs_mask = 0;
+
+ switch (vht_mcs_map) {
+ case IEEE80211_VHT_MCS_NOT_SUPPORTED:
+ break;
+ case IEEE80211_VHT_MCS_SUPPORT_0_7:
+ mcs_mask = 0x00FF;
+ break;
+ case IEEE80211_VHT_MCS_SUPPORT_0_8:
+ mcs_mask = 0x01FF;
+ break;
+ case IEEE80211_VHT_MCS_SUPPORT_0_9:
+ mcs_mask = 0x03FF;
+ break;
+ default:
+ break;
+ }
+
+ return mcs_mask;
+}
+
+static void vht_build_mcs_mask(u16 vht_mcs_map,
+ u16 vht_mcs_mask[NL80211_VHT_NSS_MAX])
+{
+ u8 nss;
+
+ for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) {
+ vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03);
+ vht_mcs_map >>= 2;
+ }
+}
+
+static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband,
+ struct nl80211_txrate_vht *txrate,
+ u16 mcs[NL80211_VHT_NSS_MAX])
+{
+ u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
+ u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {};
+ u8 i;
+
+ if (!sband->vht_cap.vht_supported)
+ return false;
+
+ memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX);
+
+ /* Build vht_mcs_mask from VHT capabilities */
+ vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask);
+
+ for (i = 0; i < NL80211_VHT_NSS_MAX; i++) {
+ if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i])
+ mcs[i] = txrate->mcs[i];
+ else
+ return false;
+ }
+
+ return true;
+}
+
+static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
+ [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY,
+ .len = NL80211_MAX_SUPP_RATES },
+ [NL80211_TXRATE_HT] = { .type = NLA_BINARY,
+ .len = NL80211_MAX_SUPP_HT_RATES },
+ [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)},
+ [NL80211_TXRATE_GI] = { .type = NLA_U8 },
+};
+
+static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
+ struct cfg80211_bitrate_mask *mask)
+{
+ struct nlattr *tb[NL80211_TXRATE_MAX + 1];
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ int rem, i;
+ struct nlattr *tx_rates;
+ struct ieee80211_supported_band *sband;
+ u16 vht_tx_mcs_map;
+
+ memset(mask, 0, sizeof(*mask));
+ /* Default to all rates enabled */
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
+ sband = rdev->wiphy.bands[i];
+
+ if (!sband)
+ continue;
+
+ mask->control[i].legacy = (1 << sband->n_bitrates) - 1;
+ memcpy(mask->control[i].ht_mcs,
+ sband->ht_cap.mcs.rx_mask,
+ sizeof(mask->control[i].ht_mcs));
+
+ if (!sband->vht_cap.vht_supported)
+ continue;
+
+ vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
+ vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs);
+ }
+
+ /* if no rates are given set it back to the defaults */
+ if (!info->attrs[NL80211_ATTR_TX_RATES])
+ goto out;
+
+ /* The nested attribute uses enum nl80211_band as the index. This maps
+ * directly to the enum nl80211_band values used in cfg80211.
+ */
+ BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8);
+ nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) {
+ enum nl80211_band band = nla_type(tx_rates);
+ int err;
+
+ if (band < 0 || band >= NUM_NL80211_BANDS)
+ return -EINVAL;
+ sband = rdev->wiphy.bands[band];
+ if (sband == NULL)
+ return -EINVAL;
+ err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates),
+ nla_len(tx_rates), nl80211_txattr_policy);
+ if (err)
+ return err;
+ if (tb[NL80211_TXRATE_LEGACY]) {
+ mask->control[band].legacy = rateset_to_mask(
+ sband,
+ nla_data(tb[NL80211_TXRATE_LEGACY]),
+ nla_len(tb[NL80211_TXRATE_LEGACY]));
+ if ((mask->control[band].legacy == 0) &&
+ nla_len(tb[NL80211_TXRATE_LEGACY]))
+ return -EINVAL;
+ }
+ if (tb[NL80211_TXRATE_HT]) {
+ if (!ht_rateset_to_mask(
+ sband,
+ nla_data(tb[NL80211_TXRATE_HT]),
+ nla_len(tb[NL80211_TXRATE_HT]),
+ mask->control[band].ht_mcs))
+ return -EINVAL;
+ }
+ if (tb[NL80211_TXRATE_VHT]) {
+ if (!vht_set_mcs_mask(
+ sband,
+ nla_data(tb[NL80211_TXRATE_VHT]),
+ mask->control[band].vht_mcs))
+ return -EINVAL;
+ }
+ if (tb[NL80211_TXRATE_GI]) {
+ mask->control[band].gi =
+ nla_get_u8(tb[NL80211_TXRATE_GI]);
+ if (mask->control[band].gi > NL80211_TXRATE_FORCE_LGI)
+ return -EINVAL;
+ }
+
+ if (mask->control[band].legacy == 0) {
+ /* don't allow empty legacy rates if HT or VHT
+ * are not even supported.
+ */
+ if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported ||
+ rdev->wiphy.bands[band]->vht_cap.vht_supported))
+ return -EINVAL;
+
+ for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
+ if (mask->control[band].ht_mcs[i])
+ goto out;
+
+ for (i = 0; i < NL80211_VHT_NSS_MAX; i++)
+ if (mask->control[band].vht_mcs[i])
+ goto out;
+
+ /* legacy and mcs rates may not be both empty */
+ return -EINVAL;
+ }
+ }
+
+out:
+ return 0;
+}
+
+static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev,
+ enum nl80211_band band,
+ struct cfg80211_bitrate_mask *beacon_rate)
+{
+ u32 count_ht, count_vht, i;
+ u32 rate = beacon_rate->control[band].legacy;
+
+ /* Allow only one rate */
+ if (hweight32(rate) > 1)
+ return -EINVAL;
+
+ count_ht = 0;
+ for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) {
+ if (hweight8(beacon_rate->control[band].ht_mcs[i]) > 1) {
+ return -EINVAL;
+ } else if (beacon_rate->control[band].ht_mcs[i]) {
+ count_ht++;
+ if (count_ht > 1)
+ return -EINVAL;
+ }
+ if (count_ht && rate)
+ return -EINVAL;
+ }
+
+ count_vht = 0;
+ for (i = 0; i < NL80211_VHT_NSS_MAX; i++) {
+ if (hweight16(beacon_rate->control[band].vht_mcs[i]) > 1) {
+ return -EINVAL;
+ } else if (beacon_rate->control[band].vht_mcs[i]) {
+ count_vht++;
+ if (count_vht > 1)
+ return -EINVAL;
+ }
+ if (count_vht && rate)
+ return -EINVAL;
+ }
+
+ if ((count_ht && count_vht) || (!rate && !count_ht && !count_vht))
+ return -EINVAL;
+
+ if (rate &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_BEACON_RATE_LEGACY))
+ return -EINVAL;
+ if (count_ht &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_BEACON_RATE_HT))
+ return -EINVAL;
+ if (count_vht &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_BEACON_RATE_VHT))
+ return -EINVAL;
+
+ return 0;
+}
+
static int nl80211_parse_beacon(struct nlattr *attrs[],
struct cfg80211_beacon_data *bcn)
{
@@ -3569,6 +3881,17 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
wdev->iftype))
return -EINVAL;
+ if (info->attrs[NL80211_ATTR_TX_RATES]) {
+ err = nl80211_parse_tx_bitrate_mask(info, &params.beacon_rate);
+ if (err)
+ return err;
+
+ err = validate_beacon_tx_rate(rdev, params.chandef.chan->band,
+ &params.beacon_rate);
+ if (err)
+ return err;
+ }
+
if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
params.smps_mode =
nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]);
@@ -6138,6 +6461,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
wiphy = &rdev->wiphy;
+ if (wdev->iftype == NL80211_IFTYPE_NAN)
+ return -EOPNOTSUPP;
+
if (!rdev->ops->scan)
return -EOPNOTSUPP;
@@ -7304,6 +7630,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
struct survey_info survey;
struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
@@ -7316,7 +7643,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
return res;
/* prepare_wdev_dump parsed the attributes */
- radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
+ radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
if (!wdev->netdev) {
res = -EINVAL;
@@ -8139,14 +8466,14 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
*/
phy_idx = cb->args[0] - 1;
} else {
+ struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
+
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- nl80211_fam.attrbuf, nl80211_fam.maxattr,
- nl80211_policy);
+ attrbuf, nl80211_fam.maxattr, nl80211_policy);
if (err)
goto out_err;
- rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk),
- nl80211_fam.attrbuf);
+ rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
if (IS_ERR(rdev)) {
err = PTR_ERR(rdev);
goto out_err;
@@ -8154,9 +8481,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
phy_idx = rdev->wiphy_idx;
rdev = NULL;
- if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA])
- cb->args[1] =
- (long)nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA];
+ if (attrbuf[NL80211_ATTR_TESTDATA])
+ cb->args[1] = (long)attrbuf[NL80211_ATTR_TESTDATA];
}
if (cb->args[1]) {
@@ -8641,238 +8967,21 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb,
return rdev_cancel_remain_on_channel(rdev, wdev, cookie);
}
-static u32 rateset_to_mask(struct ieee80211_supported_band *sband,
- u8 *rates, u8 rates_len)
-{
- u8 i;
- u32 mask = 0;
-
- for (i = 0; i < rates_len; i++) {
- int rate = (rates[i] & 0x7f) * 5;
- int ridx;
-
- for (ridx = 0; ridx < sband->n_bitrates; ridx++) {
- struct ieee80211_rate *srate =
- &sband->bitrates[ridx];
- if (rate == srate->bitrate) {
- mask |= 1 << ridx;
- break;
- }
- }
- if (ridx == sband->n_bitrates)
- return 0; /* rate not found */
- }
-
- return mask;
-}
-
-static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband,
- u8 *rates, u8 rates_len,
- u8 mcs[IEEE80211_HT_MCS_MASK_LEN])
-{
- u8 i;
-
- memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN);
-
- for (i = 0; i < rates_len; i++) {
- int ridx, rbit;
-
- ridx = rates[i] / 8;
- rbit = BIT(rates[i] % 8);
-
- /* check validity */
- if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN))
- return false;
-
- /* check availability */
- if (sband->ht_cap.mcs.rx_mask[ridx] & rbit)
- mcs[ridx] |= rbit;
- else
- return false;
- }
-
- return true;
-}
-
-static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map)
-{
- u16 mcs_mask = 0;
-
- switch (vht_mcs_map) {
- case IEEE80211_VHT_MCS_NOT_SUPPORTED:
- break;
- case IEEE80211_VHT_MCS_SUPPORT_0_7:
- mcs_mask = 0x00FF;
- break;
- case IEEE80211_VHT_MCS_SUPPORT_0_8:
- mcs_mask = 0x01FF;
- break;
- case IEEE80211_VHT_MCS_SUPPORT_0_9:
- mcs_mask = 0x03FF;
- break;
- default:
- break;
- }
-
- return mcs_mask;
-}
-
-static void vht_build_mcs_mask(u16 vht_mcs_map,
- u16 vht_mcs_mask[NL80211_VHT_NSS_MAX])
-{
- u8 nss;
-
- for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) {
- vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03);
- vht_mcs_map >>= 2;
- }
-}
-
-static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband,
- struct nl80211_txrate_vht *txrate,
- u16 mcs[NL80211_VHT_NSS_MAX])
-{
- u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
- u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {};
- u8 i;
-
- if (!sband->vht_cap.vht_supported)
- return false;
-
- memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX);
-
- /* Build vht_mcs_mask from VHT capabilities */
- vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask);
-
- for (i = 0; i < NL80211_VHT_NSS_MAX; i++) {
- if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i])
- mcs[i] = txrate->mcs[i];
- else
- return false;
- }
-
- return true;
-}
-
-static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
- [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY,
- .len = NL80211_MAX_SUPP_RATES },
- [NL80211_TXRATE_HT] = { .type = NLA_BINARY,
- .len = NL80211_MAX_SUPP_HT_RATES },
- [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)},
- [NL80211_TXRATE_GI] = { .type = NLA_U8 },
-};
-
static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
struct genl_info *info)
{
- struct nlattr *tb[NL80211_TXRATE_MAX + 1];
- struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct cfg80211_bitrate_mask mask;
- int rem, i;
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
- struct nlattr *tx_rates;
- struct ieee80211_supported_band *sband;
- u16 vht_tx_mcs_map;
+ int err;
if (!rdev->ops->set_bitrate_mask)
return -EOPNOTSUPP;
- memset(&mask, 0, sizeof(mask));
- /* Default to all rates enabled */
- for (i = 0; i < NUM_NL80211_BANDS; i++) {
- sband = rdev->wiphy.bands[i];
-
- if (!sband)
- continue;
-
- mask.control[i].legacy = (1 << sband->n_bitrates) - 1;
- memcpy(mask.control[i].ht_mcs,
- sband->ht_cap.mcs.rx_mask,
- sizeof(mask.control[i].ht_mcs));
-
- if (!sband->vht_cap.vht_supported)
- continue;
-
- vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
- vht_build_mcs_mask(vht_tx_mcs_map, mask.control[i].vht_mcs);
- }
-
- /* if no rates are given set it back to the defaults */
- if (!info->attrs[NL80211_ATTR_TX_RATES])
- goto out;
-
- /*
- * The nested attribute uses enum nl80211_band as the index. This maps
- * directly to the enum nl80211_band values used in cfg80211.
- */
- BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8);
- nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) {
- enum nl80211_band band = nla_type(tx_rates);
- int err;
-
- if (band < 0 || band >= NUM_NL80211_BANDS)
- return -EINVAL;
- sband = rdev->wiphy.bands[band];
- if (sband == NULL)
- return -EINVAL;
- err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates),
- nla_len(tx_rates), nl80211_txattr_policy);
- if (err)
- return err;
- if (tb[NL80211_TXRATE_LEGACY]) {
- mask.control[band].legacy = rateset_to_mask(
- sband,
- nla_data(tb[NL80211_TXRATE_LEGACY]),
- nla_len(tb[NL80211_TXRATE_LEGACY]));
- if ((mask.control[band].legacy == 0) &&
- nla_len(tb[NL80211_TXRATE_LEGACY]))
- return -EINVAL;
- }
- if (tb[NL80211_TXRATE_HT]) {
- if (!ht_rateset_to_mask(
- sband,
- nla_data(tb[NL80211_TXRATE_HT]),
- nla_len(tb[NL80211_TXRATE_HT]),
- mask.control[band].ht_mcs))
- return -EINVAL;
- }
- if (tb[NL80211_TXRATE_VHT]) {
- if (!vht_set_mcs_mask(
- sband,
- nla_data(tb[NL80211_TXRATE_VHT]),
- mask.control[band].vht_mcs))
- return -EINVAL;
- }
- if (tb[NL80211_TXRATE_GI]) {
- mask.control[band].gi =
- nla_get_u8(tb[NL80211_TXRATE_GI]);
- if (mask.control[band].gi > NL80211_TXRATE_FORCE_LGI)
- return -EINVAL;
- }
-
- if (mask.control[band].legacy == 0) {
- /* don't allow empty legacy rates if HT or VHT
- * are not even supported.
- */
- if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported ||
- rdev->wiphy.bands[band]->vht_cap.vht_supported))
- return -EINVAL;
-
- for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
- if (mask.control[band].ht_mcs[i])
- goto out;
-
- for (i = 0; i < NL80211_VHT_NSS_MAX; i++)
- if (mask.control[band].vht_mcs[i])
- goto out;
-
- /* legacy and mcs rates may not be both empty */
- return -EINVAL;
- }
- }
+ err = nl80211_parse_tx_bitrate_mask(info, &mask);
+ if (err)
+ return err;
-out:
return rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
}
@@ -8898,6 +9007,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_P2P_GO:
case NL80211_IFTYPE_P2P_DEVICE:
break;
+ case NL80211_IFTYPE_NAN:
default:
return -EOPNOTSUPP;
}
@@ -8943,6 +9053,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_P2P_GO:
break;
+ case NL80211_IFTYPE_NAN:
default:
return -EOPNOTSUPP;
}
@@ -9059,6 +9170,7 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in
case NL80211_IFTYPE_P2P_GO:
case NL80211_IFTYPE_P2P_DEVICE:
break;
+ case NL80211_IFTYPE_NAN:
default:
return -EOPNOTSUPP;
}
@@ -9340,6 +9452,17 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ if (info->attrs[NL80211_ATTR_TX_RATES]) {
+ err = nl80211_parse_tx_bitrate_mask(info, &setup.beacon_rate);
+ if (err)
+ return err;
+
+ err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band,
+ &setup.beacon_rate);
+ if (err)
+ return err;
+ }
+
return cfg80211_join_mesh(rdev, dev, &setup, &cfg);
}
@@ -10414,6 +10537,549 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info)
return 0;
}
+static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev = info->user_ptr[1];
+ struct cfg80211_nan_conf conf = {};
+ int err;
+
+ if (wdev->iftype != NL80211_IFTYPE_NAN)
+ return -EOPNOTSUPP;
+
+ if (wdev->nan_started)
+ return -EEXIST;
+
+ if (rfkill_blocked(rdev->rfkill))
+ return -ERFKILL;
+
+ if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF])
+ return -EINVAL;
+
+ if (!info->attrs[NL80211_ATTR_NAN_DUAL])
+ return -EINVAL;
+
+ conf.master_pref =
+ nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]);
+ if (!conf.master_pref)
+ return -EINVAL;
+
+ conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]);
+
+ err = rdev_start_nan(rdev, wdev, &conf);
+ if (err)
+ return err;
+
+ wdev->nan_started = true;
+ rdev->opencount++;
+
+ return 0;
+}
+
+static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev = info->user_ptr[1];
+
+ if (wdev->iftype != NL80211_IFTYPE_NAN)
+ return -EOPNOTSUPP;
+
+ cfg80211_stop_nan(rdev, wdev);
+
+ return 0;
+}
+
+static int validate_nan_filter(struct nlattr *filter_attr)
+{
+ struct nlattr *attr;
+ int len = 0, n_entries = 0, rem;
+
+ nla_for_each_nested(attr, filter_attr, rem) {
+ len += nla_len(attr);
+ n_entries++;
+ }
+
+ if (len >= U8_MAX)
+ return -EINVAL;
+
+ return n_entries;
+}
+
+static int handle_nan_filter(struct nlattr *attr_filter,
+ struct cfg80211_nan_func *func,
+ bool tx)
+{
+ struct nlattr *attr;
+ int n_entries, rem, i;
+ struct cfg80211_nan_func_filter *filter;
+
+ n_entries = validate_nan_filter(attr_filter);
+ if (n_entries < 0)
+ return n_entries;
+
+ BUILD_BUG_ON(sizeof(*func->rx_filters) != sizeof(*func->tx_filters));
+
+ filter = kcalloc(n_entries, sizeof(*func->rx_filters), GFP_KERNEL);
+ if (!filter)
+ return -ENOMEM;
+
+ i = 0;
+ nla_for_each_nested(attr, attr_filter, rem) {
+ filter[i].filter = kmemdup(nla_data(attr), nla_len(attr),
+ GFP_KERNEL);
+ filter[i].len = nla_len(attr);
+ i++;
+ }
+ if (tx) {
+ func->num_tx_filters = n_entries;
+ func->tx_filters = filter;
+ } else {
+ func->num_rx_filters = n_entries;
+ func->rx_filters = filter;
+ }
+
+ return 0;
+}
+
+static int nl80211_nan_add_func(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev = info->user_ptr[1];
+ struct nlattr *tb[NUM_NL80211_NAN_FUNC_ATTR], *func_attr;
+ struct cfg80211_nan_func *func;
+ struct sk_buff *msg = NULL;
+ void *hdr = NULL;
+ int err = 0;
+
+ if (wdev->iftype != NL80211_IFTYPE_NAN)
+ return -EOPNOTSUPP;
+
+ if (!wdev->nan_started)
+ return -ENOTCONN;
+
+ if (!info->attrs[NL80211_ATTR_NAN_FUNC])
+ return -EINVAL;
+
+ if (wdev->owner_nlportid &&
+ wdev->owner_nlportid != info->snd_portid)
+ return -ENOTCONN;
+
+ err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX,
+ nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]),
+ nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]),
+ nl80211_nan_func_policy);
+ if (err)
+ return err;
+
+ func = kzalloc(sizeof(*func), GFP_KERNEL);
+ if (!func)
+ return -ENOMEM;
+
+ func->cookie = wdev->wiphy->cookie_counter++;
+
+ if (!tb[NL80211_NAN_FUNC_TYPE] ||
+ nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]) > NL80211_NAN_FUNC_MAX_TYPE) {
+ err = -EINVAL;
+ goto out;
+ }
+
+
+ func->type = nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]);
+
+ if (!tb[NL80211_NAN_FUNC_SERVICE_ID]) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(func->service_id, nla_data(tb[NL80211_NAN_FUNC_SERVICE_ID]),
+ sizeof(func->service_id));
+
+ func->close_range =
+ nla_get_flag(tb[NL80211_NAN_FUNC_CLOSE_RANGE]);
+
+ if (tb[NL80211_NAN_FUNC_SERVICE_INFO]) {
+ func->serv_spec_info_len =
+ nla_len(tb[NL80211_NAN_FUNC_SERVICE_INFO]);
+ func->serv_spec_info =
+ kmemdup(nla_data(tb[NL80211_NAN_FUNC_SERVICE_INFO]),
+ func->serv_spec_info_len,
+ GFP_KERNEL);
+ if (!func->serv_spec_info) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
+ if (tb[NL80211_NAN_FUNC_TTL])
+ func->ttl = nla_get_u32(tb[NL80211_NAN_FUNC_TTL]);
+
+ switch (func->type) {
+ case NL80211_NAN_FUNC_PUBLISH:
+ if (!tb[NL80211_NAN_FUNC_PUBLISH_TYPE]) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ func->publish_type =
+ nla_get_u8(tb[NL80211_NAN_FUNC_PUBLISH_TYPE]);
+ func->publish_bcast =
+ nla_get_flag(tb[NL80211_NAN_FUNC_PUBLISH_BCAST]);
+
+ if ((!(func->publish_type & NL80211_NAN_SOLICITED_PUBLISH)) &&
+ func->publish_bcast) {
+ err = -EINVAL;
+ goto out;
+ }
+ break;
+ case NL80211_NAN_FUNC_SUBSCRIBE:
+ func->subscribe_active =
+ nla_get_flag(tb[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE]);
+ break;
+ case NL80211_NAN_FUNC_FOLLOW_UP:
+ if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
+ !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ func->followup_id =
+ nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_ID]);
+ func->followup_reqid =
+ nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]);
+ memcpy(func->followup_dest.addr,
+ nla_data(tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]),
+ sizeof(func->followup_dest.addr));
+ if (func->ttl) {
+ err = -EINVAL;
+ goto out;
+ }
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (tb[NL80211_NAN_FUNC_SRF]) {
+ struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR];
+
+ err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX,
+ nla_data(tb[NL80211_NAN_FUNC_SRF]),
+ nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL);
+ if (err)
+ goto out;
+
+ func->srf_include =
+ nla_get_flag(srf_tb[NL80211_NAN_SRF_INCLUDE]);
+
+ if (srf_tb[NL80211_NAN_SRF_BF]) {
+ if (srf_tb[NL80211_NAN_SRF_MAC_ADDRS] ||
+ !srf_tb[NL80211_NAN_SRF_BF_IDX]) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ func->srf_bf_len =
+ nla_len(srf_tb[NL80211_NAN_SRF_BF]);
+ func->srf_bf =
+ kmemdup(nla_data(srf_tb[NL80211_NAN_SRF_BF]),
+ func->srf_bf_len, GFP_KERNEL);
+ if (!func->srf_bf) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ func->srf_bf_idx =
+ nla_get_u8(srf_tb[NL80211_NAN_SRF_BF_IDX]);
+ } else {
+ struct nlattr *attr, *mac_attr =
+ srf_tb[NL80211_NAN_SRF_MAC_ADDRS];
+ int n_entries, rem, i = 0;
+
+ if (!mac_attr) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ n_entries = validate_acl_mac_addrs(mac_attr);
+ if (n_entries <= 0) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ func->srf_num_macs = n_entries;
+ func->srf_macs =
+ kzalloc(sizeof(*func->srf_macs) * n_entries,
+ GFP_KERNEL);
+ if (!func->srf_macs) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ nla_for_each_nested(attr, mac_attr, rem)
+ memcpy(func->srf_macs[i++].addr, nla_data(attr),
+ sizeof(*func->srf_macs));
+ }
+ }
+
+ if (tb[NL80211_NAN_FUNC_TX_MATCH_FILTER]) {
+ err = handle_nan_filter(tb[NL80211_NAN_FUNC_TX_MATCH_FILTER],
+ func, true);
+ if (err)
+ goto out;
+ }
+
+ if (tb[NL80211_NAN_FUNC_RX_MATCH_FILTER]) {
+ err = handle_nan_filter(tb[NL80211_NAN_FUNC_RX_MATCH_FILTER],
+ func, false);
+ if (err)
+ goto out;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
+ NL80211_CMD_ADD_NAN_FUNCTION);
+ /* This can't really happen - we just allocated 4KB */
+ if (WARN_ON(!hdr)) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = rdev_add_nan_func(rdev, wdev, func);
+out:
+ if (err < 0) {
+ cfg80211_free_nan_func(func);
+ nlmsg_free(msg);
+ return err;
+ }
+
+ /* propagate the instance id and cookie to userspace */
+ if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, func->cookie,
+ NL80211_ATTR_PAD))
+ goto nla_put_failure;
+
+ func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC);
+ if (!func_attr)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID,
+ func->instance_id))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, func_attr);
+
+ genlmsg_end(msg, hdr);
+ return genlmsg_reply(msg, info);
+
+nla_put_failure:
+ nlmsg_free(msg);
+ return -ENOBUFS;
+}
+
+static int nl80211_nan_del_func(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev = info->user_ptr[1];
+ u64 cookie;
+
+ if (wdev->iftype != NL80211_IFTYPE_NAN)
+ return -EOPNOTSUPP;
+
+ if (!wdev->nan_started)
+ return -ENOTCONN;
+
+ if (!info->attrs[NL80211_ATTR_COOKIE])
+ return -EINVAL;
+
+ if (wdev->owner_nlportid &&
+ wdev->owner_nlportid != info->snd_portid)
+ return -ENOTCONN;
+
+ cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);
+
+ rdev_del_nan_func(rdev, wdev, cookie);
+
+ return 0;
+}
+
+static int nl80211_nan_change_config(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev = info->user_ptr[1];
+ struct cfg80211_nan_conf conf = {};
+ u32 changed = 0;
+
+ if (wdev->iftype != NL80211_IFTYPE_NAN)
+ return -EOPNOTSUPP;
+
+ if (!wdev->nan_started)
+ return -ENOTCONN;
+
+ if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) {
+ conf.master_pref =
+ nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]);
+ if (conf.master_pref <= 1 || conf.master_pref == 255)
+ return -EINVAL;
+
+ changed |= CFG80211_NAN_CONF_CHANGED_PREF;
+ }
+
+ if (info->attrs[NL80211_ATTR_NAN_DUAL]) {
+ conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]);
+ changed |= CFG80211_NAN_CONF_CHANGED_DUAL;
+ }
+
+ if (!changed)
+ return -EINVAL;
+
+ return rdev_nan_change_conf(rdev, wdev, &conf, changed);
+}
+
+void cfg80211_nan_match(struct wireless_dev *wdev,
+ struct cfg80211_nan_match_params *match, gfp_t gfp)
+{
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ struct nlattr *match_attr, *local_func_attr, *peer_func_attr;
+ struct sk_buff *msg;
+ void *hdr;
+
+ if (WARN_ON(!match->inst_id || !match->peer_inst_id || !match->addr))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_MATCH);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+ (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
+ wdev->netdev->ifindex)) ||
+ nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
+ NL80211_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, match->cookie,
+ NL80211_ATTR_PAD) ||
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr))
+ goto nla_put_failure;
+
+ match_attr = nla_nest_start(msg, NL80211_ATTR_NAN_MATCH);
+ if (!match_attr)
+ goto nla_put_failure;
+
+ local_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_LOCAL);
+ if (!local_func_attr)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->inst_id))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, local_func_attr);
+
+ peer_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_PEER);
+ if (!peer_func_attr)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL80211_NAN_FUNC_TYPE, match->type) ||
+ nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->peer_inst_id))
+ goto nla_put_failure;
+
+ if (match->info && match->info_len &&
+ nla_put(msg, NL80211_NAN_FUNC_SERVICE_INFO, match->info_len,
+ match->info))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, peer_func_attr);
+ nla_nest_end(msg, match_attr);
+ genlmsg_end(msg, hdr);
+
+ if (!wdev->owner_nlportid)
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
+ msg, 0, NL80211_MCGRP_NAN, gfp);
+ else
+ genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+ wdev->owner_nlportid);
+
+ return;
+
+nla_put_failure:
+ nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_nan_match);
+
+void cfg80211_nan_func_terminated(struct wireless_dev *wdev,
+ u8 inst_id,
+ enum nl80211_nan_func_term_reason reason,
+ u64 cookie, gfp_t gfp)
+{
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ struct sk_buff *msg;
+ struct nlattr *func_attr;
+ void *hdr;
+
+ if (WARN_ON(!inst_id))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_NAN_FUNCTION);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+ (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
+ wdev->netdev->ifindex)) ||
+ nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
+ NL80211_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
+ NL80211_ATTR_PAD))
+ goto nla_put_failure;
+
+ func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC);
+ if (!func_attr)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, inst_id) ||
+ nla_put_u8(msg, NL80211_NAN_FUNC_TERM_REASON, reason))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, func_attr);
+ genlmsg_end(msg, hdr);
+
+ if (!wdev->owner_nlportid)
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
+ msg, 0, NL80211_MCGRP_NAN, gfp);
+ else
+ genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+ wdev->owner_nlportid);
+
+ return;
+
+nla_put_failure:
+ nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_nan_func_terminated);
+
static int nl80211_get_protocol_features(struct sk_buff *skb,
struct genl_info *info)
{
@@ -10598,6 +11264,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
struct cfg80211_registered_device **rdev,
struct wireless_dev **wdev)
{
+ struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
u32 vid, subcmd;
unsigned int i;
int vcmd_idx = -1;
@@ -10633,31 +11300,28 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
}
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
- nl80211_fam.attrbuf, nl80211_fam.maxattr,
- nl80211_policy);
+ attrbuf, nl80211_fam.maxattr, nl80211_policy);
if (err)
goto out_unlock;
- if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] ||
- !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
+ if (!attrbuf[NL80211_ATTR_VENDOR_ID] ||
+ !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
err = -EINVAL;
goto out_unlock;
}
- *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk),
- nl80211_fam.attrbuf);
+ *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf);
if (IS_ERR(*wdev))
*wdev = NULL;
- *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk),
- nl80211_fam.attrbuf);
+ *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
if (IS_ERR(*rdev)) {
err = PTR_ERR(*rdev);
goto out_unlock;
}
- vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]);
- subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
+ vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]);
+ subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) {
const struct wiphy_vendor_command *vcmd;
@@ -10681,9 +11345,9 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
goto out_unlock;
}
- if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) {
- data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]);
- data_len = nla_len(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]);
+ if (attrbuf[NL80211_ATTR_VENDOR_DATA]) {
+ data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]);
+ data_len = nla_len(attrbuf[NL80211_ATTR_VENDOR_DATA]);
}
/* 0 is the first index - add 1 to parse only once */
@@ -11115,7 +11779,14 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
dev_hold(dev);
} else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) {
- if (!wdev->p2p_started) {
+ if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE &&
+ !wdev->p2p_started) {
+ if (rtnl)
+ rtnl_unlock();
+ return -ENETDOWN;
+ }
+ if (wdev->iftype == NL80211_IFTYPE_NAN &&
+ !wdev->nan_started) {
if (rtnl)
rtnl_unlock();
return -ENETDOWN;
@@ -11749,6 +12420,46 @@ static const struct genl_ops nl80211_ops[] = {
NL80211_FLAG_NEED_RTNL,
},
{
+ .cmd = NL80211_CMD_START_NAN,
+ .doit = nl80211_start_nan,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WDEV |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_STOP_NAN,
+ .doit = nl80211_stop_nan,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_ADD_NAN_FUNCTION,
+ .doit = nl80211_nan_add_func,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_DEL_NAN_FUNCTION,
+ .doit = nl80211_nan_del_func,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_CHANGE_NAN_CONFIG,
+ .doit = nl80211_nan_change_config,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
.cmd = NL80211_CMD_SET_MCAST_RATE,
.doit = nl80211_set_mcast_rate,
.policy = nl80211_policy,
@@ -11875,6 +12586,21 @@ static const struct genl_ops nl80211_ops[] = {
},
};
+static struct genl_family nl80211_fam __ro_after_init = {
+ .name = NL80211_GENL_NAME, /* have users key off the name instead */
+ .hdrsize = 0, /* no private header */
+ .version = 1, /* no particular meaning now */
+ .maxattr = NL80211_ATTR_MAX,
+ .netnsok = true,
+ .pre_doit = nl80211_pre_doit,
+ .post_doit = nl80211_post_doit,
+ .module = THIS_MODULE,
+ .ops = nl80211_ops,
+ .n_ops = ARRAY_SIZE(nl80211_ops),
+ .mcgrps = nl80211_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps),
+};
+
/* notification functions */
void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
@@ -13837,12 +14563,11 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev)
/* initialisation/exit functions */
-int nl80211_init(void)
+int __init nl80211_init(void)
{
int err;
- err = genl_register_family_with_ops_groups(&nl80211_fam, nl80211_ops,
- nl80211_mcgrps);
+ err = genl_register_family(&nl80211_fam);
if (err)
return err;
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 85ff30bee2b9..11cf83c8ad4f 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -887,6 +887,64 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev,
trace_rdev_return_void(&rdev->wiphy);
}
+static inline int rdev_start_nan(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev,
+ struct cfg80211_nan_conf *conf)
+{
+ int ret;
+
+ trace_rdev_start_nan(&rdev->wiphy, wdev, conf);
+ ret = rdev->ops->start_nan(&rdev->wiphy, wdev, conf);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
+static inline void rdev_stop_nan(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
+{
+ trace_rdev_stop_nan(&rdev->wiphy, wdev);
+ rdev->ops->stop_nan(&rdev->wiphy, wdev);
+ trace_rdev_return_void(&rdev->wiphy);
+}
+
+static inline int
+rdev_add_nan_func(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev,
+ struct cfg80211_nan_func *nan_func)
+{
+ int ret;
+
+ trace_rdev_add_nan_func(&rdev->wiphy, wdev, nan_func);
+ ret = rdev->ops->add_nan_func(&rdev->wiphy, wdev, nan_func);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
+static inline void rdev_del_nan_func(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev, u64 cookie)
+{
+ trace_rdev_del_nan_func(&rdev->wiphy, wdev, cookie);
+ rdev->ops->del_nan_func(&rdev->wiphy, wdev, cookie);
+ trace_rdev_return_void(&rdev->wiphy);
+}
+
+static inline int
+rdev_nan_change_conf(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev,
+ struct cfg80211_nan_conf *conf, u32 changes)
+{
+ int ret;
+
+ trace_rdev_nan_change_conf(&rdev->wiphy, wdev, conf, changes);
+ if (rdev->ops->nan_change_conf)
+ ret = rdev->ops->nan_change_conf(&rdev->wiphy, wdev, conf,
+ changes);
+ else
+ ret = -ENOTSUPP;
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct cfg80211_acl_data *params)
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index c08a3b57dca1..a77db333927e 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -726,7 +726,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
wdev->current_bss = bss_from_pub(bss);
- cfg80211_upload_connect_keys(wdev);
+ if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
+ cfg80211_upload_connect_keys(wdev);
rcu_read_lock();
country_ie = ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY);
@@ -1043,6 +1044,9 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
connect->crypto.ciphers_pairwise[0] = cipher;
}
}
+
+ connect->crypto.wep_keys = connkeys->params;
+ connect->crypto.wep_tx_key = connkeys->def;
} else {
if (WARN_ON(connkeys))
return -EINVAL;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 72b5255cefe2..a3d0a91b1e09 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1889,6 +1889,96 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device,
TP_ARGS(wiphy, wdev)
);
+TRACE_EVENT(rdev_start_nan,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+ struct cfg80211_nan_conf *conf),
+ TP_ARGS(wiphy, wdev, conf),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ WDEV_ENTRY
+ __field(u8, master_pref)
+ __field(u8, dual);
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ WDEV_ASSIGN;
+ __entry->master_pref = conf->master_pref;
+ __entry->dual = conf->dual;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT
+ ", master preference: %u, dual: %d",
+ WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref,
+ __entry->dual)
+);
+
+TRACE_EVENT(rdev_nan_change_conf,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+ struct cfg80211_nan_conf *conf, u32 changes),
+ TP_ARGS(wiphy, wdev, conf, changes),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ WDEV_ENTRY
+ __field(u8, master_pref)
+ __field(u8, dual);
+ __field(u32, changes);
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ WDEV_ASSIGN;
+ __entry->master_pref = conf->master_pref;
+ __entry->dual = conf->dual;
+ __entry->changes = changes;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT
+ ", master preference: %u, dual: %d, changes: %x",
+ WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref,
+ __entry->dual, __entry->changes)
+);
+
+DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+ TP_ARGS(wiphy, wdev)
+);
+
+TRACE_EVENT(rdev_add_nan_func,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+ const struct cfg80211_nan_func *func),
+ TP_ARGS(wiphy, wdev, func),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ WDEV_ENTRY
+ __field(u8, func_type)
+ __field(u64, cookie)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ WDEV_ASSIGN;
+ __entry->func_type = func->type;
+ __entry->cookie = func->cookie
+ ),
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type=%u, cookie=%llu",
+ WIPHY_PR_ARG, WDEV_PR_ARG, __entry->func_type,
+ __entry->cookie)
+);
+
+TRACE_EVENT(rdev_del_nan_func,
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+ u64 cookie),
+ TP_ARGS(wiphy, wdev, cookie),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ WDEV_ENTRY
+ __field(u64, cookie)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ WDEV_ASSIGN;
+ __entry->cookie = cookie;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie=%llu",
+ WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie)
+);
+
TRACE_EVENT(rdev_set_mac_acl,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
struct cfg80211_acl_data *params),
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 9e6e2aaa7766..8edce22d1b93 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -912,7 +912,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
if (!wdev->connect_keys)
return;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) {
if (!wdev->connect_keys->params[i].cipher)
continue;
if (rdev_add_key(rdev, dev, i, false, NULL,
@@ -1008,8 +1008,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
if (otype == NL80211_IFTYPE_AP_VLAN)
return -EOPNOTSUPP;
- /* cannot change into P2P device type */
- if (ntype == NL80211_IFTYPE_P2P_DEVICE)
+ /* cannot change into P2P device or NAN */
+ if (ntype == NL80211_IFTYPE_P2P_DEVICE ||
+ ntype == NL80211_IFTYPE_NAN)
return -EOPNOTSUPP;
if (!rdev->ops->change_virtual_intf ||
@@ -1088,6 +1089,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
/* not happening */
break;
case NL80211_IFTYPE_P2P_DEVICE:
+ case NL80211_IFTYPE_NAN:
WARN_ON(1);
break;
}
@@ -1760,6 +1762,28 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
}
EXPORT_SYMBOL(cfg80211_get_station);
+void cfg80211_free_nan_func(struct cfg80211_nan_func *f)
+{
+ int i;
+
+ if (!f)
+ return;
+
+ kfree(f->serv_spec_info);
+ kfree(f->srf_bf);
+ kfree(f->srf_macs);
+ for (i = 0; i < f->num_rx_filters; i++)
+ kfree(f->rx_filters[i].filter);
+
+ for (i = 0; i < f->num_tx_filters; i++)
+ kfree(f->tx_filters[i].filter);
+
+ kfree(f->rx_filters);
+ kfree(f->tx_filters);
+ kfree(f);
+}
+EXPORT_SYMBOL(cfg80211_free_nan_func);
+
/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
/* Ethernet-II snap header (RFC1042 for most EtherTypes) */
const unsigned char rfc1042_header[] __aligned(2) =
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 7b97d43b27e1..a220156cf217 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -406,12 +406,16 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
if (pairwise && !addr)
return -EINVAL;
+ /*
+ * In many cases we won't actually need this, but it's better
+ * to do it first in case the allocation fails. Don't use wext.
+ */
if (!wdev->wext.keys) {
wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys),
GFP_KERNEL);
if (!wdev->wext.keys)
return -ENOMEM;
- for (i = 0; i < 4; i++)
+ for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
wdev->wext.keys->params[i].key =
wdev->wext.keys->data[i];
}
@@ -493,7 +497,13 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
if (err)
return err;
- if (!addr) {
+ /*
+ * We only need to store WEP keys, since they're the only keys that
+ * can be be set before a connection is established and persist after
+ * disconnecting.
+ */
+ if (!addr && (params->cipher == WLAN_CIPHER_SUITE_WEP40 ||
+ params->cipher == WLAN_CIPHER_SUITE_WEP104)) {
wdev->wext.keys->params[idx] = *params;
memcpy(wdev->wext.keys->data[idx],
params->key, params->key_len);
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 88f1f6931ab8..995163830a61 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -46,7 +46,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL);
if (!ck)
return -ENOMEM;
- for (i = 0; i < 4; i++)
+ for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
ck->params[i].key = ck->data[i];
}
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 9c4fbd8935f4..ba2b539879bc 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -50,12 +50,18 @@ static const struct snmp_mib xfrm_mib_list[] = {
static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
{
+ unsigned long buff[LINUX_MIB_XFRMMAX];
struct net *net = seq->private;
int i;
+
+ memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX);
+
+ snmp_get_cpu_field_batch(buff, xfrm_mib_list,
+ net->mib.xfrm_statistics);
for (i = 0; xfrm_mib_list[i].name; i++)
seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
- snmp_fold_field(net->mib.xfrm_statistics,
- xfrm_mib_list[i].entry));
+ buff[i]);
+
return 0;
}