summaryrefslogtreecommitdiff
path: root/net/core/sock.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-07 00:45:08 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-07 00:45:08 +0300
commitaae3dbb4776e7916b6cd442d00159bea27a695c1 (patch)
treed074c5d783a81e7e2e084b1eba77f57459da7e37 /net/core/sock.c
parentec3604c7a5aae8953545b0d05495357009a960e5 (diff)
parent66bed8465a808400eb14562510e26c8818082cb8 (diff)
downloadlinux-aae3dbb4776e7916b6cd442d00159bea27a695c1.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Support ipv6 checksum offload in sunvnet driver, from Shannon Nelson. 2) Move to RB-tree instead of custom AVL code in inetpeer, from Eric Dumazet. 3) Allow generic XDP to work on virtual devices, from John Fastabend. 4) Add bpf device maps and XDP_REDIRECT, which can be used to build arbitrary switching frameworks using XDP. From John Fastabend. 5) Remove UFO offloads from the tree, gave us little other than bugs. 6) Remove the IPSEC flow cache, from Florian Westphal. 7) Support ipv6 route offload in mlxsw driver. 8) Support VF representors in bnxt_en, from Sathya Perla. 9) Add support for forward error correction modes to ethtool, from Vidya Sagar Ravipati. 10) Add time filter for packet scheduler action dumping, from Jamal Hadi Salim. 11) Extend the zerocopy sendmsg() used by virtio and tap to regular sockets via MSG_ZEROCOPY. From Willem de Bruijn. 12) Significantly rework value tracking in the BPF verifier, from Edward Cree. 13) Add new jump instructions to eBPF, from Daniel Borkmann. 14) Rework rtnetlink plumbing so that operations can be run without taking the RTNL semaphore. From Florian Westphal. 15) Support XDP in tap driver, from Jason Wang. 16) Add 32-bit eBPF JIT for ARM, from Shubham Bansal. 17) Add Huawei hinic ethernet driver. 18) Allow to report MD5 keys in TCP inet_diag dumps, from Ivan Delalande. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1780 commits) i40e: point wb_desc at the nvm_wb_desc during i40e_read_nvm_aq i40e: avoid NVM acquire deadlock during NVM update drivers: net: xgene: Remove return statement from void function drivers: net: xgene: Configure tx/rx delay for ACPI drivers: net: xgene: Read tx/rx delay for ACPI rocker: fix kcalloc parameter order rds: Fix non-atomic operation on shared flag variable net: sched: don't use GFP_KERNEL under spin lock vhost_net: correctly check tx avail during rx busy polling net: mdio-mux: add mdio_mux parameter to mdio_mux_init() rxrpc: Make service connection lookup always check for retry net: stmmac: Delete dead code for MDIO registration gianfar: Fix Tx flow control deactivation cxgb4: Ignore MPS_TX_INT_CAUSE[Bubble] for T6 cxgb4: Fix pause frame count in t4_get_port_stats cxgb4: fix memory leak tun: rename generic_xdp to skb_xdp tun: reserve extra headroom only when XDP is set net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping net: dsa: bcm_sf2: Advertise number of egress queues ...
Diffstat (limited to 'net/core/sock.c')
-rw-r--r--net/core/sock.c84
1 files changed, 70 insertions, 14 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index ac2a404c73eb..9b7b6bbb2a23 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -307,16 +307,6 @@ static struct lock_class_key af_wlock_keys[AF_MAX];
static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
-/* Take into consideration the size of the struct sk_buff overhead in the
- * determination of these values, since that is non-constant across
- * platforms. This makes socket queueing behavior and performance
- * not depend upon such differences.
- */
-#define _SK_MEM_PACKETS 256
-#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
-#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
@@ -1055,6 +1045,20 @@ set_rcvbuf:
if (val == 1)
dst_negative_advice(sk);
break;
+
+ case SO_ZEROCOPY:
+ if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ ret = -ENOTSUPP;
+ else if (sk->sk_protocol != IPPROTO_TCP)
+ ret = -ENOTSUPP;
+ else if (sk->sk_state != TCP_CLOSE)
+ ret = -EBUSY;
+ else if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1383,6 +1387,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val64 = sock_gen_cookie(sk);
break;
+ case SO_ZEROCOPY:
+ v.val = sock_flag(sk, SOCK_ZEROCOPY);
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1670,6 +1678,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+ atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
@@ -1757,7 +1766,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
sk->sk_route_caps &= ~sk->sk_route_nocaps;
if (sk_can_gso(sk)) {
- if (dst->header_len) {
+ if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
@@ -1923,6 +1932,33 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
}
EXPORT_SYMBOL(sock_wmalloc);
+static void sock_ofree(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+
+ atomic_sub(skb->truesize, &sk->sk_omem_alloc);
+}
+
+struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+ gfp_t priority)
+{
+ struct sk_buff *skb;
+
+ /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
+ if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
+ sysctl_optmem_max)
+ return NULL;
+
+ skb = alloc_skb(size, priority);
+ if (!skb)
+ return NULL;
+
+ atomic_add(skb->truesize, &sk->sk_omem_alloc);
+ skb->sk = sk;
+ skb->destructor = sock_ofree;
+ return skb;
+}
+
/*
* Allocate a memory block from the socket's option memory buffer.
*/
@@ -2408,9 +2444,6 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
int sk_set_peek_off(struct sock *sk, int val)
{
- if (val < 0)
- return -EINVAL;
-
sk->sk_peek_off = val;
return 0;
}
@@ -2500,6 +2533,12 @@ int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
}
EXPORT_SYMBOL(sock_no_sendmsg);
+int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
+{
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_sendmsg_locked);
+
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
int flags)
{
@@ -2528,6 +2567,22 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz
}
EXPORT_SYMBOL(sock_no_sendpage);
+ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ ssize_t res;
+ struct msghdr msg = {.msg_flags = flags};
+ struct kvec iov;
+ char *kaddr = kmap(page);
+
+ iov.iov_base = kaddr + offset;
+ iov.iov_len = size;
+ res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
+ kunmap(page);
+ return res;
+}
+EXPORT_SYMBOL(sock_no_sendpage_locked);
+
/*
* Default Socket Callbacks
*/
@@ -2673,6 +2728,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_stamp = SK_DEFAULT_STAMP;
+ atomic_set(&sk->sk_zckey, 0);
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;