summary | refs | log | tree | commit | diff
path: root/include/linux/skbuff.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r-- include/linux/skbuff.h 277
1 files changed, 76 insertions, 201 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d3d10556f0fa..ca8afa382bf2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -43,6 +43,7 @@
#include <linux/netfilter/nf_conntrack_common.h>
#endif
#include <net/net_debug.h>
+#include <net/dropreason.h>
/**
* DOC: skb checksums
@@ -337,184 +338,6 @@ struct sk_buff_head {
struct sk_buff;
-/* The reason of skb drop, which is used in kfree_skb_reason().
- * en...maybe they should be splited by group?
- *
- * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is
- * used to translate the reason to string.
- */
-enum skb_drop_reason {
- SKB_NOT_DROPPED_YET = 0,
- SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */
- SKB_DROP_REASON_NO_SOCKET, /* socket not found */
- SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */
- SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */
- SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */
- SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */
- SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */
- SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current
- * host (interface is in promisc
- * mode)
- */
- SKB_DROP_REASON_IP_CSUM, /* IP checksum error */
- SKB_DROP_REASON_IP_INHDR, /* there is something wrong with
- * IP header (see
- * IPSTATS_MIB_INHDRERRORS)
- */
- SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed.
- * see the document for rp_filter
- * in ip-sysctl.rst for more
- * information
- */
- SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2
- * is multicast, but L3 is
- * unicast.
- */
- SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */
- SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */
- SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */
- SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as
- * udp packet drop out of
- * udp_memory_allocated.
- */
- SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one
- * expected, corresponding
- * to LINUX_MIB_TCPMD5NOTFOUND
- */
- SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not
- * expecting one, corresponding
- * to LINUX_MIB_TCPMD5UNEXPECTED
- */
- SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong,
- * corresponding to
- * LINUX_MIB_TCPMD5FAILURE
- */
- SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket
- * backlog (see
- * LINUX_MIB_TCPBACKLOGDROP)
- */
- SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */
- SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero,
- * see LINUX_MIB_TCPZEROWINDOWDROP
- */
- SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already
- * received before (spurious retrans
- * may happened), see
- * LINUX_MIB_DELAYEDACKLOST
- */
- SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window,
- * the seq of the first byte exceed
- * the right edges of receive
- * window
- */
- SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in
- * the ofo queue, corresponding to
- * LINUX_MIB_TCPOFOMERGE
- */
- SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to
- * LINUX_MIB_PAWSESTABREJECTED
- */
- SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */
- SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */
- SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */
- SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */
- SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */
- SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */
- SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */
- SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */
- SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */
- SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */
- SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */
- SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* dropped by
- * BPF_PROG_TYPE_CGROUP_SKB
- * eBPF program
- */
- SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */
- SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh
- * entry
- */
- SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */
- SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh
- * entry is full
- */
- SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */
- SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */
- SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet
- * outputting (failed to enqueue to
- * current qdisc)
- */
- SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to
- * the per CPU backlog queue. This
- * can be caused by backlog queue
- * full (see netdev_max_backlog in
- * net.rst) or RPS flow limit
- */
- SKB_DROP_REASON_XDP, /* dropped by XDP in input path */
- SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */
- SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented
- * or not supported
- */
- SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation
- * error
- */
- SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */
- SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from
- * user space, e.g., via
- * zerocopy_sg_from_iter()
- * or skb_orphan_frags_rx()
- */
- SKB_DROP_REASON_DEV_HDR, /* device driver specific
- * header/metadata is invalid
- */
- /* the device is not ready to xmit/recv due to any of its data
- * structure that is not up/ready/initialized, e.g., the IFF_UP is
- * not set, or driver specific tun->tfiles[txq] is not initialized
- */
- SKB_DROP_REASON_DEV_READY,
- SKB_DROP_REASON_FULL_RING, /* ring buffer is full */
- SKB_DROP_REASON_NOMEM, /* error due to OOM */
- SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header
- * from networking data, e.g., failed
- * to pull the protocol header from
- * frags via pskb_may_pull()
- */
- SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly
- * attached to tun/tap, e.g., via
- * TUNSETFILTEREBPF
- */
- SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented
- * at tun/tap, e.g., check_filter()
- */
- SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */
- SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC
- * 2211, such as a broadcasts
- * ICMP_TIMESTAMP
- */
- SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding
- * to IPSTATS_MIB_INADDRERRORS
- */
- SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding
- * to IPSTATS_MIB_INADDRERRORS
- */
- SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed
- * the MTU)
- */
- SKB_DROP_REASON_MAX,
-};
-
-#define SKB_DR_INIT(name, reason) \
- enum skb_drop_reason name = SKB_DROP_REASON_##reason
-#define SKB_DR(name) \
- SKB_DR_INIT(name, NOT_SPECIFIED)
-#define SKB_DR_SET(name, reason) \
- (name = SKB_DROP_REASON_##reason)
-#define SKB_DR_OR(name, reason) \
- do { \
- if (name == SKB_DROP_REASON_NOT_SPECIFIED || \
- name == SKB_NOT_DROPPED_YET) \
- SKB_DR_SET(name, reason); \
- } while (0)
-
/* To allow 64K frame to be packed as single skb without frag_list we
* require 64K/PAGE_SIZE pages plus 1 additional page to allow for
* buffers which do not start on a page boundary.
@@ -686,10 +509,18 @@ enum {
* charged to the kernel memory.
*/
SKBFL_PURE_ZEROCOPY = BIT(2),
+
+ SKBFL_DONT_ORPHAN = BIT(3),
+
+ /* page references are managed by the ubuf_info, so it's safe to
+ * use frags only up until ubuf_info is released
+ */
+ SKBFL_MANAGED_FRAG_REFS = BIT(4),
};
#define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG)
-#define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY)
+#define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \
+ SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS)
/*
* The callback notifies userspace to release buffers when skb DMA is done in
@@ -1773,13 +1604,14 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
bool success);
-int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *from, size_t length);
+int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb, struct iov_iter *from,
+ size_t length);
static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb,
struct msghdr *msg, int len)
{
- return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
+ return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len);
}
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
@@ -1806,6 +1638,11 @@ static inline bool skb_zcopy_pure(const struct sk_buff *skb)
return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY;
}
+static inline bool skb_zcopy_managed(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->flags & SKBFL_MANAGED_FRAG_REFS;
+}
+
static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1,
const struct sk_buff *skb2)
{
@@ -1880,6 +1717,14 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success)
}
}
+void __skb_zcopy_downgrade_managed(struct sk_buff *skb);
+
+static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb)
+{
+ if (unlikely(skb_zcopy_managed(skb)))
+ __skb_zcopy_downgrade_managed(skb);
+}
+
static inline void skb_mark_not_on_list(struct sk_buff *skb)
{
skb->next = NULL;
@@ -2528,6 +2373,34 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
return skb_headlen(skb) + __skb_pagelen(skb);
}
+static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo,
+ int i, struct page *page,
+ int off, int size)
+{
+ skb_frag_t *frag = &shinfo->frags[i];
+
+ /*
+ * Only fill in the frag descriptor. This helper takes no skb, so page
+ * pfmemalloc is not propagated here; __skb_fill_page_desc() does that
+ * itself (via page_is_pfmemalloc()) after calling this helper.
+ */
+ frag->bv_page = page;
+ frag->bv_offset = off;
+ skb_frag_size_set(frag, size);
+}
+
+/**
+ * skb_len_add - add a number to len, data_len and truesize of skb
+ * @skb: buffer to add len to
+ * @delta: number of bytes to add
+ */
+static inline void skb_len_add(struct sk_buff *skb, int delta)
+{
+ skb->len += delta;
+ skb->data_len += delta;
+ skb->truesize += delta;
+}
+
/**
* __skb_fill_page_desc - initialise a paged fragment in an skb
* @skb: buffer containing fragment to be initialised
@@ -2544,17 +2417,7 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
struct page *page, int off, int size)
{
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
- /*
- * Propagate page pfmemalloc to the skb if we can. The problem is
- * that not all callers have unique ownership of the page but rely
- * on page_is_pfmemalloc doing the right thing(tm).
- */
- frag->bv_page = page;
- frag->bv_offset = off;
- skb_frag_size_set(frag, size);
-
+ __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size);
page = compound_head(page);
if (page_is_pfmemalloc(page))
skb->pfmemalloc = true;
@@ -2624,6 +2487,14 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
#endif /* NET_SKBUFF_DATA_USES_OFFSET */
+static inline void skb_assert_len(struct sk_buff *skb)
+{
+#ifdef CONFIG_DEBUG_NET
+ if (WARN_ONCE(!skb->len, "%s\n", __func__))
+ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+#endif /* CONFIG_DEBUG_NET */
+}
+
/*
* Add data to an sk_buff
*/
@@ -2940,8 +2811,14 @@ static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
skb->network_header += offset;
}
+static inline int skb_mac_header_was_set(const struct sk_buff *skb)
+{
+ return skb->mac_header != (typeof(skb->mac_header))~0U;
+}
+
static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
{
+ DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb));
return skb->head + skb->mac_header;
}
@@ -2952,14 +2829,10 @@ static inline int skb_mac_offset(const struct sk_buff *skb)
static inline u32 skb_mac_header_len(const struct sk_buff *skb)
{
+ DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb));
return skb->network_header - skb->mac_header;
}
-static inline int skb_mac_header_was_set(const struct sk_buff *skb)
-{
- return skb->mac_header != (typeof(skb->mac_header))~0U;
-}
-
static inline void skb_unset_mac_header(struct sk_buff *skb)
{
skb->mac_header = (typeof(skb->mac_header))~0U;
@@ -3182,8 +3055,7 @@ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
{
if (likely(!skb_zcopy(skb)))
return 0;
- if (!skb_zcopy_is_nouarg(skb) &&
- skb_uarg(skb)->callback == msg_zerocopy_callback)
+ if (skb_shinfo(skb)->flags & SKBFL_DONT_ORPHAN)
return 0;
return skb_copy_ubufs(skb, gfp_mask);
}
@@ -3496,7 +3368,10 @@ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
*/
static inline void skb_frag_unref(struct sk_buff *skb, int f)
{
- __skb_frag_unref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+ if (!skb_zcopy_managed(skb))
+ __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle);
}
/**