author     Jakub Kicinski <kuba@kernel.org>    2022-09-29 04:51:27 +0300
committer  Jakub Kicinski <kuba@kernel.org>    2022-09-29 04:51:28 +0300
commit     578b054684e6ad46f6089b726c05054fc5e3cd74 (patch)
tree       b14a20279c36ca9fac082210cf5419e057075337
parent     929a6cdfaeac9de6a1004eb18999e1439527cfb4 (diff)
parent     e7d2b510165fff6bedc9cca88c071ad846850c74 (diff)
download   linux-578b054684e6ad46f6089b726c05054fc5e3cd74.tar.xz
Merge branch 'shrink-struct-ubuf_info'
Pavel Begunkov says:

====================
shrink struct ubuf_info

struct ubuf_info is large, but not all fields are needed in all cases. We have
limited space in io_uring for it, and a large ubuf_info prevents some struct
embedding, even though we use only a subset of its fields. It's also not very
clean trying to use this typeless extra space.

Shrink struct ubuf_info to only the fields needed in generic paths, namely
->callback, ->refcnt and ->flags, which take only 16 bytes, and make
MSG_ZEROCOPY and some other users embed it into a larger struct
ubuf_info_msgzc mimicking the former ubuf_info.

Note: xen/vhost may also get some cleanup on top by creating new structs
containing ubuf_info but with proper types.
====================

Link: https://lore.kernel.org/r/cover.1663892211.git.asml.silence@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
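As a reading aid (not part of the patch): the series relies on the standard pattern of embedding a small base struct in a larger per-user struct and recovering the outer struct with container_of(), which is what the new uarg_to_msgzc() helper does. The sketch below is a minimal, compilable userspace illustration of that pattern with simplified types; the callback signature, the plain int refcnt, and msgzc_complete()/main() are illustrative assumptions, not the kernel definitions.

/* Minimal userspace sketch of the embed + container_of() pattern. */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ubuf_info {		/* generic part kept small (~16 bytes in the patch) */
	void (*callback)(struct ubuf_info *uarg, int success);
	int refcnt;		/* refcount_t in the kernel; simplified here */
	uint8_t flags;
};

struct ubuf_info_msgzc {	/* MSG_ZEROCOPY-style user embedding the base */
	struct ubuf_info ubuf;	/* embedded generic part, as in the patch */
	uint32_t id;
	uint32_t len;
	uint32_t bytelen;
};

#define uarg_to_msgzc(p) container_of((p), struct ubuf_info_msgzc, ubuf)

static void msgzc_complete(struct ubuf_info *uarg, int success)
{
	/* generic paths only pass ubuf_info around; the user downcasts */
	struct ubuf_info_msgzc *zc = uarg_to_msgzc(uarg);

	printf("completed id=%u len=%u success=%d\n",
	       (unsigned)zc->id, (unsigned)zc->len, success);
}

int main(void)
{
	struct ubuf_info_msgzc zc = {
		.ubuf = { .callback = msgzc_complete, .refcnt = 1 },
		.id = 7, .len = 1, .bytelen = 4096,
	};
	struct ubuf_info *generic = &zc.ubuf;	/* what generic code sees */

	generic->callback(generic, 1);
	return 0;
}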
-rw-r--r--   drivers/net/xen-netback/common.h     |  2
-rw-r--r--   drivers/net/xen-netback/interface.c  |  4
-rw-r--r--   drivers/net/xen-netback/netback.c    |  7
-rw-r--r--   drivers/vhost/net.c                  | 15
-rw-r--r--   include/linux/skbuff.h               | 11
-rw-r--r--   net/core/skbuff.c                    | 38
-rw-r--r--   net/ipv4/ip_output.c                 |  2
-rw-r--r--   net/ipv4/tcp.c                       |  2
-rw-r--r--   net/ipv6/ip6_output.c                |  2
9 files changed, 48 insertions, 35 deletions
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 8174d7b2966c..1545cbee77a4 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -62,7 +62,7 @@ struct pending_tx_info {
* ubuf_to_vif is a helper which finds the struct xenvif from a pointer
* to this field.
*/
- struct ubuf_info callback_struct;
+ struct ubuf_info_msgzc callback_struct;
};
#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index fb32ae82d9b0..e579ecd40b74 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -591,8 +591,8 @@ int xenvif_init_queue(struct xenvif_queue *queue)
}
for (i = 0; i < MAX_PENDING_REQS; i++) {
- queue->pending_tx_info[i].callback_struct = (struct ubuf_info)
- { .callback = xenvif_zerocopy_callback,
+ queue->pending_tx_info[i].callback_struct = (struct ubuf_info_msgzc)
+ { { .callback = xenvif_zerocopy_callback },
{ { .ctx = NULL,
.desc = i } } };
queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index a256695fc89e..3d2081bbbc86 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -133,7 +133,7 @@ static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
/* Find the containing VIF's structure from a pointer in pending_tx_info array
*/
-static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf)
+static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info_msgzc *ubuf)
{
u16 pending_idx = ubuf->desc;
struct pending_tx_info *temp =
@@ -1228,11 +1228,12 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
return work_done;
}
-void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf,
+void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf_base,
bool zerocopy_success)
{
unsigned long flags;
pending_ring_idx_t index;
+ struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
struct xenvif_queue *queue = ubuf_to_queue(ubuf);
/* This is the only place where we grab this lock, to protect callbacks
@@ -1241,7 +1242,7 @@ void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf,
spin_lock_irqsave(&queue->callback_lock, flags);
do {
u16 pending_idx = ubuf->desc;
- ubuf = (struct ubuf_info *) ubuf->ctx;
+ ubuf = (struct ubuf_info_msgzc *) ubuf->ctx;
BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
MAX_PENDING_REQS);
index = pending_index(queue->dealloc_prod);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 68e4ecd1cc0e..d7a04d573988 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -118,7 +118,7 @@ struct vhost_net_virtqueue {
/* Number of XDP frames batched */
int batched_xdp;
/* an array of userspace buffers info */
- struct ubuf_info *ubuf_info;
+ struct ubuf_info_msgzc *ubuf_info;
/* Reference counting for outstanding ubufs.
* Protected by vq mutex. Writers must also take device mutex. */
struct vhost_net_ubuf_ref *ubufs;
@@ -382,8 +382,9 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
}
static void vhost_zerocopy_callback(struct sk_buff *skb,
- struct ubuf_info *ubuf, bool success)
+ struct ubuf_info *ubuf_base, bool success)
{
+ struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
struct vhost_net_ubuf_ref *ubufs = ubuf->ctx;
struct vhost_virtqueue *vq = ubufs->vq;
int cnt;
@@ -871,7 +872,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
size_t len, total_len = 0;
int err;
struct vhost_net_ubuf_ref *ubufs;
- struct ubuf_info *ubuf;
+ struct ubuf_info_msgzc *ubuf;
bool zcopy_used;
int sent_pkts = 0;
@@ -907,14 +908,14 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
ubuf = nvq->ubuf_info + nvq->upend_idx;
vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
- ubuf->callback = vhost_zerocopy_callback;
ubuf->ctx = nvq->ubufs;
ubuf->desc = nvq->upend_idx;
- ubuf->flags = SKBFL_ZEROCOPY_FRAG;
- refcount_set(&ubuf->refcnt, 1);
+ ubuf->ubuf.callback = vhost_zerocopy_callback;
+ ubuf->ubuf.flags = SKBFL_ZEROCOPY_FRAG;
+ refcount_set(&ubuf->ubuf.refcnt, 1);
msg.msg_control = &ctl;
ctl.type = TUN_MSG_UBUF;
- ctl.ptr = ubuf;
+ ctl.ptr = &ubuf->ubuf;
msg.msg_controllen = sizeof(ctl);
ubufs = nvq->ubufs;
atomic_inc(&ubufs->refcount);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f15d5b62539b..920eb6413fee 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -533,6 +533,13 @@ enum {
struct ubuf_info {
void (*callback)(struct sk_buff *, struct ubuf_info *,
bool zerocopy_success);
+ refcount_t refcnt;
+ u8 flags;
+};
+
+struct ubuf_info_msgzc {
+ struct ubuf_info ubuf;
+
union {
struct {
unsigned long desc;
@@ -545,8 +552,6 @@ struct ubuf_info {
u32 bytelen;
};
};
- refcount_t refcnt;
- u8 flags;
struct mmpin {
struct user_struct *user;
@@ -555,6 +560,8 @@ struct ubuf_info {
};
#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+#define uarg_to_msgzc(ubuf_ptr) container_of((ubuf_ptr), struct ubuf_info_msgzc, \
+ ubuf)
int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
void mm_unaccount_pinned_pages(struct mmpin *mmp);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f1b8b20fc20b..bbcfb1c7f59e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1188,7 +1188,7 @@ EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
{
- struct ubuf_info *uarg;
+ struct ubuf_info_msgzc *uarg;
struct sk_buff *skb;
WARN_ON_ONCE(!in_task());
@@ -1206,19 +1206,19 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
return NULL;
}
- uarg->callback = msg_zerocopy_callback;
+ uarg->ubuf.callback = msg_zerocopy_callback;
uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
uarg->len = 1;
uarg->bytelen = size;
uarg->zerocopy = 1;
- uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
- refcount_set(&uarg->refcnt, 1);
+ uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
+ refcount_set(&uarg->ubuf.refcnt, 1);
sock_hold(sk);
- return uarg;
+ return &uarg->ubuf;
}
-static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
+static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg)
{
return container_of((void *)uarg, struct sk_buff, cb);
}
@@ -1227,6 +1227,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg)
{
if (uarg) {
+ struct ubuf_info_msgzc *uarg_zc;
const u32 byte_limit = 1 << 19; /* limit to a few TSO */
u32 bytelen, next;
@@ -1242,8 +1243,9 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
return NULL;
}
- bytelen = uarg->bytelen + size;
- if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
+ uarg_zc = uarg_to_msgzc(uarg);
+ bytelen = uarg_zc->bytelen + size;
+ if (uarg_zc->len == USHRT_MAX - 1 || bytelen > byte_limit) {
/* TCP can create new skb to attach new uarg */
if (sk->sk_type == SOCK_STREAM)
goto new_alloc;
@@ -1251,11 +1253,11 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
}
next = (u32)atomic_read(&sk->sk_zckey);
- if ((u32)(uarg->id + uarg->len) == next) {
- if (mm_account_pinned_pages(&uarg->mmp, size))
+ if ((u32)(uarg_zc->id + uarg_zc->len) == next) {
+ if (mm_account_pinned_pages(&uarg_zc->mmp, size))
return NULL;
- uarg->len++;
- uarg->bytelen = bytelen;
+ uarg_zc->len++;
+ uarg_zc->bytelen = bytelen;
atomic_set(&sk->sk_zckey, ++next);
/* no extra ref when appending to datagram (MSG_MORE) */
@@ -1291,7 +1293,7 @@ static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
return true;
}
-static void __msg_zerocopy_callback(struct ubuf_info *uarg)
+static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg)
{
struct sk_buff *tail, *skb = skb_from_uarg(uarg);
struct sock_exterr_skb *serr;
@@ -1344,19 +1346,21 @@ release:
void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
bool success)
{
- uarg->zerocopy = uarg->zerocopy & success;
+ struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg);
+
+ uarg_zc->zerocopy = uarg_zc->zerocopy & success;
if (refcount_dec_and_test(&uarg->refcnt))
- __msg_zerocopy_callback(uarg);
+ __msg_zerocopy_callback(uarg_zc);
}
EXPORT_SYMBOL_GPL(msg_zerocopy_callback);
void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{
- struct sock *sk = skb_from_uarg(uarg)->sk;
+ struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk;
atomic_dec(&sk->sk_zckey);
- uarg->len--;
+ uarg_to_msgzc(uarg)->len--;
if (have_uref)
msg_zerocopy_callback(NULL, uarg, true);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8201cd423ff9..1ae83ad629b2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1043,7 +1043,7 @@ static int __ip_append_data(struct sock *sk,
paged = true;
zc = true;
} else {
- uarg->zerocopy = 0;
+ uarg_to_msgzc(uarg)->zerocopy = 0;
skb_zcopy_set(skb, uarg, &extra_uref);
}
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5702ca9b952d..a109a4cb1e47 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1239,7 +1239,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
}
zc = sk->sk_route_caps & NETIF_F_SG;
if (!zc)
- uarg->zerocopy = 0;
+ uarg_to_msgzc(uarg)->zerocopy = 0;
}
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a60176e913a8..e19507614f64 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1567,7 +1567,7 @@ emsgsize:
paged = true;
zc = true;
} else {
- uarg->zerocopy = 0;
+ uarg_to_msgzc(uarg)->zerocopy = 0;
skb_zcopy_set(skb, uarg, &extra_uref);
}
}