summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2019-08-30 21:44:24 +0300
committerIlya Dryomov <idryomov@gmail.com>2019-09-16 13:06:25 +0300
commit10c12851a022662bf6085bd4384b4ebed4c447ce (patch)
tree609a2973a0bbbd33afc073e6a122ddec858efe3f
parent6fd4e634835208ddb331234bfa51d75396a5c42c (diff)
downloadlinux-10c12851a022662bf6085bd4384b4ebed4c447ce.tar.xz
libceph: avoid a __vmalloc() deadlock in ceph_kvmalloc()
The vmalloc allocator doesn't fully respect the specified gfp mask: while the actual pages are allocated as requested, the page table pages are always allocated with GFP_KERNEL. ceph_kvmalloc() may be called with GFP_NOFS and GFP_NOIO (for ceph and rbd respectively), so this may result in a deadlock. There is no real reason for the current PAGE_ALLOC_COSTLY_ORDER logic, it's just something that seemed sensible at the time (ceph_kvmalloc() predates kvmalloc()). kvmalloc() is smarter: in an attempt to reduce long term fragmentation, it first tries to kmalloc non-disruptively. Switch to kvmalloc() and set the respective PF_MEMALLOC_* flag using the scope API to avoid the deadlock. Note that kvmalloc() needs to be passed GFP_KERNEL to enable the fallback. Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Reviewed-by: Jeff Layton <jlayton@kernel.org>
-rw-r--r--net/ceph/ceph_common.c29
1 files changed, 23 insertions, 6 deletions
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index b412a3ccc4fc..2d568246803f 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -13,6 +13,7 @@
#include <linux/nsproxy.h>
#include <linux/parser.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/statfs.h>
@@ -185,18 +186,34 @@ int ceph_compare_options(struct ceph_options *new_opt,
}
EXPORT_SYMBOL(ceph_compare_options);
+/*
+ * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are
+ * compatible with (a superset of) GFP_KERNEL. This is because while the
+ * actual pages are allocated with the specified flags, the page table pages
+ * are always allocated with GFP_KERNEL. map_vm_area() doesn't even take
+ * flags because GFP_KERNEL is hard-coded in {p4d,pud,pmd,pte}_alloc().
+ *
+ * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO.
+ */
void *ceph_kvmalloc(size_t size, gfp_t flags)
{
- if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- void *ptr = kmalloc(size, flags | __GFP_NOWARN);
- if (ptr)
- return ptr;
+ void *p;
+
+ if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) {
+ p = kvmalloc(size, flags);
+ } else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) {
+ unsigned int nofs_flag = memalloc_nofs_save();
+ p = kvmalloc(size, GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
+ } else {
+ unsigned int noio_flag = memalloc_noio_save();
+ p = kvmalloc(size, GFP_KERNEL);
+ memalloc_noio_restore(noio_flag);
}
- return __vmalloc(size, flags, PAGE_KERNEL);
+ return p;
}
-
static int parse_fsid(const char *str, struct ceph_fsid *fsid)
{
int i = 0;