summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Kravetz <mike.kravetz@oracle.com>2021-09-03 00:58:53 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2021-09-18 14:40:36 +0300
commite1fa3b2b60abb9f43b937741e27c6531ef4cbef9 (patch)
tree437d5ad77f806eae0e9b5fd4e778e43a00420d3f
parent27dd91221b3f714f315c6e6b8f59924fbdd4c331 (diff)
downloadlinux-e1fa3b2b60abb9f43b937741e27c6531ef4cbef9.tar.xz
hugetlb: fix hugetlb cgroup refcounting during vma split
commit 09a26e832705fdb7a9484495b71a05e0bbc65207 upstream. Guillaume Morin reported hitting the following WARNING followed by GPF or NULL pointer deference either in cgroups_destroy or in the kill_css path.: percpu ref (css_release) <= 0 (-1) after switching to atomic WARNING: CPU: 23 PID: 130 at lib/percpu-refcount.c:196 percpu_ref_switch_to_atomic_rcu+0x127/0x130 CPU: 23 PID: 130 Comm: ksoftirqd/23 Kdump: loaded Tainted: G O 5.10.60 #1 RIP: 0010:percpu_ref_switch_to_atomic_rcu+0x127/0x130 Call Trace: rcu_core+0x30f/0x530 rcu_core_si+0xe/0x10 __do_softirq+0x103/0x2a2 run_ksoftirqd+0x2b/0x40 smpboot_thread_fn+0x11a/0x170 kthread+0x10a/0x140 ret_from_fork+0x22/0x30 Upon further examination, it was discovered that the css structure was associated with hugetlb reservations. For private hugetlb mappings the vma points to a reserve map that contains a pointer to the css. At mmap time, reservations are set up and a reference to the css is taken. This reference is dropped in the vma close operation; hugetlb_vm_op_close. However, if a vma is split no additional reference to the css is taken yet hugetlb_vm_op_close will be called twice for the split vma resulting in an underflow. Fix by taking another reference in hugetlb_vm_op_open. Note that the reference is only taken for the owner of the reserve map. In the more common fork case, the pointer to the reserve map is cleared for non-owning vmas. Link: https://lkml.kernel.org/r/20210830215015.155224-1-mike.kravetz@oracle.com Fixes: e9fe92ae0cd2 ("hugetlb_cgroup: add reservation accounting for private mappings") Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Reported-by: Guillaume Morin <guillaume@morinfr.org> Suggested-by: Guillaume Morin <guillaume@morinfr.org> Tested-by: Guillaume Morin <guillaume@morinfr.org> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--include/linux/hugetlb_cgroup.h12
-rw-r--r--mm/hugetlb.c4
2 files changed, 15 insertions, 1 deletions
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 0bff345c4bc6..171bf1be4011 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -118,6 +118,13 @@ static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg)
css_put(&h_cg->css);
}
+static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
+ struct resv_map *resv_map)
+{
+ if (resv_map->css)
+ css_get(resv_map->css);
+}
+
extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr);
extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
@@ -196,6 +203,11 @@ static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg)
{
}
+static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
+ struct resv_map *resv_map)
+{
+}
+
static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr)
{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fa6b0ac6c280..6e92ab0ae070 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3659,8 +3659,10 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
* after this open call completes. It is therefore safe to take a
* new reference here without additional locking.
*/
- if (resv && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+ if (resv && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+ resv_map_dup_hugetlb_cgroup_uncharge_info(resv);
kref_get(&resv->refs);
+ }
}
static void hugetlb_vm_op_close(struct vm_area_struct *vma)