summaryrefslogtreecommitdiff
path: root/meta-openbmc-mods/meta-common/recipes-kernel/linux/linux-aspeed/CVE-2019-19770/0001-blktrace-fix-debugfs-use-after-free.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-openbmc-mods/meta-common/recipes-kernel/linux/linux-aspeed/CVE-2019-19770/0001-blktrace-fix-debugfs-use-after-free.patch')
-rw-r--r-- meta-openbmc-mods/meta-common/recipes-kernel/linux/linux-aspeed/CVE-2019-19770/0001-blktrace-fix-debugfs-use-after-free.patch 214
1 files changed, 214 insertions, 0 deletions
diff --git a/meta-openbmc-mods/meta-common/recipes-kernel/linux/linux-aspeed/CVE-2019-19770/0001-blktrace-fix-debugfs-use-after-free.patch b/meta-openbmc-mods/meta-common/recipes-kernel/linux/linux-aspeed/CVE-2019-19770/0001-blktrace-fix-debugfs-use-after-free.patch
new file mode 100644
index 000000000..a455a43c5
--- /dev/null
+++ b/meta-openbmc-mods/meta-common/recipes-kernel/linux/linux-aspeed/CVE-2019-19770/0001-blktrace-fix-debugfs-use-after-free.patch
@@ -0,0 +1,214 @@
+From 1e1cea6eb8838117f676aaf6e0a465dd6a9addad Mon Sep 17 00:00:00 2001
+From: Luis Chamberlain <mcgrof@kernel.org>
+Date: Fri, 19 Jun 2020 20:47:28 +0000
+Subject: [PATCH] blktrace: fix debugfs use after free
+
+In commit 6ac93117ab00 ("blktrace: use existing disk debugfs directory"),
+merged in v4.12, Omar fixed the original blktrace code for request-based
+drivers (multiqueue). This however left in place a possible crash, if you
+happen to abuse blktrace while racing to remove / add a device.
+
+We used to use asynchronous removal of the request_queue, and with that
+the issue was easier to reproduce. Now that we have reverted to
+synchronous removal of the request_queue, the issue is still possible to
+reproduce; it is, however, just a bit more difficult.
+
+We essentially run two instances of break-blktrace which add/remove
+a loop device, and setup a blktrace and just never tear the blktrace
+down. We do this twice in parallel. This is easily reproduced with the
+script run_0004.sh from break-blktrace [0].
+
+We can end up with two types of panics each reflecting where we
+race, one a failed blktrace setup:
+
+[ 252.426751] debugfs: Directory 'loop0' with parent 'block' already present!
+[ 252.432265] BUG: kernel NULL pointer dereference, address: 00000000000000a0
+[ 252.436592] #PF: supervisor write access in kernel mode
+[ 252.439822] #PF: error_code(0x0002) - not-present page
+[ 252.442967] PGD 0 P4D 0
+[ 252.444656] Oops: 0002 [#1] SMP NOPTI
+[ 252.446972] CPU: 10 PID: 1153 Comm: break-blktrace Tainted: G E 5.7.0-rc2-next-20200420+ #164
+[ 252.452673] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+[ 252.456343] RIP: 0010:down_write+0x15/0x40
+[ 252.458146] Code: eb ca e8 ae 22 8d ff cc cc cc cc cc cc cc cc cc cc cc cc
+ cc cc 0f 1f 44 00 00 55 48 89 fd e8 52 db ff ff 31 c0 ba 01 00
+ 00 00 <f0> 48 0f b1 55 00 75 0f 48 8b 04 25 c0 8b 01 00 48 89
+ 45 08 5d
+[ 252.463638] RSP: 0018:ffffa626415abcc8 EFLAGS: 00010246
+[ 252.464950] RAX: 0000000000000000 RBX: ffff958c25f0f5c0 RCX: ffffff8100000000
+[ 252.466727] RDX: 0000000000000001 RSI: ffffff8100000000 RDI: 00000000000000a0
+[ 252.468482] RBP: 00000000000000a0 R08: 0000000000000000 R09: 0000000000000001
+[ 252.470014] R10: 0000000000000000 R11: ffff958d1f9227ff R12: 0000000000000000
+[ 252.471473] R13: ffff958c25ea5380 R14: ffffffff8cce15f1 R15: 00000000000000a0
+[ 252.473346] FS: 00007f2e69dee540(0000) GS:ffff958c2fc80000(0000) knlGS:0000000000000000
+[ 252.475225] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 252.476267] CR2: 00000000000000a0 CR3: 0000000427d10004 CR4: 0000000000360ee0
+[ 252.477526] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 252.478776] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 252.479866] Call Trace:
+[ 252.480322] simple_recursive_removal+0x4e/0x2e0
+[ 252.481078] ? debugfs_remove+0x60/0x60
+[ 252.481725] ? relay_destroy_buf+0x77/0xb0
+[ 252.482662] debugfs_remove+0x40/0x60
+[ 252.483518] blk_remove_buf_file_callback+0x5/0x10
+[ 252.484328] relay_close_buf+0x2e/0x60
+[ 252.484930] relay_open+0x1ce/0x2c0
+[ 252.485520] do_blk_trace_setup+0x14f/0x2b0
+[ 252.486187] __blk_trace_setup+0x54/0xb0
+[ 252.486803] blk_trace_ioctl+0x90/0x140
+[ 252.487423] ? do_sys_openat2+0x1ab/0x2d0
+[ 252.488053] blkdev_ioctl+0x4d/0x260
+[ 252.488636] block_ioctl+0x39/0x40
+[ 252.489139] ksys_ioctl+0x87/0xc0
+[ 252.489675] __x64_sys_ioctl+0x16/0x20
+[ 252.490380] do_syscall_64+0x52/0x180
+[ 252.491032] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+And the other on the device removal:
+
+[ 128.528940] debugfs: Directory 'loop0' with parent 'block' already present!
+[ 128.615325] BUG: kernel NULL pointer dereference, address: 00000000000000a0
+[ 128.619537] #PF: supervisor write access in kernel mode
+[ 128.622700] #PF: error_code(0x0002) - not-present page
+[ 128.625842] PGD 0 P4D 0
+[ 128.627585] Oops: 0002 [#1] SMP NOPTI
+[ 128.629871] CPU: 12 PID: 544 Comm: break-blktrace Tainted: G E 5.7.0-rc2-next-20200420+ #164
+[ 128.635595] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+[ 128.640471] RIP: 0010:down_write+0x15/0x40
+[ 128.643041] Code: eb ca e8 ae 22 8d ff cc cc cc cc cc cc cc cc cc cc cc cc
+ cc cc 0f 1f 44 00 00 55 48 89 fd e8 52 db ff ff 31 c0 ba 01 00
+ 00 00 <f0> 48 0f b1 55 00 75 0f 65 48 8b 04 25 c0 8b 01 00 48 89
+ 45 08 5d
+[ 128.650180] RSP: 0018:ffffa9c3c05ebd78 EFLAGS: 00010246
+[ 128.651820] RAX: 0000000000000000 RBX: ffff8ae9a6370240 RCX: ffffff8100000000
+[ 128.653942] RDX: 0000000000000001 RSI: ffffff8100000000 RDI: 00000000000000a0
+[ 128.655720] RBP: 00000000000000a0 R08: 0000000000000002 R09: ffff8ae9afd2d3d0
+[ 128.657400] R10: 0000000000000056 R11: 0000000000000000 R12: 0000000000000000
+[ 128.659099] R13: 0000000000000000 R14: 0000000000000003 R15: 00000000000000a0
+[ 128.660500] FS: 00007febfd995540(0000) GS:ffff8ae9afd00000(0000) knlGS:0000000000000000
+[ 128.662204] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 128.663426] CR2: 00000000000000a0 CR3: 0000000420042003 CR4: 0000000000360ee0
+[ 128.664776] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 128.666022] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 128.667282] Call Trace:
+[ 128.667801] simple_recursive_removal+0x4e/0x2e0
+[ 128.668663] ? debugfs_remove+0x60/0x60
+[ 128.669368] debugfs_remove+0x40/0x60
+[ 128.669985] blk_trace_free+0xd/0x50
+[ 128.670593] __blk_trace_remove+0x27/0x40
+[ 128.671274] blk_trace_shutdown+0x30/0x40
+[ 128.671935] blk_release_queue+0x95/0xf0
+[ 128.672589] kobject_put+0xa5/0x1b0
+[ 128.673188] disk_release+0xa2/0xc0
+[ 128.673786] device_release+0x28/0x80
+[ 128.674376] kobject_put+0xa5/0x1b0
+[ 128.674915] loop_remove+0x39/0x50 [loop]
+[ 128.675511] loop_control_ioctl+0x113/0x130 [loop]
+[ 128.676199] ksys_ioctl+0x87/0xc0
+[ 128.676708] __x64_sys_ioctl+0x16/0x20
+[ 128.677274] do_syscall_64+0x52/0x180
+[ 128.677823] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+The common theme here is:
+
+debugfs: Directory 'loop0' with parent 'block' already present
+
+This crash happens because of how blktrace uses the debugfs directory
+where it places its files. Upon init we always create the same directory
+which would be needed by blktrace but we only do this for make_request
+(multiqueue) block drivers. When you race a removal of these
+devices with a blktrace setup you end up in a situation where the
+make_request recursive debugfs removal will sweep away the blktrace
+files and then later blktrace will also try to remove individual
+dentries which are already NULL. The inverse is also possible and hence
+the two types of use after frees.
+
+We don't create the block debugfs directory on init for these types of
+block devices:
+
+ * request-based block driver block devices
+ * every possible partition
+ * scsi-generic
+
+And so, this race should in theory only be possible with make_request
+drivers.
+
+We can fix the UAF by simply re-using the debugfs directory for
+make_request drivers (multiqueue) and only creating the ephemeral
+directory for the other type of block devices. The new clarifications
+on relying on the q->blk_trace_mutex *and* also checking for q->blk_trace
+*prior* to processing a blktrace ensure the debugfs directories are
+only created if no possible directory name clashes are possible.
+
+This has been tested with:
+
+ o nvme partitions
+ o iSCSI with tgt, and blktracing against scsi-generic with:
+ o block
+ o tape
+ o cdrom
+ o media changer
+ o blktests
+
+This patch is part of the work which disputes the severity of
+CVE-2019-19770 which shows this issue is not a core debugfs issue, but
+a misuse of debugfs within blktrace.
+
+Fixes: 6ac93117ab00 ("blktrace: use existing disk debugfs directory")
+Reported-by: syzbot+603294af2d01acfdd6da@syzkaller.appspotmail.com
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Cc: Bart Van Assche <bvanassche@acm.org>
+Cc: Omar Sandoval <osandov@fb.com>
+Cc: Hannes Reinecke <hare@suse.com>
+Cc: Nicolai Stange <nstange@suse.de>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
+Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
+Cc: yu kuai <yukuai3@huawei.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@intel.com>
+---
+ kernel/trace/blktrace.c | 18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
+index e7e483cdbea6..fc2ad395c61d 100644
+--- a/kernel/trace/blktrace.c
++++ b/kernel/trace/blktrace.c
+@@ -508,10 +508,18 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+ if (!bt->msg_data)
+ goto err;
+
+- ret = -ENOENT;
+-
+- dir = debugfs_lookup(buts->name, blk_debugfs_root);
+- if (!dir)
++#ifdef CONFIG_BLK_DEBUG_FS
++ /*
++ * When tracing whole make_request drivers (multiqueue) block devices,
++ * reuse the existing debugfs directory created by the block layer on
++ * init. For request-based block devices, all partitions block devices,
++ * and scsi-generic block devices we create a temporary new debugfs
++ * directory that will be removed once the trace ends.
++ */
++ if (queue_is_mq(q) && bdev && bdev == bdev->bd_contains)
++ dir = q->debugfs_dir;
++ else
++#endif
+ bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
+
+ bt->dev = dev;
+@@ -552,8 +560,6 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+
+ ret = 0;
+ err:
+- if (dir && !bt->dir)
+- dput(dir);
+ if (ret)
+ blk_trace_free(bt);
+ return ret;
+--
+2.17.1
+