summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/prctl/no_new_privs.txt50
-rw-r--r--arch/arm/kernel/vmlinux.lds.S2
-rw-r--r--arch/arm/mm/mmu.c74
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S2
-rw-r--r--arch/powerpc/xmon/xmon.c2
-rw-r--r--drivers/gpu/drm/drm_edid.c27
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c37
-rw-r--r--drivers/gpu/drm/radeon/radeon_gart.c13
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c10
-rw-r--r--drivers/gpu/drm/radeon/si.c4
-rw-r--r--drivers/md/md.c8
-rw-r--r--drivers/md/multipath.c3
-rw-r--r--drivers/md/raid1.c13
-rw-r--r--drivers/md/raid10.c26
-rw-r--r--drivers/md/raid5.c67
-rw-r--r--security/security.c1
16 files changed, 277 insertions, 62 deletions
diff --git a/Documentation/prctl/no_new_privs.txt b/Documentation/prctl/no_new_privs.txt
new file mode 100644
index 000000000000..cb705ec69abe
--- /dev/null
+++ b/Documentation/prctl/no_new_privs.txt
@@ -0,0 +1,50 @@
+The execve system call can grant a newly-started program privileges that
+its parent did not have. The most obvious examples are setuid/setgid
+programs and file capabilities. To prevent the parent program from
+gaining these privileges as well, the kernel and user code must be
+careful to prevent the parent from doing anything that could subvert the
+child. For example:
+
+ - The dynamic loader handles LD_* environment variables differently if
+ a program is setuid.
+
+ - chroot is disallowed to unprivileged processes, since it would allow
+ /etc/passwd to be replaced from the point of view of a process that
+ inherited chroot.
+
+ - The exec code has special handling for ptrace.
+
+These are all ad-hoc fixes. The no_new_privs bit (since Linux 3.5) is a
+new, generic mechanism to make it safe for a process to modify its
+execution environment in a manner that persists across execve. Any task
+can set no_new_privs. Once the bit is set, it is inherited across fork,
+clone, and execve and cannot be unset. With no_new_privs set, execve
+promises not to grant the privilege to do anything that could not have
+been done without the execve call. For example, the setuid and setgid
+bits will no longer change the uid or gid; file capabilities will not
+add to the permitted set, and LSMs will not relax constraints after
+execve.
+
+Note that no_new_privs does not prevent privilege changes that do not
+involve execve. An appropriately privileged task can still call
+setuid(2) and receive SCM_RIGHTS datagrams.
+
+There are two main use cases for no_new_privs so far:
+
+ - Filters installed for the seccomp mode 2 sandbox persist across
+ execve and can change the behavior of newly-executed programs.
+ Unprivileged users are therefore only allowed to install such filters
+ if no_new_privs is set.
+
+ - By itself, no_new_privs can be used to reduce the attack surface
+ available to an unprivileged user. If everything running with a
+ given uid has no_new_privs set, then that uid will be unable to
+ escalate its privileges by directly attacking setuid, setgid, and
+ fcap-using binaries; it will need to compromise something without the
+ no_new_privs bit set first.
+
+In the future, other potentially dangerous kernel features could become
+available to unprivileged tasks if no_new_privs is set. In principle,
+several options to unshare(2) and clone(2) would be safe when
+no_new_privs is set, and no_new_privs + chroot is considerable less
+dangerous than chroot by itself.
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 43a31fb06318..36ff15bbfdd4 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -183,7 +183,9 @@ SECTIONS
}
#endif
+#ifdef CONFIG_SMP
PERCPU_SECTION(L1_CACHE_BYTES)
+#endif
#ifdef CONFIG_XIP_KERNEL
__data_loc = ALIGN(4); /* location in binary */
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e5dad60b558b..cf4528d51774 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -791,6 +791,79 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
}
}
+#ifndef CONFIG_ARM_LPAE
+
+/*
+ * The Linux PMD is made of two consecutive section entries covering 2MB
+ * (see definition in include/asm/pgtable-2level.h). However a call to
+ * create_mapping() may optimize static mappings by using individual
+ * 1MB section mappings. This leaves the actual PMD potentially half
+ * initialized if the top or bottom section entry isn't used, leaving it
+ * open to problems if a subsequent ioremap() or vmalloc() tries to use
+ * the virtual space left free by that unused section entry.
+ *
+ * Let's avoid the issue by inserting dummy vm entries covering the unused
+ * PMD halves once the static mappings are in place.
+ */
+
+static void __init pmd_empty_section_gap(unsigned long addr)
+{
+ struct vm_struct *vm;
+
+ vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm));
+ vm->addr = (void *)addr;
+ vm->size = SECTION_SIZE;
+ vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
+ vm->caller = pmd_empty_section_gap;
+ vm_area_add_early(vm);
+}
+
+static void __init fill_pmd_gaps(void)
+{
+ struct vm_struct *vm;
+ unsigned long addr, next = 0;
+ pmd_t *pmd;
+
+ /* we're still single threaded hence no lock needed here */
+ for (vm = vmlist; vm; vm = vm->next) {
+ if (!(vm->flags & VM_ARM_STATIC_MAPPING))
+ continue;
+ addr = (unsigned long)vm->addr;
+ if (addr < next)
+ continue;
+
+ /*
+ * Check if this vm starts on an odd section boundary.
+ * If so and the first section entry for this PMD is free
+ * then we block the corresponding virtual address.
+ */
+ if ((addr & ~PMD_MASK) == SECTION_SIZE) {
+ pmd = pmd_off_k(addr);
+ if (pmd_none(*pmd))
+ pmd_empty_section_gap(addr & PMD_MASK);
+ }
+
+ /*
+ * Then check if this vm ends on an odd section boundary.
+ * If so and the second section entry for this PMD is empty
+ * then we block the corresponding virtual address.
+ */
+ addr += vm->size;
+ if ((addr & ~PMD_MASK) == SECTION_SIZE) {
+ pmd = pmd_off_k(addr) + 1;
+ if (pmd_none(*pmd))
+ pmd_empty_section_gap(addr);
+ }
+
+ /* no need to look at any vm entry until we hit the next PMD */
+ next = (addr + PMD_SIZE - 1) & PMD_MASK;
+ }
+}
+
+#else
+#define fill_pmd_gaps() do { } while (0)
+#endif
+
static void * __initdata vmalloc_min =
(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);
@@ -1072,6 +1145,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
*/
if (mdesc->map_io)
mdesc->map_io();
+ fill_pmd_gaps();
/*
* Finally flush the caches and tlb to ensure that we're in a
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index a84aafce2a12..a1044f43becd 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -810,7 +810,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
lwz r3,VCORE_NAPPING_THREADS(r5)
lwz r4,VCPU_PTID(r9)
li r0,1
- sldi r0,r0,r4
+ sld r0,r0,r4
andc. r3,r3,r0 /* no sense IPI'ing ourselves */
beq 43f
mulli r4,r4,PACA_SIZE /* get paca for thread 0 */
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 0f3ab06d2222..eab3492a45c5 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -971,7 +971,7 @@ static int cpu_cmd(void)
/* print cpus waiting or in xmon */
printf("cpus stopped:");
count = 0;
- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ for_each_possible_cpu(cpu) {
if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
if (count == 0)
printf(" %x", cpu);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 5873e481e5d2..a8743c399e83 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -1039,6 +1039,24 @@ mode_in_range(const struct drm_display_mode *mode, struct edid *edid,
return true;
}
+static bool valid_inferred_mode(const struct drm_connector *connector,
+ const struct drm_display_mode *mode)
+{
+ struct drm_display_mode *m;
+ bool ok = false;
+
+ list_for_each_entry(m, &connector->probed_modes, head) {
+ if (mode->hdisplay == m->hdisplay &&
+ mode->vdisplay == m->vdisplay &&
+ drm_mode_vrefresh(mode) == drm_mode_vrefresh(m))
+ return false; /* duplicated */
+ if (mode->hdisplay <= m->hdisplay &&
+ mode->vdisplay <= m->vdisplay)
+ ok = true;
+ }
+ return ok;
+}
+
static int
drm_dmt_modes_for_range(struct drm_connector *connector, struct edid *edid,
struct detailed_timing *timing)
@@ -1048,7 +1066,8 @@ drm_dmt_modes_for_range(struct drm_connector *connector, struct edid *edid,
struct drm_device *dev = connector->dev;
for (i = 0; i < drm_num_dmt_modes; i++) {
- if (mode_in_range(drm_dmt_modes + i, edid, timing)) {
+ if (mode_in_range(drm_dmt_modes + i, edid, timing) &&
+ valid_inferred_mode(connector, drm_dmt_modes + i)) {
newmode = drm_mode_duplicate(dev, &drm_dmt_modes[i]);
if (newmode) {
drm_mode_probed_add(connector, newmode);
@@ -1088,7 +1107,8 @@ drm_gtf_modes_for_range(struct drm_connector *connector, struct edid *edid,
return modes;
fixup_mode_1366x768(newmode);
- if (!mode_in_range(newmode, edid, timing)) {
+ if (!mode_in_range(newmode, edid, timing) ||
+ !valid_inferred_mode(connector, newmode)) {
drm_mode_destroy(dev, newmode);
continue;
}
@@ -1116,7 +1136,8 @@ drm_cvt_modes_for_range(struct drm_connector *connector, struct edid *edid,
return modes;
fixup_mode_1366x768(newmode);
- if (!mode_in_range(newmode, edid, timing)) {
+ if (!mode_in_range(newmode, edid, timing) ||
+ !valid_inferred_mode(connector, newmode)) {
drm_mode_destroy(dev, newmode);
continue;
}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index f94792626b94..36822b924eb1 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1401,6 +1401,27 @@ i915_mtrr_setup(struct drm_i915_private *dev_priv, unsigned long base,
}
}
+static void i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv)
+{
+ struct apertures_struct *ap;
+ struct pci_dev *pdev = dev_priv->dev->pdev;
+ bool primary;
+
+ ap = alloc_apertures(1);
+ if (!ap)
+ return;
+
+ ap->ranges[0].base = dev_priv->dev->agp->base;
+ ap->ranges[0].size =
+ dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
+ primary =
+ pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
+
+ remove_conflicting_framebuffers(ap, "inteldrmfb", primary);
+
+ kfree(ap);
+}
+
/**
* i915_driver_load - setup chip and create an initial config
* @dev: DRM device
@@ -1446,6 +1467,15 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
goto free_priv;
}
+ dev_priv->mm.gtt = intel_gtt_get();
+ if (!dev_priv->mm.gtt) {
+ DRM_ERROR("Failed to initialize GTT\n");
+ ret = -ENODEV;
+ goto put_bridge;
+ }
+
+ i915_kick_out_firmware_fb(dev_priv);
+
pci_set_master(dev->pdev);
/* overlay on gen2 is broken and can't address above 1G */
@@ -1471,13 +1501,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
goto put_bridge;
}
- dev_priv->mm.gtt = intel_gtt_get();
- if (!dev_priv->mm.gtt) {
- DRM_ERROR("Failed to initialize GTT\n");
- ret = -ENODEV;
- goto out_rmmap;
- }
-
aperture_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
dev_priv->mm.gtt_mapping =
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 59d44937dd9f..84b648a7ddd8 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -289,8 +289,9 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
rdev->vm_manager.enabled = false;
/* mark first vm as always in use, it's the system one */
+ /* allocate enough for 2 full VM pts */
r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
- rdev->vm_manager.max_pfn * 8,
+ rdev->vm_manager.max_pfn * 8 * 2,
RADEON_GEM_DOMAIN_VRAM);
if (r) {
dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
@@ -633,7 +634,15 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
mutex_init(&vm->mutex);
INIT_LIST_HEAD(&vm->list);
INIT_LIST_HEAD(&vm->va);
- vm->last_pfn = 0;
+ /* SI requires equal sized PTs for all VMs, so always set
+ * last_pfn to max_pfn. cayman allows variable sized
+ * pts so we can grow then as needed. Once we switch
+ * to two level pts we can unify this again.
+ */
+ if (rdev->family >= CHIP_TAHITI)
+ vm->last_pfn = rdev->vm_manager.max_pfn;
+ else
+ vm->last_pfn = 0;
/* map the ib pool buffer at 0 in virtual address space, set
* read only
*/
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index f28bd4b7ef98..21ec9f5653ce 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -292,6 +292,7 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data,
int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
+ struct radeon_device *rdev = dev->dev_private;
struct drm_radeon_gem_busy *args = data;
struct drm_gem_object *gobj;
struct radeon_bo *robj;
@@ -317,13 +318,14 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
break;
}
drm_gem_object_unreference_unlocked(gobj);
- r = radeon_gem_handle_lockup(robj->rdev, r);
+ r = radeon_gem_handle_lockup(rdev, r);
return r;
}
int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
+ struct radeon_device *rdev = dev->dev_private;
struct drm_radeon_gem_wait_idle *args = data;
struct drm_gem_object *gobj;
struct radeon_bo *robj;
@@ -336,10 +338,10 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
robj = gem_to_radeon_bo(gobj);
r = radeon_bo_wait(robj, NULL, false);
/* callback hw specific functions if any */
- if (robj->rdev->asic->ioctl_wait_idle)
- robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj);
+ if (rdev->asic->ioctl_wait_idle)
+ robj->rdev->asic->ioctl_wait_idle(rdev, robj);
drm_gem_object_unreference_unlocked(gobj);
- r = radeon_gem_handle_lockup(robj->rdev, r);
+ r = radeon_gem_handle_lockup(rdev, r);
return r;
}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index c7b61f16ecfd..0b0279291a73 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2365,12 +2365,12 @@ int si_pcie_gart_enable(struct radeon_device *rdev)
WREG32(0x15DC, 0);
/* empty context1-15 */
- /* FIXME start with 1G, once using 2 level pt switch to full
+ /* FIXME start with 4G, once using 2 level pt switch to full
* vm size space
*/
/* set vm size, must be a multiple of 4 */
WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
- WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE);
+ WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
for (i = 1; i < 16; i++) {
if (i < 8)
WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1c2f9048e1ae..a4c219e3c859 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5784,8 +5784,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
super_types[mddev->major_version].
validate_super(mddev, rdev);
if ((info->state & (1<<MD_DISK_SYNC)) &&
- (!test_bit(In_sync, &rdev->flags) ||
- rdev->raid_disk != info->raid_disk)) {
+ rdev->raid_disk != info->raid_disk) {
/* This was a hot-add request, but events doesn't
* match, so reject it.
*/
@@ -6751,7 +6750,7 @@ struct md_thread *md_register_thread(void (*run) (struct mddev *), struct mddev
thread->tsk = kthread_run(md_thread, thread,
"%s_%s",
mdname(thread->mddev),
- name ?: mddev->pers->name);
+ name);
if (IS_ERR(thread->tsk)) {
kfree(thread);
return NULL;
@@ -7298,6 +7297,7 @@ void md_do_sync(struct mddev *mddev)
int skipped = 0;
struct md_rdev *rdev;
char *desc;
+ struct blk_plug plug;
/* just incase thread restarts... */
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -7447,6 +7447,7 @@ void md_do_sync(struct mddev *mddev)
}
mddev->curr_resync_completed = j;
+ blk_start_plug(&plug);
while (j < max_sectors) {
sector_t sectors;
@@ -7552,6 +7553,7 @@ void md_do_sync(struct mddev *mddev)
* this also signals 'finished resyncing' to md_stop
*/
out:
+ blk_finish_plug(&plug);
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
/* tell personality that we are finished */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 9339e67fcc79..61a1833ebaf3 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -474,7 +474,8 @@ static int multipath_run (struct mddev *mddev)
}
{
- mddev->thread = md_register_thread(multipathd, mddev, NULL);
+ mddev->thread = md_register_thread(multipathd, mddev,
+ "multipath");
if (!mddev->thread) {
printk(KERN_ERR "multipath: couldn't allocate thread"
" for %s\n", mdname(mddev));
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a9c7981ddd24..8c2754f835ef 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -517,8 +517,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
int bad_sectors;
int disk = start_disk + i;
- if (disk >= conf->raid_disks)
- disk -= conf->raid_disks;
+ if (disk >= conf->raid_disks * 2)
+ disk -= conf->raid_disks * 2;
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED
@@ -883,7 +883,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
struct md_rdev *blocked_rdev;
- int plugged;
int first_clone;
int sectors_handled;
int max_sectors;
@@ -1034,7 +1033,6 @@ read_again:
* the bad blocks. Each set of writes gets it's own r1bio
* with a set of bios attached.
*/
- plugged = mddev_check_plugged(mddev);
disks = conf->raid_disks * 2;
retry_write:
@@ -1191,6 +1189,8 @@ read_again:
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (!mddev_check_plugged(mddev))
+ md_wakeup_thread(mddev->thread);
}
/* Mustn't call r1_bio_write_done before this next test,
* as it could result in the bio being freed.
@@ -1213,9 +1213,6 @@ read_again:
/* In case raid1d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
-
- if (do_sync || !bitmap || !plugged)
- md_wakeup_thread(mddev->thread);
}
static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2621,7 +2618,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
goto abort;
}
err = -ENOMEM;
- conf->thread = md_register_thread(raid1d, mddev, NULL);
+ conf->thread = md_register_thread(raid1d, mddev, "raid1");
if (!conf->thread) {
printk(KERN_ERR
"md/raid1:%s: couldn't allocate thread\n",
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 99ae6068e456..acf5a828c7e1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1039,7 +1039,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
unsigned long flags;
struct md_rdev *blocked_rdev;
- int plugged;
int sectors_handled;
int max_sectors;
int sectors;
@@ -1239,7 +1238,6 @@ read_again:
* of r10_bios is recored in bio->bi_phys_segments just as with
* the read case.
*/
- plugged = mddev_check_plugged(mddev);
r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
raid10_find_phys(conf, r10_bio);
@@ -1396,6 +1394,8 @@ retry_write:
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (!mddev_check_plugged(mddev, 0, 0))
+ md_wakeup_thread(mddev->thread);
if (!r10_bio->devs[i].repl_bio)
continue;
@@ -1423,6 +1423,8 @@ retry_write:
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (!mddev_check_plugged(mddev))
+ md_wakeup_thread(mddev->thread);
}
/* Don't remove the bias on 'remaining' (one_write_done) until
@@ -1448,9 +1450,6 @@ retry_write:
/* In case raid10d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
-
- if (do_sync || !mddev->bitmap || !plugged)
- md_wakeup_thread(mddev->thread);
}
static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2310,7 +2309,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
if (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
- s<<9, conf->tmppage, WRITE)
+ s, conf->tmppage, WRITE)
== 0) {
/* Well, this device is dead */
printk(KERN_NOTICE
@@ -2349,7 +2348,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
switch (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
- s<<9, conf->tmppage,
+ s, conf->tmppage,
READ)) {
case 0:
/* Well, this device is dead */
@@ -2512,7 +2511,7 @@ read_more:
slot = r10_bio->read_slot;
printk_ratelimited(
KERN_ERR
- "md/raid10:%s: %s: redirecting"
+ "md/raid10:%s: %s: redirecting "
"sector %llu to another mirror\n",
mdname(mddev),
bdevname(rdev->bdev, b),
@@ -2661,7 +2660,8 @@ static void raid10d(struct mddev *mddev)
blk_start_plug(&plug);
for (;;) {
- flush_pending_writes(conf);
+ if (atomic_read(&mddev->plug_cnt) == 0)
+ flush_pending_writes(conf);
spin_lock_irqsave(&conf->device_lock, flags);
if (list_empty(head)) {
@@ -2890,6 +2890,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
/* want to reconstruct this device */
rb2 = r10_bio;
sect = raid10_find_virt(conf, sector_nr, i);
+ if (sect >= mddev->resync_max_sectors) {
+ /* last stripe is not complete - don't
+ * try to recover this sector.
+ */
+ continue;
+ }
/* Unless we are doing a full sync, or a replacement
* we only need to recover the block if it is set in
* the bitmap
@@ -3421,7 +3427,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
spin_lock_init(&conf->resync_lock);
init_waitqueue_head(&conf->wait_barrier);
- conf->thread = md_register_thread(raid10d, mddev, NULL);
+ conf->thread = md_register_thread(raid10d, mddev, "raid10");
if (!conf->thread)
goto out;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d26767246d26..04348d76bb30 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -196,12 +196,14 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
BUG_ON(!list_empty(&sh->lru));
BUG_ON(atomic_read(&conf->active_stripes)==0);
if (test_bit(STRIPE_HANDLE, &sh->state)) {
- if (test_bit(STRIPE_DELAYED, &sh->state))
+ if (test_bit(STRIPE_DELAYED, &sh->state) &&
+ !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
list_add_tail(&sh->lru, &conf->delayed_list);
else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0)
list_add_tail(&sh->lru, &conf->bitmap_list);
else {
+ clear_bit(STRIPE_DELAYED, &sh->state);
clear_bit(STRIPE_BIT_DELAY, &sh->state);
list_add_tail(&sh->lru, &conf->handle_list);
}
@@ -606,6 +608,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
* a chance*/
md_check_recovery(conf->mddev);
}
+ /*
+ * Because md_wait_for_blocked_rdev
+ * will dec nr_pending, we must
+ * increment it first.
+ */
+ atomic_inc(&rdev->nr_pending);
md_wait_for_blocked_rdev(rdev, conf->mddev);
} else {
/* Acknowledged bad block - skip the write */
@@ -1737,6 +1745,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
} else {
const char *bdn = bdevname(rdev->bdev, b);
int retry = 0;
+ int set_bad = 0;
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
atomic_inc(&rdev->read_errors);
@@ -1748,7 +1757,8 @@ static void raid5_end_read_request(struct bio * bi, int error)
mdname(conf->mddev),
(unsigned long long)s,
bdn);
- else if (conf->mddev->degraded >= conf->max_degraded)
+ else if (conf->mddev->degraded >= conf->max_degraded) {
+ set_bad = 1;
printk_ratelimited(
KERN_WARNING
"md/raid:%s: read error not correctable "
@@ -1756,8 +1766,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
mdname(conf->mddev),
(unsigned long long)s,
bdn);
- else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
+ } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
/* Oh, no!!! */
+ set_bad = 1;
printk_ratelimited(
KERN_WARNING
"md/raid:%s: read error NOT corrected!! "
@@ -1765,7 +1776,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
mdname(conf->mddev),
(unsigned long long)s,
bdn);
- else if (atomic_read(&rdev->read_errors)
+ } else if (atomic_read(&rdev->read_errors)
> conf->max_nr_stripes)
printk(KERN_WARNING
"md/raid:%s: Too many read errors, failing device %s.\n",
@@ -1777,7 +1788,11 @@ static void raid5_end_read_request(struct bio * bi, int error)
else {
clear_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReWrite, &sh->dev[i].flags);
- md_error(conf->mddev, rdev);
+ if (!(set_bad
+ && test_bit(In_sync, &rdev->flags)
+ && rdev_set_badblocks(
+ rdev, sh->sector, STRIPE_SECTORS, 0)))
+ md_error(conf->mddev, rdev);
}
}
rdev_dec_pending(rdev, conf->mddev);
@@ -3582,8 +3597,18 @@ static void handle_stripe(struct stripe_head *sh)
finish:
/* wait for this device to become unblocked */
- if (conf->mddev->external && unlikely(s.blocked_rdev))
- md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
+ if (unlikely(s.blocked_rdev)) {
+ if (conf->mddev->external)
+ md_wait_for_blocked_rdev(s.blocked_rdev,
+ conf->mddev);
+ else
+ /* Internal metadata will immediately
+ * be written by raid5d, so we don't
+ * need to wait here.
+ */
+ rdev_dec_pending(s.blocked_rdev,
+ conf->mddev);
+ }
if (s.handle_bad_blocks)
for (i = disks; i--; ) {
@@ -3881,8 +3906,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
raid_bio->bi_next = (void*)rdev;
align_bi->bi_bdev = rdev->bdev;
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
- /* No reshape active, so we can trust rdev->data_offset */
- align_bi->bi_sector += rdev->data_offset;
if (!bio_fits_rdev(align_bi) ||
is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
@@ -3893,6 +3916,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
return 0;
}
+ /* No reshape active, so we can trust rdev->data_offset */
+ align_bi->bi_sector += rdev->data_offset;
+
spin_lock_irq(&conf->device_lock);
wait_event_lock_irq(conf->wait_for_stripe,
conf->quiesce == 0,
@@ -3971,7 +3997,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
struct stripe_head *sh;
const int rw = bio_data_dir(bi);
int remaining;
- int plugged;
if (unlikely(bi->bi_rw & REQ_FLUSH)) {
md_flush_request(mddev, bi);
@@ -3990,7 +4015,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
- plugged = mddev_check_plugged(mddev);
for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);
int previous;
@@ -4092,6 +4116,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
if ((bi->bi_rw & REQ_SYNC) &&
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
+ mddev_check_plugged(mddev);
release_stripe(sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
@@ -4099,10 +4124,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
finish_wait(&conf->wait_for_overlap, &w);
break;
}
-
}
- if (!plugged)
- md_wakeup_thread(mddev->thread);
spin_lock_irq(&conf->device_lock);
remaining = raid5_dec_bi_phys_segments(bi);
@@ -4823,6 +4845,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
int raid_disk, memory, max_disks;
struct md_rdev *rdev;
struct disk_info *disk;
+ char pers_name[6];
if (mddev->new_level != 5
&& mddev->new_level != 4
@@ -4946,7 +4969,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
mdname(mddev), memory);
- conf->thread = md_register_thread(raid5d, mddev, NULL);
+ sprintf(pers_name, "raid%d", mddev->new_level);
+ conf->thread = md_register_thread(raid5d, mddev, pers_name);
if (!conf->thread) {
printk(KERN_ERR
"md/raid:%s: couldn't allocate thread.\n",
@@ -5465,10 +5489,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->saved_raid_disk >= 0 &&
rdev->saved_raid_disk >= first &&
conf->disks[rdev->saved_raid_disk].rdev == NULL)
- disk = rdev->saved_raid_disk;
- else
- disk = first;
- for ( ; disk <= last ; disk++) {
+ first = rdev->saved_raid_disk;
+
+ for (disk = first; disk <= last; disk++) {
p = conf->disks + disk;
if (p->rdev == NULL) {
clear_bit(In_sync, &rdev->flags);
@@ -5477,8 +5500,11 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->saved_raid_disk != disk)
conf->fullsync = 1;
rcu_assign_pointer(p->rdev, rdev);
- break;
+ goto out;
}
+ }
+ for (disk = first; disk <= last; disk++) {
+ p = conf->disks + disk;
if (test_bit(WantReplacement, &p->rdev->flags) &&
p->replacement == NULL) {
clear_bit(In_sync, &rdev->flags);
@@ -5490,6 +5516,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
break;
}
}
+out:
print_raid5_conf(conf);
return err;
}
diff --git a/security/security.c b/security/security.c
index 3efc9b12aef4..860aeb349cb3 100644
--- a/security/security.c
+++ b/security/security.c
@@ -23,6 +23,7 @@
#include <linux/mman.h>
#include <linux/mount.h>
#include <linux/personality.h>
+#include <linux/backing-dev.h>
#include <net/flow.h>
#define MAX_LSM_EVM_XATTR 2