diff options
Diffstat (limited to 'drivers/xen')
-rw-r--r-- | drivers/xen/Kconfig | 8 | ||||
-rw-r--r-- | drivers/xen/Makefile | 2 | ||||
-rw-r--r-- | drivers/xen/events.c | 18 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 2 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 67 | ||||
-rw-r--r-- | drivers/xen/mcelog.c | 414 | ||||
-rw-r--r-- | drivers/xen/pcpu.c | 371 | ||||
-rw-r--r-- | drivers/xen/platform-pci.c | 3 | ||||
-rw-r--r-- | drivers/xen/privcmd.c | 135 | ||||
-rw-r--r-- | drivers/xen/swiotlb-xen.c | 4 | ||||
-rw-r--r-- | drivers/xen/sys-hypervisor.c | 13 | ||||
-rw-r--r-- | drivers/xen/tmem.c | 1 | ||||
-rw-r--r-- | drivers/xen/xen-acpi-processor.c | 9 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/conf_space.c | 6 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/pci_stub.c | 97 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_client.c | 6 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_comms.c | 2 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_dev_backend.c | 2 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 56 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_frontend.c | 1 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_xs.c | 21 |
21 files changed, 1152 insertions, 86 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 8d2501e604dd..d4dffcd52873 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -196,4 +196,12 @@ config XEN_ACPI_PROCESSOR called xen_acpi_processor If you do not know what to choose, select M here. If the CPUFREQ drivers are built in, select Y here. +config XEN_MCE_LOG + bool "Xen platform mcelog" + depends on XEN_DOM0 && X86_64 && X86_MCE + default n + help + Allow kernel fetching MCE error from Xen platform and + converting it into Linux mcelog format for mcelog tools + endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index fc3488631136..d80bea5535a2 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -17,7 +17,9 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PVHVM) += platform-pci.o obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o +obj-$(CONFIG_XEN_DOM0) += pcpu.o obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o +obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7595581d032c..c60d1629c916 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -373,11 +373,22 @@ static void unmask_evtchn(int port) { struct shared_info *s = HYPERVISOR_shared_info; unsigned int cpu = get_cpu(); + int do_hypercall = 0, evtchn_pending = 0; BUG_ON(!irqs_disabled()); - /* Slow path (hypercall) if this is a non-local port. */ - if (unlikely(cpu != cpu_from_evtchn(port))) { + if (unlikely((cpu != cpu_from_evtchn(port)))) + do_hypercall = 1; + else + evtchn_pending = sync_test_bit(port, &s->evtchn_pending[0]); + + if (unlikely(evtchn_pending && xen_hvm_domain())) + do_hypercall = 1; + + /* Slow path (hypercall) if this is a non-local port or if this is + * an hvm domain and an event is pending (hvm domains don't have + * their own implementation of irq_enable). */ + if (do_hypercall) { struct evtchn_unmask unmask = { .port = port }; (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); } else { @@ -390,7 +401,7 @@ static void unmask_evtchn(int port) * 'hw_resend_irq'. Just like a real IO-APIC we 'lose * the interrupt edge' if the channel is masked. */ - if (sync_test_bit(port, &s->evtchn_pending[0]) && + if (evtchn_pending && !sync_test_and_set_bit(port / BITS_PER_LONG, &vcpu_info->evtchn_pending_sel)) vcpu_info->evtchn_upcall_pending = 1; @@ -831,6 +842,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) struct irq_info *info = info_for_irq(irq); WARN_ON(info == NULL || info->type != IRQT_EVTCHN); } + irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); out: mutex_unlock(&irq_mapping_update_lock); diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1ffd03bf8e10..163b7e985ed0 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -445,7 +445,7 @@ static void mn_release(struct mmu_notifier *mn, spin_unlock(&priv->lock); } -struct mmu_notifier_ops gntdev_mmu_ops = { +static struct mmu_notifier_ops gntdev_mmu_ops = { .release = mn_release, .invalidate_page = mn_invl_page, .invalidate_range_start = mn_invl_range_start, diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 0bfc1ef11259..3a567b15600b 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -38,6 +38,7 @@ #include <linux/vmalloc.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/delay.h> #include <linux/hardirq.h> #include <xen/xen.h> @@ -47,6 +48,7 @@ #include <xen/interface/memory.h> #include <xen/hvc-console.h> #include <asm/xen/hypercall.h> +#include <asm/xen/interface.h> #include <asm/pgtable.h> #include <asm/sync_bitops.h> @@ -285,10 +287,9 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); -void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, - unsigned long frame, int flags, - unsigned page_off, - unsigned length) +static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, unsigned length) { gnttab_shared.v2[ref].sub_page.frame = frame; gnttab_shared.v2[ref].sub_page.page_off = page_off; @@ -345,9 +346,9 @@ bool gnttab_subpage_grants_available(void) } EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available); -void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid, - int flags, domid_t trans_domid, - grant_ref_t trans_gref) +static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid, + int flags, domid_t trans_domid, + grant_ref_t trans_gref) { gnttab_shared.v2[ref].transitive.trans_domid = trans_domid; gnttab_shared.v2[ref].transitive.gref = trans_gref; @@ -823,6 +824,52 @@ unsigned int gnttab_max_grant_frames(void) } EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); +/* Handling of paged out grant targets (GNTST_eagain) */ +#define MAX_DELAY 256 +static inline void +gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status, + const char *func) +{ + unsigned delay = 1; + + do { + BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1)); + if (*status == GNTST_eagain) + msleep(delay++); + } while ((*status == GNTST_eagain) && (delay < MAX_DELAY)); + + if (delay >= MAX_DELAY) { + printk(KERN_ERR "%s: %s eagain grant\n", func, current->comm); + *status = GNTST_bad_page; + } +} + +void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count) +{ + struct gnttab_map_grant_ref *op; + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count)) + BUG(); + for (op = batch; op < batch + count; op++) + if (op->status == GNTST_eagain) + gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op, + &op->status, __func__); +} +EXPORT_SYMBOL_GPL(gnttab_batch_map); + +void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count) +{ + struct gnttab_copy *op; + + if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count)) + BUG(); + for (op = batch; op < batch + count; op++) + if (op->status == GNTST_eagain) + gnttab_retry_eagain_gop(GNTTABOP_copy, op, + &op->status, __func__); +} +EXPORT_SYMBOL_GPL(gnttab_batch_copy); + int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count) @@ -836,6 +883,12 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, if (ret) return ret; + /* Retry eagain maps */ + for (i = 0; i < count; i++) + if (map_ops[i].status == GNTST_eagain) + gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i, + &map_ops[i].status, __func__); + if (xen_feature(XENFEAT_auto_translated_physmap)) return ret; diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c new file mode 100644 index 000000000000..8feee08bcb43 --- /dev/null +++ b/drivers/xen/mcelog.c @@ -0,0 +1,414 @@ +/****************************************************************************** + * mcelog.c + * Driver for receiving and transferring machine check error infomation + * + * Copyright (c) 2012 Intel Corporation + * Author: Liu, Jinsong <jinsong.liu@intel.com> + * Author: Jiang, Yunhong <yunhong.jiang@intel.com> + * Author: Ke, Liping <liping.ke@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/device.h> +#include <linux/miscdevice.h> +#include <linux/uaccess.h> +#include <linux/capability.h> +#include <linux/poll.h> +#include <linux/sched.h> + +#include <xen/interface/xen.h> +#include <xen/events.h> +#include <xen/interface/vcpu.h> +#include <xen/xen.h> +#include <asm/xen/hypercall.h> +#include <asm/xen/hypervisor.h> + +#define XEN_MCELOG "xen_mcelog: " + +static struct mc_info g_mi; +static struct mcinfo_logical_cpu *g_physinfo; +static uint32_t ncpus; + +static DEFINE_MUTEX(mcelog_lock); + +static struct xen_mce_log xen_mcelog = { + .signature = XEN_MCE_LOG_SIGNATURE, + .len = XEN_MCE_LOG_LEN, + .recordlen = sizeof(struct xen_mce), +}; + +static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock); +static int xen_mce_chrdev_open_count; /* #times opened */ +static int xen_mce_chrdev_open_exclu; /* already open exclusive? */ + +static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait); + +static int xen_mce_chrdev_open(struct inode *inode, struct file *file) +{ + spin_lock(&xen_mce_chrdev_state_lock); + + if (xen_mce_chrdev_open_exclu || + (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) { + spin_unlock(&xen_mce_chrdev_state_lock); + + return -EBUSY; + } + + if (file->f_flags & O_EXCL) + xen_mce_chrdev_open_exclu = 1; + xen_mce_chrdev_open_count++; + + spin_unlock(&xen_mce_chrdev_state_lock); + + return nonseekable_open(inode, file); +} + +static int xen_mce_chrdev_release(struct inode *inode, struct file *file) +{ + spin_lock(&xen_mce_chrdev_state_lock); + + xen_mce_chrdev_open_count--; + xen_mce_chrdev_open_exclu = 0; + + spin_unlock(&xen_mce_chrdev_state_lock); + + return 0; +} + +static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf, + size_t usize, loff_t *off) +{ + char __user *buf = ubuf; + unsigned num; + int i, err; + + mutex_lock(&mcelog_lock); + + num = xen_mcelog.next; + + /* Only supports full reads right now */ + err = -EINVAL; + if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce)) + goto out; + + err = 0; + for (i = 0; i < num; i++) { + struct xen_mce *m = &xen_mcelog.entry[i]; + + err |= copy_to_user(buf, m, sizeof(*m)); + buf += sizeof(*m); + } + + memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce)); + xen_mcelog.next = 0; + + if (err) + err = -EFAULT; + +out: + mutex_unlock(&mcelog_lock); + + return err ? err : buf - ubuf; +} + +static unsigned int xen_mce_chrdev_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &xen_mce_chrdev_wait, wait); + + if (xen_mcelog.next) + return POLLIN | POLLRDNORM; + + return 0; +} + +static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd, + unsigned long arg) +{ + int __user *p = (int __user *)arg; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (cmd) { + case MCE_GET_RECORD_LEN: + return put_user(sizeof(struct xen_mce), p); + case MCE_GET_LOG_LEN: + return put_user(XEN_MCE_LOG_LEN, p); + case MCE_GETCLEAR_FLAGS: { + unsigned flags; + + do { + flags = xen_mcelog.flags; + } while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags); + + return put_user(flags, p); + } + default: + return -ENOTTY; + } +} + +static const struct file_operations xen_mce_chrdev_ops = { + .open = xen_mce_chrdev_open, + .release = xen_mce_chrdev_release, + .read = xen_mce_chrdev_read, + .poll = xen_mce_chrdev_poll, + .unlocked_ioctl = xen_mce_chrdev_ioctl, + .llseek = no_llseek, +}; + +static struct miscdevice xen_mce_chrdev_device = { + MISC_MCELOG_MINOR, + "mcelog", + &xen_mce_chrdev_ops, +}; + +/* + * Caller should hold the mcelog_lock + */ +static void xen_mce_log(struct xen_mce *mce) +{ + unsigned entry; + + entry = xen_mcelog.next; + + /* + * When the buffer fills up discard new entries. + * Assume that the earlier errors are the more + * interesting ones: + */ + if (entry >= XEN_MCE_LOG_LEN) { + set_bit(XEN_MCE_OVERFLOW, + (unsigned long *)&xen_mcelog.flags); + return; + } + + memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce)); + + xen_mcelog.next++; +} + +static int convert_log(struct mc_info *mi) +{ + struct mcinfo_common *mic; + struct mcinfo_global *mc_global; + struct mcinfo_bank *mc_bank; + struct xen_mce m; + uint32_t i; + + mic = NULL; + x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL); + if (unlikely(!mic)) { + pr_warning(XEN_MCELOG "Failed to find global error info\n"); + return -ENODEV; + } + + memset(&m, 0, sizeof(struct xen_mce)); + + mc_global = (struct mcinfo_global *)mic; + m.mcgstatus = mc_global->mc_gstatus; + m.apicid = mc_global->mc_apicid; + + for (i = 0; i < ncpus; i++) + if (g_physinfo[i].mc_apicid == m.apicid) + break; + if (unlikely(i == ncpus)) { + pr_warning(XEN_MCELOG "Failed to match cpu with apicid %d\n", + m.apicid); + return -ENODEV; + } + + m.socketid = g_physinfo[i].mc_chipid; + m.cpu = m.extcpu = g_physinfo[i].mc_cpunr; + m.cpuvendor = (__u8)g_physinfo[i].mc_vendor; + m.mcgcap = g_physinfo[i].mc_msrvalues[__MC_MSR_MCGCAP].value; + + mic = NULL; + x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK); + if (unlikely(!mic)) { + pr_warning(XEN_MCELOG "Fail to find bank error info\n"); + return -ENODEV; + } + + do { + if ((!mic) || (mic->size == 0) || + (mic->type != MC_TYPE_GLOBAL && + mic->type != MC_TYPE_BANK && + mic->type != MC_TYPE_EXTENDED && + mic->type != MC_TYPE_RECOVERY)) + break; + + if (mic->type == MC_TYPE_BANK) { + mc_bank = (struct mcinfo_bank *)mic; + m.misc = mc_bank->mc_misc; + m.status = mc_bank->mc_status; + m.addr = mc_bank->mc_addr; + m.tsc = mc_bank->mc_tsc; + m.bank = mc_bank->mc_bank; + m.finished = 1; + /*log this record*/ + xen_mce_log(&m); + } + mic = x86_mcinfo_next(mic); + } while (1); + + return 0; +} + +static int mc_queue_handle(uint32_t flags) +{ + struct xen_mc mc_op; + int ret = 0; + + mc_op.cmd = XEN_MC_fetch; + mc_op.interface_version = XEN_MCA_INTERFACE_VERSION; + set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi); + do { + mc_op.u.mc_fetch.flags = flags; + ret = HYPERVISOR_mca(&mc_op); + if (ret) { + pr_err(XEN_MCELOG "Failed to fetch %s error log\n", + (flags == XEN_MC_URGENT) ? + "urgnet" : "nonurgent"); + break; + } + + if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA || + mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED) + break; + else { + ret = convert_log(&g_mi); + if (ret) + pr_warning(XEN_MCELOG + "Failed to convert this error log, " + "continue acking it anyway\n"); + + mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK; + ret = HYPERVISOR_mca(&mc_op); + if (ret) { + pr_err(XEN_MCELOG + "Failed to ack previous error log\n"); + break; + } + } + } while (1); + + return ret; +} + +/* virq handler for machine check error info*/ +static void xen_mce_work_fn(struct work_struct *work) +{ + int err; + + mutex_lock(&mcelog_lock); + + /* urgent mc_info */ + err = mc_queue_handle(XEN_MC_URGENT); + if (err) + pr_err(XEN_MCELOG + "Failed to handle urgent mc_info queue, " + "continue handling nonurgent mc_info queue anyway.\n"); + + /* nonurgent mc_info */ + err = mc_queue_handle(XEN_MC_NONURGENT); + if (err) + pr_err(XEN_MCELOG + "Failed to handle nonurgent mc_info queue.\n"); + + /* wake processes polling /dev/mcelog */ + wake_up_interruptible(&xen_mce_chrdev_wait); + + mutex_unlock(&mcelog_lock); +} +static DECLARE_WORK(xen_mce_work, xen_mce_work_fn); + +static irqreturn_t xen_mce_interrupt(int irq, void *dev_id) +{ + schedule_work(&xen_mce_work); + return IRQ_HANDLED; +} + +static int bind_virq_for_mce(void) +{ + int ret; + struct xen_mc mc_op; + + memset(&mc_op, 0, sizeof(struct xen_mc)); + + /* Fetch physical CPU Numbers */ + mc_op.cmd = XEN_MC_physcpuinfo; + mc_op.interface_version = XEN_MCA_INTERFACE_VERSION; + set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); + ret = HYPERVISOR_mca(&mc_op); + if (ret) { + pr_err(XEN_MCELOG "Failed to get CPU numbers\n"); + return ret; + } + + /* Fetch each CPU Physical Info for later reference*/ + ncpus = mc_op.u.mc_physcpuinfo.ncpus; + g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu), + GFP_KERNEL); + if (!g_physinfo) + return -ENOMEM; + set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo); + ret = HYPERVISOR_mca(&mc_op); + if (ret) { + pr_err(XEN_MCELOG "Failed to get CPU info\n"); + kfree(g_physinfo); + return ret; + } + + ret = bind_virq_to_irqhandler(VIRQ_MCA, 0, + xen_mce_interrupt, 0, "mce", NULL); + if (ret < 0) { + pr_err(XEN_MCELOG "Failed to bind virq\n"); + kfree(g_physinfo); + return ret; + } + + return 0; +} + +static int __init xen_late_init_mcelog(void) +{ + /* Only DOM0 is responsible for MCE logging */ + if (xen_initial_domain()) { + /* register character device /dev/mcelog for xen mcelog */ + if (misc_register(&xen_mce_chrdev_device)) + return -ENODEV; + return bind_virq_for_mce(); + } + + return -ENODEV; +} +device_initcall(xen_late_init_mcelog); diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c new file mode 100644 index 000000000000..067fcfa1723e --- /dev/null +++ b/drivers/xen/pcpu.c @@ -0,0 +1,371 @@ +/****************************************************************************** + * pcpu.c + * Management physical cpu in dom0, get pcpu info and provide sys interface + * + * Copyright (c) 2012 Intel Corporation + * Author: Liu, Jinsong <jinsong.liu@intel.com> + * Author: Jiang, Yunhong <yunhong.jiang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/cpu.h> +#include <linux/stat.h> +#include <linux/capability.h> + +#include <xen/xen.h> +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/interface/platform.h> +#include <asm/xen/hypervisor.h> +#include <asm/xen/hypercall.h> + +#define XEN_PCPU "xen_cpu: " + +/* + * @cpu_id: Xen physical cpu logic number + * @flags: Xen physical cpu status flag + * - XEN_PCPU_FLAGS_ONLINE: cpu is online + * - XEN_PCPU_FLAGS_INVALID: cpu is not present + */ +struct pcpu { + struct list_head list; + struct device dev; + uint32_t cpu_id; + uint32_t flags; +}; + +static struct bus_type xen_pcpu_subsys = { + .name = "xen_cpu", + .dev_name = "xen_cpu", +}; + +static DEFINE_MUTEX(xen_pcpu_lock); + +static LIST_HEAD(xen_pcpus); + +static int xen_pcpu_down(uint32_t cpu_id) +{ + struct xen_platform_op op = { + .cmd = XENPF_cpu_offline, + .interface_version = XENPF_INTERFACE_VERSION, + .u.cpu_ol.cpuid = cpu_id, + }; + + return HYPERVISOR_dom0_op(&op); +} + +static int xen_pcpu_up(uint32_t cpu_id) +{ + struct xen_platform_op op = { + .cmd = XENPF_cpu_online, + .interface_version = XENPF_INTERFACE_VERSION, + .u.cpu_ol.cpuid = cpu_id, + }; + + return HYPERVISOR_dom0_op(&op); +} + +static ssize_t show_online(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pcpu *cpu = container_of(dev, struct pcpu, dev); + + return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE)); +} + +static ssize_t __ref store_online(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pcpu *pcpu = container_of(dev, struct pcpu, dev); + unsigned long long val; + ssize_t ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (kstrtoull(buf, 0, &val) < 0) + return -EINVAL; + + switch (val) { + case 0: + ret = xen_pcpu_down(pcpu->cpu_id); + break; + case 1: + ret = xen_pcpu_up(pcpu->cpu_id); + break; + default: + ret = -EINVAL; + } + + if (ret >= 0) + ret = count; + return ret; +} +static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online); + +static bool xen_pcpu_online(uint32_t flags) +{ + return !!(flags & XEN_PCPU_FLAGS_ONLINE); +} + +static void pcpu_online_status(struct xenpf_pcpuinfo *info, + struct pcpu *pcpu) +{ + if (xen_pcpu_online(info->flags) && + !xen_pcpu_online(pcpu->flags)) { + /* the pcpu is onlined */ + pcpu->flags |= XEN_PCPU_FLAGS_ONLINE; + kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE); + } else if (!xen_pcpu_online(info->flags) && + xen_pcpu_online(pcpu->flags)) { + /* The pcpu is offlined */ + pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE; + kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE); + } +} + +static struct pcpu *get_pcpu(uint32_t cpu_id) +{ + struct pcpu *pcpu; + + list_for_each_entry(pcpu, &xen_pcpus, list) { + if (pcpu->cpu_id == cpu_id) + return pcpu; + } + + return NULL; +} + +static void pcpu_release(struct device *dev) +{ + struct pcpu *pcpu = container_of(dev, struct pcpu, dev); + + list_del(&pcpu->list); + kfree(pcpu); +} + +static void unregister_and_remove_pcpu(struct pcpu *pcpu) +{ + struct device *dev; + + if (!pcpu) + return; + + dev = &pcpu->dev; + if (dev->id) + device_remove_file(dev, &dev_attr_online); + + /* pcpu remove would be implicitly done */ + device_unregister(dev); +} + +static int register_pcpu(struct pcpu *pcpu) +{ + struct device *dev; + int err = -EINVAL; + + if (!pcpu) + return err; + + dev = &pcpu->dev; + dev->bus = &xen_pcpu_subsys; + dev->id = pcpu->cpu_id; + dev->release = pcpu_release; + + err = device_register(dev); + if (err) { + pcpu_release(dev); + return err; + } + + /* + * Xen never offline cpu0 due to several restrictions + * and assumptions. This basically doesn't add a sys control + * to user, one cannot attempt to offline BSP. + */ + if (dev->id) { + err = device_create_file(dev, &dev_attr_online); + if (err) { + device_unregister(dev); + return err; + } + } + + return 0; +} + +static struct pcpu *create_and_register_pcpu(struct xenpf_pcpuinfo *info) +{ + struct pcpu *pcpu; + int err; + + if (info->flags & XEN_PCPU_FLAGS_INVALID) + return ERR_PTR(-ENODEV); + + pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL); + if (!pcpu) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&pcpu->list); + pcpu->cpu_id = info->xen_cpuid; + pcpu->flags = info->flags; + + /* Need hold on xen_pcpu_lock before pcpu list manipulations */ + list_add_tail(&pcpu->list, &xen_pcpus); + + err = register_pcpu(pcpu); + if (err) { + pr_warning(XEN_PCPU "Failed to register pcpu%u\n", + info->xen_cpuid); + return ERR_PTR(-ENOENT); + } + + return pcpu; +} + +/* + * Caller should hold the xen_pcpu_lock + */ +static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu) +{ + int ret; + struct pcpu *pcpu = NULL; + struct xenpf_pcpuinfo *info; + struct xen_platform_op op = { + .cmd = XENPF_get_cpuinfo, + .interface_version = XENPF_INTERFACE_VERSION, + .u.pcpu_info.xen_cpuid = cpu, + }; + + ret = HYPERVISOR_dom0_op(&op); + if (ret) + return ret; + + info = &op.u.pcpu_info; + if (max_cpu) + *max_cpu = info->max_present; + + pcpu = get_pcpu(cpu); + + /* + * Only those at cpu present map has its sys interface. + */ + if (info->flags & XEN_PCPU_FLAGS_INVALID) { + if (pcpu) + unregister_and_remove_pcpu(pcpu); + return 0; + } + + if (!pcpu) { + pcpu = create_and_register_pcpu(info); + if (IS_ERR_OR_NULL(pcpu)) + return -ENODEV; + } else + pcpu_online_status(info, pcpu); + + return 0; +} + +/* + * Sync dom0's pcpu information with xen hypervisor's + */ +static int xen_sync_pcpus(void) +{ + /* + * Boot cpu always have cpu_id 0 in xen + */ + uint32_t cpu = 0, max_cpu = 0; + int err = 0; + struct pcpu *pcpu, *tmp; + + mutex_lock(&xen_pcpu_lock); + + while (!err && (cpu <= max_cpu)) { + err = sync_pcpu(cpu, &max_cpu); + cpu++; + } + + if (err) + list_for_each_entry_safe(pcpu, tmp, &xen_pcpus, list) + unregister_and_remove_pcpu(pcpu); + + mutex_unlock(&xen_pcpu_lock); + + return err; +} + +static void xen_pcpu_work_fn(struct work_struct *work) +{ + xen_sync_pcpus(); +} +static DECLARE_WORK(xen_pcpu_work, xen_pcpu_work_fn); + +static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id) +{ + schedule_work(&xen_pcpu_work); + return IRQ_HANDLED; +} + +static int __init xen_pcpu_init(void) +{ + int irq, ret; + + if (!xen_initial_domain()) + return -ENODEV; + + irq = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0, + xen_pcpu_interrupt, 0, + "xen-pcpu", NULL); + if (irq < 0) { + pr_warning(XEN_PCPU "Failed to bind pcpu virq\n"); + return irq; + } + + ret = subsys_system_register(&xen_pcpu_subsys, NULL); + if (ret) { + pr_warning(XEN_PCPU "Failed to register pcpu subsys\n"); + goto err1; + } + + ret = xen_sync_pcpus(); + if (ret) { + pr_warning(XEN_PCPU "Failed to sync pcpu info\n"); + goto err2; + } + + return 0; + +err2: + bus_unregister(&xen_pcpu_subsys); +err1: + unbind_from_irqhandler(irq, NULL); + return ret; +} +arch_initcall(xen_pcpu_init); diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 2389e581e23c..97ca359ae2bd 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -109,6 +109,9 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, long mmio_addr, mmio_len; unsigned int max_nr_gframes; + if (!xen_domain()) + return -ENODEV; + i = pci_enable_device(pdev); if (i) return i; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index ccee0f16bcf8..ef6389580b8c 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages) */ static int gather_array(struct list_head *pagelist, unsigned nelem, size_t size, - void __user *data) + const void __user *data) { unsigned pageidx; void *pagedata; @@ -246,61 +246,117 @@ struct mmap_batch_state { domid_t domain; unsigned long va; struct vm_area_struct *vma; - int err; - - xen_pfn_t __user *user; + /* A tristate: + * 0 for no errors + * 1 if at least one error has happened (and no + * -ENOENT errors have happened) + * -ENOENT if at least 1 -ENOENT has happened. + */ + int global_error; + /* An array for individual errors */ + int *err; + + /* User-space mfn array to store errors in the second pass for V1. */ + xen_pfn_t __user *user_mfn; }; static int mmap_batch_fn(void *data, void *state) { xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; + int ret; + + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, + st->vma->vm_page_prot, st->domain); - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, - st->vma->vm_page_prot, st->domain) < 0) { - *mfnp |= 0xf0000000U; - st->err++; + /* Store error code for second pass. */ + *(st->err++) = ret; + + /* And see if it affects the global_error. */ + if (ret < 0) { + if (ret == -ENOENT) + st->global_error = -ENOENT; + else { + /* Record that at least one error has happened. */ + if (st->global_error == 0) + st->global_error = 1; + } } st->va += PAGE_SIZE; return 0; } -static int mmap_return_errors(void *data, void *state) +static int mmap_return_errors_v1(void *data, void *state) { xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; - - return put_user(*mfnp, st->user++); + int err = *(st->err++); + + /* + * V1 encodes the error codes in the 32bit top nibble of the + * mfn (with its known limitations vis-a-vis 64 bit callers). + */ + *mfnp |= (err == -ENOENT) ? + PRIVCMD_MMAPBATCH_PAGED_ERROR : + PRIVCMD_MMAPBATCH_MFN_ERROR; + return __put_user(*mfnp, st->user_mfn++); } static struct vm_operations_struct privcmd_vm_ops; -static long privcmd_ioctl_mmap_batch(void __user *udata) +static long privcmd_ioctl_mmap_batch(void __user *udata, int version) { int ret; - struct privcmd_mmapbatch m; + struct privcmd_mmapbatch_v2 m; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long nr_pages; LIST_HEAD(pagelist); + int *err_array = NULL; struct mmap_batch_state state; if (!xen_initial_domain()) return -EPERM; - if (copy_from_user(&m, udata, sizeof(m))) - return -EFAULT; + switch (version) { + case 1: + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) + return -EFAULT; + /* Returns per-frame error in m.arr. */ + m.err = NULL; + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) + return -EFAULT; + break; + case 2: + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2))) + return -EFAULT; + /* Returns per-frame error code in m.err. */ + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) + return -EFAULT; + break; + default: + return -EINVAL; + } nr_pages = m.num; if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) return -EINVAL; - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), - m.arr); + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr); - if (ret || list_empty(&pagelist)) + if (ret) goto out; + if (list_empty(&pagelist)) { + ret = -EINVAL; + goto out; + } + + err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL); + if (err_array == NULL) { + ret = -ENOMEM; + goto out; + } down_write(&mm->mmap_sem); @@ -315,24 +371,37 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) goto out; } - state.domain = m.dom; - state.vma = vma; - state.va = m.addr; - state.err = 0; + state.domain = m.dom; + state.vma = vma; + state.va = m.addr; + state.global_error = 0; + state.err = err_array; - ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, mmap_batch_fn, &state); + /* mmap_batch_fn guarantees ret == 0 */ + BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_batch_fn, &state)); up_write(&mm->mmap_sem); - if (state.err > 0) { - state.user = m.arr; + if (state.global_error && (version == 1)) { + /* Write back errors in second pass. */ + state.user_mfn = (xen_pfn_t *)m.arr; + state.err = err_array; ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, - mmap_return_errors, &state); + &pagelist, mmap_return_errors_v1, &state); + } else if (version == 2) { + ret = __copy_to_user(m.err, err_array, m.num * sizeof(int)); + if (ret) + ret = -EFAULT; } + /* If we have not had any EFAULT-like global errors then set the global + * error to -ENOENT if necessary. */ + if ((ret == 0) && (state.global_error == -ENOENT)) + ret = -ENOENT; + out: + kfree(err_array); free_page_list(&pagelist); return ret; @@ -354,7 +423,11 @@ static long privcmd_ioctl(struct file *file, break; case IOCTL_PRIVCMD_MMAPBATCH: - ret = privcmd_ioctl_mmap_batch(udata); + ret = privcmd_ioctl_mmap_batch(udata, 1); + break; + + case IOCTL_PRIVCMD_MMAPBATCH_V2: + ret = privcmd_ioctl_mmap_batch(udata, 2); break; default: @@ -380,10 +453,6 @@ static struct vm_operations_struct privcmd_vm_ops = { static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) { - /* Unsupported for auto-translate guests. */ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return -ENOSYS; - /* DONTCOPY is essential for Xen because copy_page_range doesn't know * how to recreate these mappings */ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index ab4c66c19c1a..58db6df866ef 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -52,7 +52,7 @@ static unsigned long xen_io_tlb_nslabs; * Quick lookup value of the bus address of the IOTLB. */ -u64 start_dma_addr; +static u64 start_dma_addr; static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) { @@ -281,7 +281,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, return ret; if (hwdev && hwdev->coherent_dma_mask) - dma_mask = hwdev->coherent_dma_mask; + dma_mask = dma_alloc_coherent_mask(hwdev, flags); phys = virt_to_phys(ret); dev_addr = xen_phys_to_bus(phys); diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index fdb6d229c9bb..5e5ad7e28858 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -114,7 +114,7 @@ static void xen_sysfs_version_destroy(void) /* UUID */ -static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) +static ssize_t uuid_show_fallback(struct hyp_sysfs_attr *attr, char *buffer) { char *vm, *val; int ret; @@ -135,6 +135,17 @@ static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) return ret; } +static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + xen_domain_handle_t uuid; + int ret; + ret = HYPERVISOR_xen_version(XENVER_guest_handle, uuid); + if (ret) + return uuid_show_fallback(attr, buffer); + ret = sprintf(buffer, "%pU\n", uuid); + return ret; +} + HYPERVISOR_ATTR_RO(uuid); static int __init xen_sysfs_uuid_init(void) diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 89f264c67420..144564e5eb29 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -21,6 +21,7 @@ #include <asm/xen/hypercall.h> #include <asm/xen/page.h> #include <asm/xen/hypervisor.h> +#include <xen/tmem.h> #define TMEM_CONTROL 0 #define TMEM_NEW_POOL 1 diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 7ff2569e17ae..b590ee067fcd 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -520,15 +520,18 @@ static int __init xen_acpi_processor_init(void) if (!pr_backup) { pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); - memcpy(pr_backup, _pr, sizeof(struct acpi_processor)); + if (pr_backup) + memcpy(pr_backup, _pr, sizeof(struct acpi_processor)); } (void)upload_pm_data(_pr); } rc = check_acpi_ids(pr_backup); - if (rc) - goto err_unregister; kfree(pr_backup); + pr_backup = NULL; + + if (rc) + goto err_unregister; return 0; err_unregister: diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 30d7be026c18..46ae0f9f02ad 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -124,7 +124,7 @@ static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask, return val; } -static int pcibios_err_to_errno(int err) +static int xen_pcibios_err_to_errno(int err) { switch (err) { case PCIBIOS_SUCCESSFUL: @@ -202,7 +202,7 @@ out: pci_name(dev), size, offset, value); *ret_val = value; - return pcibios_err_to_errno(err); + return xen_pcibios_err_to_errno(err); } int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) @@ -290,7 +290,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) } } - return pcibios_err_to_errno(err); + return xen_pcibios_err_to_errno(err); } void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 097e536e8672..20e1c42c1c48 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -353,16 +353,16 @@ static int __devinit pcistub_init_device(struct pci_dev *dev) if (err) goto config_release; - dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n"); - __pci_reset_function_locked(dev); - /* We need the device active to save the state. */ dev_dbg(&dev->dev, "save state of device\n"); pci_save_state(dev); dev_data->pci_saved_state = pci_store_saved_state(dev); if (!dev_data->pci_saved_state) dev_err(&dev->dev, "Could not store PCI conf saved state!\n"); - + else { + dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n"); + __pci_reset_function_locked(dev); + } /* Now disable the device (this also ensures some private device * data is setup before we export) */ @@ -897,17 +897,41 @@ static inline int str_to_slot(const char *buf, int *domain, int *bus, int *slot, int *func) { int err; + char wc = '*'; err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func); - if (err == 4) + switch (err) { + case 3: + *func = -1; + err = sscanf(buf, " %x:%x:%x.%c", domain, bus, slot, &wc); + break; + case 2: + *slot = *func = -1; + err = sscanf(buf, " %x:%x:*.%c", domain, bus, &wc); + if (err >= 2) + ++err; + break; + } + if (err == 4 && wc == '*') return 0; else if (err < 0) return -EINVAL; /* try again without domain */ *domain = 0; + wc = '*'; err = sscanf(buf, " %x:%x.%x", bus, slot, func); - if (err == 3) + switch (err) { + case 2: + *func = -1; + err = sscanf(buf, " %x:%x.%c", bus, slot, &wc); + break; + case 1: + *slot = *func = -1; + err = sscanf(buf, " %x:*.%c", bus, &wc) + 1; + break; + } + if (err == 3 && wc == '*') return 0; return -EINVAL; @@ -930,6 +954,19 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func) { struct pcistub_device_id *pci_dev_id; unsigned long flags; + int rc = 0; + + if (slot < 0) { + for (slot = 0; !rc && slot < 32; ++slot) + rc = pcistub_device_id_add(domain, bus, slot, func); + return rc; + } + + if (func < 0) { + for (func = 0; !rc && func < 8; ++func) + rc = pcistub_device_id_add(domain, bus, slot, func); + return rc; + } pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); if (!pci_dev_id) @@ -952,15 +989,15 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func) static int pcistub_device_id_remove(int domain, int bus, int slot, int func) { struct pcistub_device_id *pci_dev_id, *t; - int devfn = PCI_DEVFN(slot, func); int err = -ENOENT; unsigned long flags; spin_lock_irqsave(&device_ids_lock, flags); list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) { - if (pci_dev_id->domain == domain - && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) { + if (pci_dev_id->domain == domain && pci_dev_id->bus == bus + && (slot < 0 || PCI_SLOT(pci_dev_id->devfn) == slot) + && (func < 0 || PCI_FUNC(pci_dev_id->devfn) == func)) { /* Don't break; here because it's possible the same * slot could be in the list more than once */ @@ -1216,6 +1253,10 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf, err = str_to_slot(buf, &domain, &bus, &slot, &func); if (err) goto out; + if (slot < 0 || func < 0) { + err = -EINVAL; + goto out; + } psdev = pcistub_device_find(domain, bus, slot, func); if (!psdev) { err = -ENODEV; @@ -1297,17 +1338,51 @@ static int __init pcistub_init(void) if (pci_devs_to_hide && *pci_devs_to_hide) { do { + char wc = '*'; + parsed = 0; err = sscanf(pci_devs_to_hide + pos, " (%x:%x:%x.%x) %n", &domain, &bus, &slot, &func, &parsed); - if (err != 4) { + switch (err) { + case 3: + func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:%x:%x.%c) %n", + &domain, &bus, &slot, &wc, + &parsed); + break; + case 2: + slot = func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:%x:*.%c) %n", + &domain, &bus, &wc, &parsed) + 1; + break; + } + + if (err != 4 || wc != '*') { domain = 0; + wc = '*'; err = sscanf(pci_devs_to_hide + pos, " (%x:%x.%x) %n", &bus, &slot, &func, &parsed); - if (err != 3) + switch (err) { + case 2: + func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:%x.%c) %n", + &bus, &slot, &wc, + &parsed); + break; + case 1: + slot = func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:*.%c) %n", + &bus, &wc, &parsed) + 1; + break; + } + if (err != 3 || wc != '*') goto parse_error; } diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index b3e146edb51d..bcf3ba4a6ec1 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -490,8 +490,7 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, op.host_addr = arbitrary_virt_to_machine(pte).maddr; - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); + gnttab_batch_map(&op, 1); if (op.status != GNTST_okay) { free_vm_area(area); @@ -572,8 +571,7 @@ int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref, dev->otherend_id); - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); + gnttab_batch_map(&op, 1); if (op.status != GNTST_okay) { xenbus_dev_fatal(dev, op.status, diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 52fe7ad07666..c5aa55c5d371 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -224,7 +224,7 @@ int xb_init_comms(void) int err; err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, 0, "xenbus", &xb_waitq); - if (err <= 0) { + if (err < 0) { printk(KERN_ERR "XENBUS request irq failed %i\n", err); return err; } diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c index be738c43104b..d73000800762 100644 --- a/drivers/xen/xenbus/xenbus_dev_backend.c +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -107,7 +107,7 @@ static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -const struct file_operations xenbus_backend_fops = { +static const struct file_operations xenbus_backend_fops = { .open = xenbus_backend_open, .mmap = xenbus_backend_mmap, .unlocked_ioctl = xenbus_backend_ioctl, diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index b793723e724d..038b71dbf03c 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -324,8 +324,8 @@ static int cmp_dev(struct device *dev, void *data) return 0; } -struct xenbus_device *xenbus_device_find(const char *nodename, - struct bus_type *bus) +static struct xenbus_device *xenbus_device_find(const char *nodename, + struct bus_type *bus) { struct xb_find_info info = { .dev = NULL, .nodename = nodename }; @@ -719,17 +719,47 @@ static int __init xenstored_local_init(void) return err; } +enum xenstore_init { + UNKNOWN, + PV, + HVM, + LOCAL, +}; static int __init xenbus_init(void) { int err = 0; + enum xenstore_init usage = UNKNOWN; + uint64_t v = 0; if (!xen_domain()) return -ENODEV; xenbus_ring_ops_init(); - if (xen_hvm_domain()) { - uint64_t v = 0; + if (xen_pv_domain()) + usage = PV; + if (xen_hvm_domain()) + usage = HVM; + if (xen_hvm_domain() && xen_initial_domain()) + usage = LOCAL; + if (xen_pv_domain() && !xen_start_info->store_evtchn) + usage = LOCAL; + if (xen_pv_domain() && xen_start_info->store_evtchn) + xenstored_ready = 1; + + switch (usage) { + case LOCAL: + err = xenstored_local_init(); + if (err) + goto out_error; + xen_store_interface = mfn_to_virt(xen_store_mfn); + break; + case PV: + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + xen_store_interface = mfn_to_virt(xen_store_mfn); + break; + case HVM: err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); if (err) goto out_error; @@ -738,18 +768,12 @@ static int __init xenbus_init(void) if (err) goto out_error; xen_store_mfn = (unsigned long)v; - xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); - } else { - xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - if (xen_store_evtchn) - xenstored_ready = 1; - else { - err = xenstored_local_init(); - if (err) - goto out_error; - } - xen_store_interface = mfn_to_virt(xen_store_mfn); + xen_store_interface = + ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); + break; + default: + pr_warn("Xenstore state unknown\n"); + break; } /* Initialize the interface to xenstore. */ diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index a31b54d48839..3159a37d966d 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -21,6 +21,7 @@ #include <xen/xenbus.h> #include <xen/events.h> #include <xen/page.h> +#include <xen/xen.h> #include <xen/platform_pci.h> diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index d1c217b23a42..131dec04794e 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -44,6 +44,7 @@ #include <linux/rwsem.h> #include <linux/module.h> #include <linux/mutex.h> +#include <asm/xen/hypervisor.h> #include <xen/xenbus.h> #include <xen/xen.h> #include "xenbus_comms.h" @@ -618,6 +619,23 @@ static struct xenbus_watch *find_watch(const char *token) return NULL; } +static void xs_reset_watches(void) +{ + int err, supported = 0; + + if (!xen_hvm_domain() || xen_initial_domain()) + return; + + err = xenbus_scanf(XBT_NIL, "control", + "platform-feature-xs_reset_watches", "%d", &supported); + if (err != 1 || !supported) + return; + + err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL)); + if (err && err != -EEXIST) + printk(KERN_WARNING "xs_reset_watches failed: %d\n", err); +} + /* Register callback to watch this node. */ int register_xenbus_watch(struct xenbus_watch *watch) { @@ -900,5 +918,8 @@ int xs_init(void) if (IS_ERR(task)) return PTR_ERR(task); + /* shutdown watches for kexec boot */ + xs_reset_watches(); + return 0; } |