From ff3a3e9ba5e4273a8bc10570adab4a390fb90757 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:59 +0200 Subject: x86 mmiotrace: move files into arch/x86/mm/. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 2 - arch/x86/kernel/mmiotrace/Makefile | 4 - arch/x86/kernel/mmiotrace/kmmio.c | 499 ------------------------------ arch/x86/kernel/mmiotrace/mmio-mod.c | 457 --------------------------- arch/x86/kernel/mmiotrace/pf_in.c | 489 ----------------------------- arch/x86/kernel/mmiotrace/pf_in.h | 39 --- arch/x86/kernel/mmiotrace/testmmiotrace.c | 71 ----- arch/x86/mm/Makefile | 5 + arch/x86/mm/kmmio.c | 499 ++++++++++++++++++++++++++++++ arch/x86/mm/mmio-mod.c | 457 +++++++++++++++++++++++++++ arch/x86/mm/pf_in.c | 489 +++++++++++++++++++++++++++++ arch/x86/mm/pf_in.h | 39 +++ arch/x86/mm/testmmiotrace.c | 71 +++++ 13 files changed, 1560 insertions(+), 1561 deletions(-) delete mode 100644 arch/x86/kernel/mmiotrace/Makefile delete mode 100644 arch/x86/kernel/mmiotrace/kmmio.c delete mode 100644 arch/x86/kernel/mmiotrace/mmio-mod.c delete mode 100644 arch/x86/kernel/mmiotrace/pf_in.c delete mode 100644 arch/x86/kernel/mmiotrace/pf_in.h delete mode 100644 arch/x86/kernel/mmiotrace/testmmiotrace.c create mode 100644 arch/x86/mm/kmmio.c create mode 100644 arch/x86/mm/mmio-mod.c create mode 100644 arch/x86/mm/pf_in.c create mode 100644 arch/x86/mm/pf_in.h create mode 100644 arch/x86/mm/testmmiotrace.c diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a51ac153685e..739d49acd2f1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -79,8 +79,6 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_MMIOTRACE) += mmiotrace/ - obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o diff --git a/arch/x86/kernel/mmiotrace/Makefile b/arch/x86/kernel/mmiotrace/Makefile deleted file mode 100644 index dbcd8d50fb8d..000000000000 --- a/arch/x86/kernel/mmiotrace/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o -obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-y := pf_in.o mmio-mod.o -obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c deleted file mode 100644 index 3ad27b8504a5..000000000000 --- a/arch/x86/kernel/mmiotrace/kmmio.c +++ /dev/null @@ -1,499 +0,0 @@ -/* Support for MMIO probes. - * Benfit many code from kprobes - * (C) 2002 Louis Zhuang . - * 2007 Alexander Eichner - * 2008 Pekka Paalanen - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define KMMIO_PAGE_HASH_BITS 4 -#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) - -struct kmmio_fault_page { - struct list_head list; - struct kmmio_fault_page *release_next; - unsigned long page; /* location of the fault page */ - - /* - * Number of times this page has been registered as a part - * of a probe. If zero, page is disarmed and this may be freed. - * Used only by writers (RCU). - */ - int count; -}; - -struct kmmio_delayed_release { - struct rcu_head rcu; - struct kmmio_fault_page *release_list; -}; - -struct kmmio_context { - struct kmmio_fault_page *fpage; - struct kmmio_probe *probe; - unsigned long saved_flags; - unsigned long addr; - int active; -}; - -static DEFINE_SPINLOCK(kmmio_lock); - -/* Protected by kmmio_lock */ -unsigned int kmmio_count; - -/* Read-protected by RCU, write-protected by kmmio_lock. */ -static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; -static LIST_HEAD(kmmio_probes); - -static struct list_head *kmmio_page_list(unsigned long page) -{ - return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; -} - -/* Accessed per-cpu */ -static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); - -/* - * this is basically a dynamic stabbing problem: - * Could use the existing prio tree code or - * Possible better implementations: - * The Interval Skip List: A Data Structure for Finding All Intervals That - * Overlap a Point (might be simple) - * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup - */ -/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */ -static struct kmmio_probe *get_kmmio_probe(unsigned long addr) -{ - struct kmmio_probe *p; - list_for_each_entry_rcu(p, &kmmio_probes, list) { - if (addr >= p->addr && addr <= (p->addr + p->len)) - return p; - } - return NULL; -} - -/* You must be holding RCU read lock. */ -static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) -{ - struct list_head *head; - struct kmmio_fault_page *p; - - page &= PAGE_MASK; - head = kmmio_page_list(page); - list_for_each_entry_rcu(p, head, list) { - if (p->page == page) - return p; - } - return NULL; -} - -static void set_page_present(unsigned long addr, bool present, int *pglevel) -{ - pteval_t pteval; - pmdval_t pmdval; - int level; - pmd_t *pmd; - pte_t *pte = lookup_address(addr, &level); - - if (!pte) { - pr_err("kmmio: no pte for page 0x%08lx\n", addr); - return; - } - - if (pglevel) - *pglevel = level; - - switch (level) { - case PG_LEVEL_2M: - pmd = (pmd_t *)pte; - pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; - if (present) - pmdval |= _PAGE_PRESENT; - set_pmd(pmd, __pmd(pmdval)); - break; - - case PG_LEVEL_4K: - pteval = pte_val(*pte) & ~_PAGE_PRESENT; - if (present) - pteval |= _PAGE_PRESENT; - set_pte_atomic(pte, __pte(pteval)); - break; - - default: - pr_err("kmmio: unexpected page level 0x%x.\n", level); - return; - } - - __flush_tlb_one(addr); -} - -/** Mark the given page as not present. Access to it will trigger a fault. */ -static void arm_kmmio_fault_page(unsigned long page, int *page_level) -{ - set_page_present(page & PAGE_MASK, false, page_level); -} - -/** Mark the given page as present. */ -static void disarm_kmmio_fault_page(unsigned long page, int *page_level) -{ - set_page_present(page & PAGE_MASK, true, page_level); -} - -/* - * This is being called from do_page_fault(). - * - * We may be in an interrupt or a critical section. Also prefecthing may - * trigger a page fault. We may be in the middle of process switch. - * We cannot take any locks, because we could be executing especially - * within a kmmio critical section. - * - * Local interrupts are disabled, so preemption cannot happen. - * Do not enable interrupts, do not sleep, and watch out for other CPUs. - */ -/* - * Interrupts are disabled on entry as trap3 is an interrupt gate - * and they remain disabled thorough out this function. - */ -int kmmio_handler(struct pt_regs *regs, unsigned long addr) -{ - struct kmmio_context *ctx; - struct kmmio_fault_page *faultpage; - int ret = 0; /* default to fault not handled */ - - /* - * Preemption is now disabled to prevent process switch during - * single stepping. We can only handle one active kmmio trace - * per cpu, so ensure that we finish it before something else - * gets to run. We also hold the RCU read lock over single - * stepping to avoid looking up the probe and kmmio_fault_page - * again. - */ - preempt_disable(); - rcu_read_lock(); - - faultpage = get_kmmio_fault_page(addr); - if (!faultpage) { - /* - * Either this page fault is not caused by kmmio, or - * another CPU just pulled the kmmio probe from under - * our feet. The latter case should not be possible. - */ - goto no_kmmio; - } - - ctx = &get_cpu_var(kmmio_ctx); - if (ctx->active) { - disarm_kmmio_fault_page(faultpage->page, NULL); - if (addr == ctx->addr) { - /* - * On SMP we sometimes get recursive probe hits on the - * same address. Context is already saved, fall out. - */ - pr_debug("kmmio: duplicate probe hit on CPU %d, for " - "address 0x%08lx.\n", - smp_processor_id(), addr); - ret = 1; - goto no_kmmio_ctx; - } - /* - * Prevent overwriting already in-flight context. - * This should not happen, let's hope disarming at least - * prevents a panic. - */ - pr_emerg("kmmio: recursive probe hit on CPU %d, " - "for address 0x%08lx. Ignoring.\n", - smp_processor_id(), addr); - pr_emerg("kmmio: previous hit was at 0x%08lx.\n", - ctx->addr); - goto no_kmmio_ctx; - } - ctx->active++; - - ctx->fpage = faultpage; - ctx->probe = get_kmmio_probe(addr); - ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - ctx->addr = addr; - - if (ctx->probe && ctx->probe->pre_handler) - ctx->probe->pre_handler(ctx->probe, regs, addr); - - /* - * Enable single-stepping and disable interrupts for the faulting - * context. Local interrupts must not get enabled during stepping. - */ - regs->flags |= X86_EFLAGS_TF; - regs->flags &= ~X86_EFLAGS_IF; - - /* Now we set present bit in PTE and single step. */ - disarm_kmmio_fault_page(ctx->fpage->page, NULL); - - /* - * If another cpu accesses the same page while we are stepping, - * the access will not be caught. It will simply succeed and the - * only downside is we lose the event. If this becomes a problem, - * the user should drop to single cpu before tracing. - */ - - put_cpu_var(kmmio_ctx); - return 1; /* fault handled */ - -no_kmmio_ctx: - put_cpu_var(kmmio_ctx); -no_kmmio: - rcu_read_unlock(); - preempt_enable_no_resched(); - return ret; -} - -/* - * Interrupts are disabled on entry as trap1 is an interrupt gate - * and they remain disabled thorough out this function. - * This must always get called as the pair to kmmio_handler(). - */ -static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) -{ - int ret = 0; - struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); - - if (!ctx->active) { - pr_debug("kmmio: spurious debug trap on CPU %d.\n", - smp_processor_id()); - goto out; - } - - if (ctx->probe && ctx->probe->post_handler) - ctx->probe->post_handler(ctx->probe, condition, regs); - - arm_kmmio_fault_page(ctx->fpage->page, NULL); - - regs->flags &= ~X86_EFLAGS_TF; - regs->flags |= ctx->saved_flags; - - /* These were acquired in kmmio_handler(). */ - ctx->active--; - BUG_ON(ctx->active); - rcu_read_unlock(); - preempt_enable_no_resched(); - - /* - * if somebody else is singlestepping across a probe point, flags - * will have TF set, in which case, continue the remaining processing - * of do_debug, as if this is not a probe hit. - */ - if (!(regs->flags & X86_EFLAGS_TF)) - ret = 1; -out: - put_cpu_var(kmmio_ctx); - return ret; -} - -/* You must be holding kmmio_lock. */ -static int add_kmmio_fault_page(unsigned long page) -{ - struct kmmio_fault_page *f; - - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); - if (f) { - if (!f->count) - arm_kmmio_fault_page(f->page, NULL); - f->count++; - return 0; - } - - f = kmalloc(sizeof(*f), GFP_ATOMIC); - if (!f) - return -1; - - f->count = 1; - f->page = page; - list_add_rcu(&f->list, kmmio_page_list(f->page)); - - arm_kmmio_fault_page(f->page, NULL); - - return 0; -} - -/* You must be holding kmmio_lock. */ -static void release_kmmio_fault_page(unsigned long page, - struct kmmio_fault_page **release_list) -{ - struct kmmio_fault_page *f; - - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); - if (!f) - return; - - f->count--; - BUG_ON(f->count < 0); - if (!f->count) { - disarm_kmmio_fault_page(f->page, NULL); - f->release_next = *release_list; - *release_list = f; - } -} - -int register_kmmio_probe(struct kmmio_probe *p) -{ - unsigned long flags; - int ret = 0; - unsigned long size = 0; - - spin_lock_irqsave(&kmmio_lock, flags); - if (get_kmmio_probe(p->addr)) { - ret = -EEXIST; - goto out; - } - kmmio_count++; - list_add_rcu(&p->list, &kmmio_probes); - while (size < p->len) { - if (add_kmmio_fault_page(p->addr + size)) - pr_err("kmmio: Unable to set page fault.\n"); - size += PAGE_SIZE; - } -out: - spin_unlock_irqrestore(&kmmio_lock, flags); - /* - * XXX: What should I do here? - * Here was a call to global_flush_tlb(), but it does not exist - * anymore. It seems it's not needed after all. - */ - return ret; -} -EXPORT_SYMBOL(register_kmmio_probe); - -static void rcu_free_kmmio_fault_pages(struct rcu_head *head) -{ - struct kmmio_delayed_release *dr = container_of( - head, - struct kmmio_delayed_release, - rcu); - struct kmmio_fault_page *p = dr->release_list; - while (p) { - struct kmmio_fault_page *next = p->release_next; - BUG_ON(p->count); - kfree(p); - p = next; - } - kfree(dr); -} - -static void remove_kmmio_fault_pages(struct rcu_head *head) -{ - struct kmmio_delayed_release *dr = container_of( - head, - struct kmmio_delayed_release, - rcu); - struct kmmio_fault_page *p = dr->release_list; - struct kmmio_fault_page **prevp = &dr->release_list; - unsigned long flags; - spin_lock_irqsave(&kmmio_lock, flags); - while (p) { - if (!p->count) - list_del_rcu(&p->list); - else - *prevp = p->release_next; - prevp = &p->release_next; - p = p->release_next; - } - spin_unlock_irqrestore(&kmmio_lock, flags); - /* This is the real RCU destroy call. */ - call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); -} - -/* - * Remove a kmmio probe. You have to synchronize_rcu() before you can be - * sure that the callbacks will not be called anymore. Only after that - * you may actually release your struct kmmio_probe. - * - * Unregistering a kmmio fault page has three steps: - * 1. release_kmmio_fault_page() - * Disarm the page, wait a grace period to let all faults finish. - * 2. remove_kmmio_fault_pages() - * Remove the pages from kmmio_page_table. - * 3. rcu_free_kmmio_fault_pages() - * Actally free the kmmio_fault_page structs as with RCU. - */ -void unregister_kmmio_probe(struct kmmio_probe *p) -{ - unsigned long flags; - unsigned long size = 0; - struct kmmio_fault_page *release_list = NULL; - struct kmmio_delayed_release *drelease; - - spin_lock_irqsave(&kmmio_lock, flags); - while (size < p->len) { - release_kmmio_fault_page(p->addr + size, &release_list); - size += PAGE_SIZE; - } - list_del_rcu(&p->list); - kmmio_count--; - spin_unlock_irqrestore(&kmmio_lock, flags); - - drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); - if (!drelease) { - pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); - return; - } - drelease->release_list = release_list; - - /* - * This is not really RCU here. We have just disarmed a set of - * pages so that they cannot trigger page faults anymore. However, - * we cannot remove the pages from kmmio_page_table, - * because a probe hit might be in flight on another CPU. The - * pages are collected into a list, and they will be removed from - * kmmio_page_table when it is certain that no probe hit related to - * these pages can be in flight. RCU grace period sounds like a - * good choice. - * - * If we removed the pages too early, kmmio page fault handler might - * not find the respective kmmio_fault_page and determine it's not - * a kmmio fault, when it actually is. This would lead to madness. - */ - call_rcu(&drelease->rcu, remove_kmmio_fault_pages); -} -EXPORT_SYMBOL(unregister_kmmio_probe); - -static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, - void *args) -{ - struct die_args *arg = args; - - if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) - return NOTIFY_STOP; - - return NOTIFY_DONE; -} - -static struct notifier_block nb_die = { - .notifier_call = kmmio_die_notifier -}; - -static int __init init_kmmio(void) -{ - int i; - for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) - INIT_LIST_HEAD(&kmmio_page_table[i]); - return register_die_notifier(&nb_die); -} -fs_initcall(init_kmmio); /* should be before device_initcall() */ diff --git a/arch/x86/kernel/mmiotrace/mmio-mod.c b/arch/x86/kernel/mmiotrace/mmio-mod.c deleted file mode 100644 index 8256546d49bf..000000000000 --- a/arch/x86/kernel/mmiotrace/mmio-mod.c +++ /dev/null @@ -1,457 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2005 - * Jeff Muizelaar, 2006, 2007 - * Pekka Paalanen, 2008 - * - * Derived from the read-mod example from relay-examples by Tom Zanussi. - */ -#define DEBUG 1 - -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for ISA_START_ADDRESS */ -#include -#include - -#include "pf_in.h" - -#define NAME "mmiotrace: " - -struct trap_reason { - unsigned long addr; - unsigned long ip; - enum reason_type type; - int active_traces; -}; - -struct remap_trace { - struct list_head list; - struct kmmio_probe probe; - unsigned long phys; - unsigned long id; -}; - -/* Accessed per-cpu. */ -static DEFINE_PER_CPU(struct trap_reason, pf_reason); -static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); - -#if 0 /* XXX: no way gather this info anymore */ -/* Access to this is not per-cpu. */ -static DEFINE_PER_CPU(atomic_t, dropped); -#endif - -static struct dentry *marker_file; - -static DEFINE_MUTEX(mmiotrace_mutex); -static DEFINE_SPINLOCK(trace_lock); -static atomic_t mmiotrace_enabled; -static LIST_HEAD(trace_list); /* struct remap_trace */ - -/* - * Locking in this file: - * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections. - * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex - * and trace_lock. - * - Routines depending on is_enabled() must take trace_lock. - * - trace_list users must hold trace_lock. - * - is_enabled() guarantees that mmio_trace_record is allowed. - * - pre/post callbacks assume the effect of is_enabled() being true. - */ - -/* module parameters */ -static unsigned long filter_offset; -static int nommiotrace; -static int ISA_trace; -static int trace_pc; - -module_param(filter_offset, ulong, 0); -module_param(nommiotrace, bool, 0); -module_param(ISA_trace, bool, 0); -module_param(trace_pc, bool, 0); - -MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); -MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); -MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); -MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); - -static bool is_enabled(void) -{ - return atomic_read(&mmiotrace_enabled); -} - -#if 0 /* XXX: needs rewrite */ -/* - * Write callback for the debugfs entry: - * Read a marker and write it to the mmio trace log - */ -static ssize_t write_marker(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - char *event = NULL; - struct mm_io_header *headp; - ssize_t len = (count > 65535) ? 65535 : count; - - event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); - if (!event) - return -ENOMEM; - - headp = (struct mm_io_header *)event; - headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); - headp->data_len = len; - - if (copy_from_user(event + sizeof(*headp), buffer, len)) { - kfree(event); - return -EFAULT; - } - - spin_lock_irq(&trace_lock); -#if 0 /* XXX: convert this to use tracing */ - if (is_enabled()) - relay_write(chan, event, sizeof(*headp) + len); - else -#endif - len = -EINVAL; - spin_unlock_irq(&trace_lock); - kfree(event); - return len; -} -#endif - -static void print_pte(unsigned long address) -{ - int level; - pte_t *pte = lookup_address(address, &level); - - if (!pte) { - pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", - __func__, address); - return; - } - - if (level == PG_LEVEL_2M) { - pr_emerg(NAME "4MB pages are not currently supported: " - "0x%08lx\n", address); - BUG(); - } - pr_info(NAME "pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), - pte_val(*pte) & _PAGE_PRESENT); -} - -/* - * For some reason the pre/post pairs have been called in an - * unmatched order. Report and die. - */ -static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) -{ - const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - pr_emerg(NAME "unexpected fault for address: 0x%08lx, " - "last fault for address: 0x%08lx\n", - addr, my_reason->addr); - print_pte(addr); - print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); - print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); -#ifdef __i386__ - pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->ax, regs->bx, regs->cx, regs->dx); - pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->si, regs->di, regs->bp, regs->sp); -#else - pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", - regs->ax, regs->cx, regs->dx); - pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", - regs->si, regs->di, regs->bp, regs->sp); -#endif - put_cpu_var(pf_reason); - BUG(); -} - -static void pre(struct kmmio_probe *p, struct pt_regs *regs, - unsigned long addr) -{ - struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); - const unsigned long instptr = instruction_pointer(regs); - const enum reason_type type = get_ins_type(instptr); - struct remap_trace *trace = p->user_data; - - /* it doesn't make sense to have more than one active trace per cpu */ - if (my_reason->active_traces) - die_kmmio_nesting_error(regs, addr); - else - my_reason->active_traces++; - - my_reason->type = type; - my_reason->addr = addr; - my_reason->ip = instptr; - - my_trace->phys = addr - trace->probe.addr + trace->phys; - my_trace->map_id = trace->id; - - /* - * Only record the program counter when requested. - * It may taint clean-room reverse engineering. - */ - if (trace_pc) - my_trace->pc = instptr; - else - my_trace->pc = 0; - - /* - * XXX: the timestamp recorded will be *after* the tracing has been - * done, not at the time we hit the instruction. SMP implications - * on event ordering? - */ - - switch (type) { - case REG_READ: - my_trace->opcode = MMIO_READ; - my_trace->width = get_ins_mem_width(instptr); - break; - case REG_WRITE: - my_trace->opcode = MMIO_WRITE; - my_trace->width = get_ins_mem_width(instptr); - my_trace->value = get_ins_reg_val(instptr, regs); - break; - case IMM_WRITE: - my_trace->opcode = MMIO_WRITE; - my_trace->width = get_ins_mem_width(instptr); - my_trace->value = get_ins_imm_val(instptr); - break; - default: - { - unsigned char *ip = (unsigned char *)instptr; - my_trace->opcode = MMIO_UNKNOWN_OP; - my_trace->width = 0; - my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | - *(ip + 2); - } - } - put_cpu_var(cpu_trace); - put_cpu_var(pf_reason); -} - -static void post(struct kmmio_probe *p, unsigned long condition, - struct pt_regs *regs) -{ - struct trap_reason *my_reason = &get_cpu_var(pf_reason); - struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); - - /* this should always return the active_trace count to 0 */ - my_reason->active_traces--; - if (my_reason->active_traces) { - pr_emerg(NAME "unexpected post handler"); - BUG(); - } - - switch (my_reason->type) { - case REG_READ: - my_trace->value = get_ins_reg_val(my_reason->ip, regs); - break; - default: - break; - } - - mmio_trace_rw(my_trace); - put_cpu_var(cpu_trace); - put_cpu_var(pf_reason); -} - -static void ioremap_trace_core(unsigned long offset, unsigned long size, - void __iomem *addr) -{ - static atomic_t next_id; - struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); - struct mmiotrace_map map = { - .phys = offset, - .virt = (unsigned long)addr, - .len = size, - .opcode = MMIO_PROBE - }; - - if (!trace) { - pr_err(NAME "kmalloc failed in ioremap\n"); - return; - } - - *trace = (struct remap_trace) { - .probe = { - .addr = (unsigned long)addr, - .len = size, - .pre_handler = pre, - .post_handler = post, - .user_data = trace - }, - .phys = offset, - .id = atomic_inc_return(&next_id) - }; - map.map_id = trace->id; - - spin_lock_irq(&trace_lock); - if (!is_enabled()) - goto not_enabled; - - mmio_trace_mapping(&map); - list_add_tail(&trace->list, &trace_list); - if (!nommiotrace) - register_kmmio_probe(&trace->probe); - -not_enabled: - spin_unlock_irq(&trace_lock); -} - -void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) -{ - if (!is_enabled()) /* recheck and proper locking in *_core() */ - return; - - pr_debug(NAME "ioremap_*(0x%lx, 0x%lx) = %p\n", offset, size, addr); - if ((filter_offset) && (offset != filter_offset)) - return; - ioremap_trace_core(offset, size, addr); -} - -static void iounmap_trace_core(volatile void __iomem *addr) -{ - struct mmiotrace_map map = { - .phys = 0, - .virt = (unsigned long)addr, - .len = 0, - .opcode = MMIO_UNPROBE - }; - struct remap_trace *trace; - struct remap_trace *tmp; - struct remap_trace *found_trace = NULL; - - pr_debug(NAME "Unmapping %p.\n", addr); - - spin_lock_irq(&trace_lock); - if (!is_enabled()) - goto not_enabled; - - list_for_each_entry_safe(trace, tmp, &trace_list, list) { - if ((unsigned long)addr == trace->probe.addr) { - if (!nommiotrace) - unregister_kmmio_probe(&trace->probe); - list_del(&trace->list); - found_trace = trace; - break; - } - } - map.map_id = (found_trace) ? found_trace->id : -1; - mmio_trace_mapping(&map); - -not_enabled: - spin_unlock_irq(&trace_lock); - if (found_trace) { - synchronize_rcu(); /* unregister_kmmio_probe() requirement */ - kfree(found_trace); - } -} - -void mmiotrace_iounmap(volatile void __iomem *addr) -{ - might_sleep(); - if (is_enabled()) /* recheck and proper locking in *_core() */ - iounmap_trace_core(addr); -} - -static void clear_trace_list(void) -{ - struct remap_trace *trace; - struct remap_trace *tmp; - - /* - * No locking required, because the caller ensures we are in a - * critical section via mutex, and is_enabled() is false, - * i.e. nothing can traverse or modify this list. - * Caller also ensures is_enabled() cannot change. - */ - list_for_each_entry(trace, &trace_list, list) { - pr_notice(NAME "purging non-iounmapped " - "trace @0x%08lx, size 0x%lx.\n", - trace->probe.addr, trace->probe.len); - if (!nommiotrace) - unregister_kmmio_probe(&trace->probe); - } - synchronize_rcu(); /* unregister_kmmio_probe() requirement */ - - list_for_each_entry_safe(trace, tmp, &trace_list, list) { - list_del(&trace->list); - kfree(trace); - } -} - -#if 0 /* XXX: out of order */ -static struct file_operations fops_marker = { - .owner = THIS_MODULE, - .write = write_marker -}; -#endif - -void enable_mmiotrace(void) -{ - mutex_lock(&mmiotrace_mutex); - if (is_enabled()) - goto out; - -#if 0 /* XXX: tracing does not support text entries */ - marker_file = debugfs_create_file("marker", 0660, dir, NULL, - &fops_marker); - if (!marker_file) - pr_err(NAME "marker file creation failed.\n"); -#endif - - if (nommiotrace) - pr_info(NAME "MMIO tracing disabled.\n"); - if (ISA_trace) - pr_warning(NAME "Warning! low ISA range will be traced.\n"); - spin_lock_irq(&trace_lock); - atomic_inc(&mmiotrace_enabled); - spin_unlock_irq(&trace_lock); - pr_info(NAME "enabled.\n"); -out: - mutex_unlock(&mmiotrace_mutex); -} - -void disable_mmiotrace(void) -{ - mutex_lock(&mmiotrace_mutex); - if (!is_enabled()) - goto out; - - spin_lock_irq(&trace_lock); - atomic_dec(&mmiotrace_enabled); - BUG_ON(is_enabled()); - spin_unlock_irq(&trace_lock); - - clear_trace_list(); /* guarantees: no more kmmio callbacks */ - if (marker_file) { - debugfs_remove(marker_file); - marker_file = NULL; - } - - pr_info(NAME "disabled.\n"); -out: - mutex_unlock(&mmiotrace_mutex); -} diff --git a/arch/x86/kernel/mmiotrace/pf_in.c b/arch/x86/kernel/mmiotrace/pf_in.c deleted file mode 100644 index efa1911e20ca..000000000000 --- a/arch/x86/kernel/mmiotrace/pf_in.c +++ /dev/null @@ -1,489 +0,0 @@ -/* - * Fault Injection Test harness (FI) - * Copyright (C) Intel Crop. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, - * USA. - * - */ - -/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp - * Copyright by Intel Crop., 2002 - * Louis Zhuang (louis.zhuang@intel.com) - * - * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007 - */ - -#include -#include /* struct pt_regs */ -#include "pf_in.h" - -#ifdef __i386__ -/* IA32 Manual 3, 2-1 */ -static unsigned char prefix_codes[] = { - 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, - 0x65, 0x2E, 0x3E, 0x66, 0x67 -}; -/* IA32 Manual 3, 3-432*/ -static unsigned int reg_rop[] = { - 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F -}; -static unsigned int reg_wop[] = { 0x88, 0x89 }; -static unsigned int imm_wop[] = { 0xC6, 0xC7 }; -/* IA32 Manual 3, 3-432*/ -static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; -static unsigned int rw32[] = { - 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F -}; -static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; -static unsigned int mw16[] = { 0xB70F, 0xBF0F }; -static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; -static unsigned int mw64[] = {}; -#else /* not __i386__ */ -static unsigned char prefix_codes[] = { - 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36, - 0xF0, 0xF3, 0xF2, - /* REX Prefixes */ - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f -}; -/* AMD64 Manual 3, Appendix A*/ -static unsigned int reg_rop[] = { - 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F -}; -static unsigned int reg_wop[] = { 0x88, 0x89 }; -static unsigned int imm_wop[] = { 0xC6, 0xC7 }; -static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; -static unsigned int rw32[] = { - 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F -}; -/* 8 bit only */ -static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; -/* 16 bit only */ -static unsigned int mw16[] = { 0xB70F, 0xBF0F }; -/* 16 or 32 bit */ -static unsigned int mw32[] = { 0xC7 }; -/* 16, 32 or 64 bit */ -static unsigned int mw64[] = { 0x89, 0x8B }; -#endif /* not __i386__ */ - -static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, - int *rexr) -{ - int i; - unsigned char *p = addr; - *shorted = 0; - *enlarged = 0; - *rexr = 0; - -restart: - for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { - if (*p == prefix_codes[i]) { - if (*p == 0x66) - *shorted = 1; -#ifdef __amd64__ - if ((*p & 0xf8) == 0x48) - *enlarged = 1; - if ((*p & 0xf4) == 0x44) - *rexr = 1; -#endif - p++; - goto restart; - } - } - - return (p - addr); -} - -static int get_opcode(unsigned char *addr, unsigned int *opcode) -{ - int len; - - if (*addr == 0x0F) { - /* 0x0F is extension instruction */ - *opcode = *(unsigned short *)addr; - len = 2; - } else { - *opcode = *addr; - len = 1; - } - - return len; -} - -#define CHECK_OP_TYPE(opcode, array, type) \ - for (i = 0; i < ARRAY_SIZE(array); i++) { \ - if (array[i] == opcode) { \ - rv = type; \ - goto exit; \ - } \ - } - -enum reason_type get_ins_type(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char *p; - int shorted, enlarged, rexr; - int i; - enum reason_type rv = OTHERS; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); - p += get_opcode(p, &opcode); - - CHECK_OP_TYPE(opcode, reg_rop, REG_READ); - CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE); - CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE); - -exit: - return rv; -} -#undef CHECK_OP_TYPE - -static unsigned int get_ins_reg_width(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char *p; - int i, shorted, enlarged, rexr; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); - p += get_opcode(p, &opcode); - - for (i = 0; i < ARRAY_SIZE(rw8); i++) - if (rw8[i] == opcode) - return 1; - - for (i = 0; i < ARRAY_SIZE(rw32); i++) - if (rw32[i] == opcode) - return (shorted ? 2 : (enlarged ? 8 : 4)); - - printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); - return 0; -} - -unsigned int get_ins_mem_width(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char *p; - int i, shorted, enlarged, rexr; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); - p += get_opcode(p, &opcode); - - for (i = 0; i < ARRAY_SIZE(mw8); i++) - if (mw8[i] == opcode) - return 1; - - for (i = 0; i < ARRAY_SIZE(mw16); i++) - if (mw16[i] == opcode) - return 2; - - for (i = 0; i < ARRAY_SIZE(mw32); i++) - if (mw32[i] == opcode) - return shorted ? 2 : 4; - - for (i = 0; i < ARRAY_SIZE(mw64); i++) - if (mw64[i] == opcode) - return shorted ? 2 : (enlarged ? 8 : 4); - - printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); - return 0; -} - -/* - * Define register ident in mod/rm byte. - * Note: these are NOT the same as in ptrace-abi.h. - */ -enum { - arg_AL = 0, - arg_CL = 1, - arg_DL = 2, - arg_BL = 3, - arg_AH = 4, - arg_CH = 5, - arg_DH = 6, - arg_BH = 7, - - arg_AX = 0, - arg_CX = 1, - arg_DX = 2, - arg_BX = 3, - arg_SP = 4, - arg_BP = 5, - arg_SI = 6, - arg_DI = 7, -#ifdef __amd64__ - arg_R8 = 8, - arg_R9 = 9, - arg_R10 = 10, - arg_R11 = 11, - arg_R12 = 12, - arg_R13 = 13, - arg_R14 = 14, - arg_R15 = 15 -#endif -}; - -static unsigned char *get_reg_w8(int no, struct pt_regs *regs) -{ - unsigned char *rv = NULL; - - switch (no) { - case arg_AL: - rv = (unsigned char *)®s->ax; - break; - case arg_BL: - rv = (unsigned char *)®s->bx; - break; - case arg_CL: - rv = (unsigned char *)®s->cx; - break; - case arg_DL: - rv = (unsigned char *)®s->dx; - break; - case arg_AH: - rv = 1 + (unsigned char *)®s->ax; - break; - case arg_BH: - rv = 1 + (unsigned char *)®s->bx; - break; - case arg_CH: - rv = 1 + (unsigned char *)®s->cx; - break; - case arg_DH: - rv = 1 + (unsigned char *)®s->dx; - break; -#ifdef __amd64__ - case arg_R8: - rv = (unsigned char *)®s->r8; - break; - case arg_R9: - rv = (unsigned char *)®s->r9; - break; - case arg_R10: - rv = (unsigned char *)®s->r10; - break; - case arg_R11: - rv = (unsigned char *)®s->r11; - break; - case arg_R12: - rv = (unsigned char *)®s->r12; - break; - case arg_R13: - rv = (unsigned char *)®s->r13; - break; - case arg_R14: - rv = (unsigned char *)®s->r14; - break; - case arg_R15: - rv = (unsigned char *)®s->r15; - break; -#endif - default: - printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); - break; - } - return rv; -} - -static unsigned long *get_reg_w32(int no, struct pt_regs *regs) -{ - unsigned long *rv = NULL; - - switch (no) { - case arg_AX: - rv = ®s->ax; - break; - case arg_BX: - rv = ®s->bx; - break; - case arg_CX: - rv = ®s->cx; - break; - case arg_DX: - rv = ®s->dx; - break; - case arg_SP: - rv = ®s->sp; - break; - case arg_BP: - rv = ®s->bp; - break; - case arg_SI: - rv = ®s->si; - break; - case arg_DI: - rv = ®s->di; - break; -#ifdef __amd64__ - case arg_R8: - rv = ®s->r8; - break; - case arg_R9: - rv = ®s->r9; - break; - case arg_R10: - rv = ®s->r10; - break; - case arg_R11: - rv = ®s->r11; - break; - case arg_R12: - rv = ®s->r12; - break; - case arg_R13: - rv = ®s->r13; - break; - case arg_R14: - rv = ®s->r14; - break; - case arg_R15: - rv = ®s->r15; - break; -#endif - default: - printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); - } - - return rv; -} - -unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) -{ - unsigned int opcode; - unsigned char mod_rm; - int reg; - unsigned char *p; - int i, shorted, enlarged, rexr; - unsigned long rv; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); - p += get_opcode(p, &opcode); - for (i = 0; i < ARRAY_SIZE(reg_rop); i++) - if (reg_rop[i] == opcode) { - rv = REG_READ; - goto do_work; - } - - for (i = 0; i < ARRAY_SIZE(reg_wop); i++) - if (reg_wop[i] == opcode) { - rv = REG_WRITE; - goto do_work; - } - - printk(KERN_ERR "mmiotrace: Not a register instruction, opcode " - "0x%02x\n", opcode); - goto err; - -do_work: - mod_rm = *p; - reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); - switch (get_ins_reg_width(ins_addr)) { - case 1: - return *get_reg_w8(reg, regs); - - case 2: - return *(unsigned short *)get_reg_w32(reg, regs); - - case 4: - return *(unsigned int *)get_reg_w32(reg, regs); - -#ifdef __amd64__ - case 8: - return *(unsigned long *)get_reg_w32(reg, regs); -#endif - - default: - printk(KERN_ERR "mmiotrace: Error width# %d\n", reg); - } - -err: - return 0; -} - -unsigned long get_ins_imm_val(unsigned long ins_addr) -{ - unsigned int opcode; - unsigned char mod_rm; - unsigned char mod; - unsigned char *p; - int i, shorted, enlarged, rexr; - unsigned long rv; - - p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); - p += get_opcode(p, &opcode); - for (i = 0; i < ARRAY_SIZE(imm_wop); i++) - if (imm_wop[i] == opcode) { - rv = IMM_WRITE; - goto do_work; - } - - printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " - "0x%02x\n", opcode); - goto err; - -do_work: - mod_rm = *p; - mod = mod_rm >> 6; - p++; - switch (mod) { - case 0: - /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */ - /* AMD64: XXX Check for address size prefix? */ - if ((mod_rm & 0x7) == 0x5) - p += 4; - break; - - case 1: - p += 1; - break; - - case 2: - p += 4; - break; - - case 3: - default: - printk(KERN_ERR "mmiotrace: not a memory access instruction " - "at 0x%lx, rm_mod=0x%02x\n", - ins_addr, mod_rm); - } - - switch (get_ins_reg_width(ins_addr)) { - case 1: - return *(unsigned char *)p; - - case 2: - return *(unsigned short *)p; - - case 4: - return *(unsigned int *)p; - -#ifdef __amd64__ - case 8: - return *(unsigned long *)p; -#endif - - default: - printk(KERN_ERR "mmiotrace: Error: width.\n"); - } - -err: - return 0; -} diff --git a/arch/x86/kernel/mmiotrace/pf_in.h b/arch/x86/kernel/mmiotrace/pf_in.h deleted file mode 100644 index e05341a51a27..000000000000 --- a/arch/x86/kernel/mmiotrace/pf_in.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Fault Injection Test harness (FI) - * Copyright (C) Intel Crop. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, - * USA. - * - */ - -#ifndef __PF_H_ -#define __PF_H_ - -enum reason_type { - NOT_ME, /* page fault is not in regions */ - NOTHING, /* access others point in regions */ - REG_READ, /* read from addr to reg */ - REG_WRITE, /* write from reg to addr */ - IMM_WRITE, /* write from imm to addr */ - OTHERS /* Other instructions can not intercept */ -}; - -enum reason_type get_ins_type(unsigned long ins_addr); -unsigned int get_ins_mem_width(unsigned long ins_addr); -unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs); -unsigned long get_ins_imm_val(unsigned long ins_addr); - -#endif /* __PF_H_ */ diff --git a/arch/x86/kernel/mmiotrace/testmmiotrace.c b/arch/x86/kernel/mmiotrace/testmmiotrace.c deleted file mode 100644 index cfa60b227c8d..000000000000 --- a/arch/x86/kernel/mmiotrace/testmmiotrace.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Written by Pekka Paalanen, 2008 - */ -#include -#include - -#define MODULE_NAME "testmmiotrace" - -static unsigned long mmio_address; -module_param(mmio_address, ulong, 0); -MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); - -static void do_write_test(void __iomem *p) -{ - unsigned int i; - for (i = 0; i < 256; i++) - iowrite8(i, p + i); - for (i = 1024; i < (5 * 1024); i += 2) - iowrite16(i * 12 + 7, p + i); - for (i = (5 * 1024); i < (16 * 1024); i += 4) - iowrite32(i * 212371 + 13, p + i); -} - -static void do_read_test(void __iomem *p) -{ - unsigned int i; - for (i = 0; i < 256; i++) - ioread8(p + i); - for (i = 1024; i < (5 * 1024); i += 2) - ioread16(p + i); - for (i = (5 * 1024); i < (16 * 1024); i += 4) - ioread32(p + i); -} - -static void do_test(void) -{ - void __iomem *p = ioremap_nocache(mmio_address, 0x4000); - if (!p) { - pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); - return; - } - do_write_test(p); - do_read_test(p); - iounmap(p); -} - -static int __init init(void) -{ - if (mmio_address == 0) { - pr_err(MODULE_NAME ": you have to use the module argument " - "mmio_address.\n"); - pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" - " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); - return -ENXIO; - } - - pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " - "in PCI address space, and writing " - "rubbish in there.\n", mmio_address); - do_test(); - return 0; -} - -static void __exit cleanup(void) -{ - pr_debug(MODULE_NAME ": unloaded.\n"); -} - -module_init(init); -module_exit(cleanup); -MODULE_LICENSE("GPL"); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index b7b3e4c7cfc9..07dab503c9e3 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -8,6 +8,11 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o +obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o +obj-$(CONFIG_MMIOTRACE) += mmiotrace.o +mmiotrace-y := pf_in.o mmio-mod.o +obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o + ifeq ($(CONFIG_X86_32),y) obj-$(CONFIG_NUMA) += discontig_32.o else diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c new file mode 100644 index 000000000000..3ad27b8504a5 --- /dev/null +++ b/arch/x86/mm/kmmio.c @@ -0,0 +1,499 @@ +/* Support for MMIO probes. + * Benfit many code from kprobes + * (C) 2002 Louis Zhuang . + * 2007 Alexander Eichner + * 2008 Pekka Paalanen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define KMMIO_PAGE_HASH_BITS 4 +#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) + +struct kmmio_fault_page { + struct list_head list; + struct kmmio_fault_page *release_next; + unsigned long page; /* location of the fault page */ + + /* + * Number of times this page has been registered as a part + * of a probe. If zero, page is disarmed and this may be freed. + * Used only by writers (RCU). + */ + int count; +}; + +struct kmmio_delayed_release { + struct rcu_head rcu; + struct kmmio_fault_page *release_list; +}; + +struct kmmio_context { + struct kmmio_fault_page *fpage; + struct kmmio_probe *probe; + unsigned long saved_flags; + unsigned long addr; + int active; +}; + +static DEFINE_SPINLOCK(kmmio_lock); + +/* Protected by kmmio_lock */ +unsigned int kmmio_count; + +/* Read-protected by RCU, write-protected by kmmio_lock. */ +static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; +static LIST_HEAD(kmmio_probes); + +static struct list_head *kmmio_page_list(unsigned long page) +{ + return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; +} + +/* Accessed per-cpu */ +static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); + +/* + * this is basically a dynamic stabbing problem: + * Could use the existing prio tree code or + * Possible better implementations: + * The Interval Skip List: A Data Structure for Finding All Intervals That + * Overlap a Point (might be simple) + * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup + */ +/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */ +static struct kmmio_probe *get_kmmio_probe(unsigned long addr) +{ + struct kmmio_probe *p; + list_for_each_entry_rcu(p, &kmmio_probes, list) { + if (addr >= p->addr && addr <= (p->addr + p->len)) + return p; + } + return NULL; +} + +/* You must be holding RCU read lock. */ +static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) +{ + struct list_head *head; + struct kmmio_fault_page *p; + + page &= PAGE_MASK; + head = kmmio_page_list(page); + list_for_each_entry_rcu(p, head, list) { + if (p->page == page) + return p; + } + return NULL; +} + +static void set_page_present(unsigned long addr, bool present, int *pglevel) +{ + pteval_t pteval; + pmdval_t pmdval; + int level; + pmd_t *pmd; + pte_t *pte = lookup_address(addr, &level); + + if (!pte) { + pr_err("kmmio: no pte for page 0x%08lx\n", addr); + return; + } + + if (pglevel) + *pglevel = level; + + switch (level) { + case PG_LEVEL_2M: + pmd = (pmd_t *)pte; + pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; + if (present) + pmdval |= _PAGE_PRESENT; + set_pmd(pmd, __pmd(pmdval)); + break; + + case PG_LEVEL_4K: + pteval = pte_val(*pte) & ~_PAGE_PRESENT; + if (present) + pteval |= _PAGE_PRESENT; + set_pte_atomic(pte, __pte(pteval)); + break; + + default: + pr_err("kmmio: unexpected page level 0x%x.\n", level); + return; + } + + __flush_tlb_one(addr); +} + +/** Mark the given page as not present. Access to it will trigger a fault. */ +static void arm_kmmio_fault_page(unsigned long page, int *page_level) +{ + set_page_present(page & PAGE_MASK, false, page_level); +} + +/** Mark the given page as present. */ +static void disarm_kmmio_fault_page(unsigned long page, int *page_level) +{ + set_page_present(page & PAGE_MASK, true, page_level); +} + +/* + * This is being called from do_page_fault(). + * + * We may be in an interrupt or a critical section. Also prefecthing may + * trigger a page fault. We may be in the middle of process switch. + * We cannot take any locks, because we could be executing especially + * within a kmmio critical section. + * + * Local interrupts are disabled, so preemption cannot happen. + * Do not enable interrupts, do not sleep, and watch out for other CPUs. + */ +/* + * Interrupts are disabled on entry as trap3 is an interrupt gate + * and they remain disabled thorough out this function. + */ +int kmmio_handler(struct pt_regs *regs, unsigned long addr) +{ + struct kmmio_context *ctx; + struct kmmio_fault_page *faultpage; + int ret = 0; /* default to fault not handled */ + + /* + * Preemption is now disabled to prevent process switch during + * single stepping. We can only handle one active kmmio trace + * per cpu, so ensure that we finish it before something else + * gets to run. We also hold the RCU read lock over single + * stepping to avoid looking up the probe and kmmio_fault_page + * again. + */ + preempt_disable(); + rcu_read_lock(); + + faultpage = get_kmmio_fault_page(addr); + if (!faultpage) { + /* + * Either this page fault is not caused by kmmio, or + * another CPU just pulled the kmmio probe from under + * our feet. The latter case should not be possible. + */ + goto no_kmmio; + } + + ctx = &get_cpu_var(kmmio_ctx); + if (ctx->active) { + disarm_kmmio_fault_page(faultpage->page, NULL); + if (addr == ctx->addr) { + /* + * On SMP we sometimes get recursive probe hits on the + * same address. Context is already saved, fall out. + */ + pr_debug("kmmio: duplicate probe hit on CPU %d, for " + "address 0x%08lx.\n", + smp_processor_id(), addr); + ret = 1; + goto no_kmmio_ctx; + } + /* + * Prevent overwriting already in-flight context. + * This should not happen, let's hope disarming at least + * prevents a panic. + */ + pr_emerg("kmmio: recursive probe hit on CPU %d, " + "for address 0x%08lx. Ignoring.\n", + smp_processor_id(), addr); + pr_emerg("kmmio: previous hit was at 0x%08lx.\n", + ctx->addr); + goto no_kmmio_ctx; + } + ctx->active++; + + ctx->fpage = faultpage; + ctx->probe = get_kmmio_probe(addr); + ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); + ctx->addr = addr; + + if (ctx->probe && ctx->probe->pre_handler) + ctx->probe->pre_handler(ctx->probe, regs, addr); + + /* + * Enable single-stepping and disable interrupts for the faulting + * context. Local interrupts must not get enabled during stepping. + */ + regs->flags |= X86_EFLAGS_TF; + regs->flags &= ~X86_EFLAGS_IF; + + /* Now we set present bit in PTE and single step. */ + disarm_kmmio_fault_page(ctx->fpage->page, NULL); + + /* + * If another cpu accesses the same page while we are stepping, + * the access will not be caught. It will simply succeed and the + * only downside is we lose the event. If this becomes a problem, + * the user should drop to single cpu before tracing. + */ + + put_cpu_var(kmmio_ctx); + return 1; /* fault handled */ + +no_kmmio_ctx: + put_cpu_var(kmmio_ctx); +no_kmmio: + rcu_read_unlock(); + preempt_enable_no_resched(); + return ret; +} + +/* + * Interrupts are disabled on entry as trap1 is an interrupt gate + * and they remain disabled thorough out this function. + * This must always get called as the pair to kmmio_handler(). + */ +static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) +{ + int ret = 0; + struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); + + if (!ctx->active) { + pr_debug("kmmio: spurious debug trap on CPU %d.\n", + smp_processor_id()); + goto out; + } + + if (ctx->probe && ctx->probe->post_handler) + ctx->probe->post_handler(ctx->probe, condition, regs); + + arm_kmmio_fault_page(ctx->fpage->page, NULL); + + regs->flags &= ~X86_EFLAGS_TF; + regs->flags |= ctx->saved_flags; + + /* These were acquired in kmmio_handler(). */ + ctx->active--; + BUG_ON(ctx->active); + rcu_read_unlock(); + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, flags + * will have TF set, in which case, continue the remaining processing + * of do_debug, as if this is not a probe hit. + */ + if (!(regs->flags & X86_EFLAGS_TF)) + ret = 1; +out: + put_cpu_var(kmmio_ctx); + return ret; +} + +/* You must be holding kmmio_lock. */ +static int add_kmmio_fault_page(unsigned long page) +{ + struct kmmio_fault_page *f; + + page &= PAGE_MASK; + f = get_kmmio_fault_page(page); + if (f) { + if (!f->count) + arm_kmmio_fault_page(f->page, NULL); + f->count++; + return 0; + } + + f = kmalloc(sizeof(*f), GFP_ATOMIC); + if (!f) + return -1; + + f->count = 1; + f->page = page; + list_add_rcu(&f->list, kmmio_page_list(f->page)); + + arm_kmmio_fault_page(f->page, NULL); + + return 0; +} + +/* You must be holding kmmio_lock. */ +static void release_kmmio_fault_page(unsigned long page, + struct kmmio_fault_page **release_list) +{ + struct kmmio_fault_page *f; + + page &= PAGE_MASK; + f = get_kmmio_fault_page(page); + if (!f) + return; + + f->count--; + BUG_ON(f->count < 0); + if (!f->count) { + disarm_kmmio_fault_page(f->page, NULL); + f->release_next = *release_list; + *release_list = f; + } +} + +int register_kmmio_probe(struct kmmio_probe *p) +{ + unsigned long flags; + int ret = 0; + unsigned long size = 0; + + spin_lock_irqsave(&kmmio_lock, flags); + if (get_kmmio_probe(p->addr)) { + ret = -EEXIST; + goto out; + } + kmmio_count++; + list_add_rcu(&p->list, &kmmio_probes); + while (size < p->len) { + if (add_kmmio_fault_page(p->addr + size)) + pr_err("kmmio: Unable to set page fault.\n"); + size += PAGE_SIZE; + } +out: + spin_unlock_irqrestore(&kmmio_lock, flags); + /* + * XXX: What should I do here? + * Here was a call to global_flush_tlb(), but it does not exist + * anymore. It seems it's not needed after all. + */ + return ret; +} +EXPORT_SYMBOL(register_kmmio_probe); + +static void rcu_free_kmmio_fault_pages(struct rcu_head *head) +{ + struct kmmio_delayed_release *dr = container_of( + head, + struct kmmio_delayed_release, + rcu); + struct kmmio_fault_page *p = dr->release_list; + while (p) { + struct kmmio_fault_page *next = p->release_next; + BUG_ON(p->count); + kfree(p); + p = next; + } + kfree(dr); +} + +static void remove_kmmio_fault_pages(struct rcu_head *head) +{ + struct kmmio_delayed_release *dr = container_of( + head, + struct kmmio_delayed_release, + rcu); + struct kmmio_fault_page *p = dr->release_list; + struct kmmio_fault_page **prevp = &dr->release_list; + unsigned long flags; + spin_lock_irqsave(&kmmio_lock, flags); + while (p) { + if (!p->count) + list_del_rcu(&p->list); + else + *prevp = p->release_next; + prevp = &p->release_next; + p = p->release_next; + } + spin_unlock_irqrestore(&kmmio_lock, flags); + /* This is the real RCU destroy call. */ + call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); +} + +/* + * Remove a kmmio probe. You have to synchronize_rcu() before you can be + * sure that the callbacks will not be called anymore. Only after that + * you may actually release your struct kmmio_probe. + * + * Unregistering a kmmio fault page has three steps: + * 1. release_kmmio_fault_page() + * Disarm the page, wait a grace period to let all faults finish. + * 2. remove_kmmio_fault_pages() + * Remove the pages from kmmio_page_table. + * 3. rcu_free_kmmio_fault_pages() + * Actally free the kmmio_fault_page structs as with RCU. + */ +void unregister_kmmio_probe(struct kmmio_probe *p) +{ + unsigned long flags; + unsigned long size = 0; + struct kmmio_fault_page *release_list = NULL; + struct kmmio_delayed_release *drelease; + + spin_lock_irqsave(&kmmio_lock, flags); + while (size < p->len) { + release_kmmio_fault_page(p->addr + size, &release_list); + size += PAGE_SIZE; + } + list_del_rcu(&p->list); + kmmio_count--; + spin_unlock_irqrestore(&kmmio_lock, flags); + + drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); + if (!drelease) { + pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); + return; + } + drelease->release_list = release_list; + + /* + * This is not really RCU here. We have just disarmed a set of + * pages so that they cannot trigger page faults anymore. However, + * we cannot remove the pages from kmmio_page_table, + * because a probe hit might be in flight on another CPU. The + * pages are collected into a list, and they will be removed from + * kmmio_page_table when it is certain that no probe hit related to + * these pages can be in flight. RCU grace period sounds like a + * good choice. + * + * If we removed the pages too early, kmmio page fault handler might + * not find the respective kmmio_fault_page and determine it's not + * a kmmio fault, when it actually is. This would lead to madness. + */ + call_rcu(&drelease->rcu, remove_kmmio_fault_pages); +} +EXPORT_SYMBOL(unregister_kmmio_probe); + +static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, + void *args) +{ + struct die_args *arg = args; + + if (val == DIE_DEBUG && (arg->err & DR_STEP)) + if (post_kmmio_handler(arg->err, arg->regs) == 1) + return NOTIFY_STOP; + + return NOTIFY_DONE; +} + +static struct notifier_block nb_die = { + .notifier_call = kmmio_die_notifier +}; + +static int __init init_kmmio(void) +{ + int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kmmio_page_table[i]); + return register_die_notifier(&nb_die); +} +fs_initcall(init_kmmio); /* should be before device_initcall() */ diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c new file mode 100644 index 000000000000..8256546d49bf --- /dev/null +++ b/arch/x86/mm/mmio-mod.c @@ -0,0 +1,457 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2005 + * Jeff Muizelaar, 2006, 2007 + * Pekka Paalanen, 2008 + * + * Derived from the read-mod example from relay-examples by Tom Zanussi. + */ +#define DEBUG 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for ISA_START_ADDRESS */ +#include +#include + +#include "pf_in.h" + +#define NAME "mmiotrace: " + +struct trap_reason { + unsigned long addr; + unsigned long ip; + enum reason_type type; + int active_traces; +}; + +struct remap_trace { + struct list_head list; + struct kmmio_probe probe; + unsigned long phys; + unsigned long id; +}; + +/* Accessed per-cpu. */ +static DEFINE_PER_CPU(struct trap_reason, pf_reason); +static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); + +#if 0 /* XXX: no way gather this info anymore */ +/* Access to this is not per-cpu. */ +static DEFINE_PER_CPU(atomic_t, dropped); +#endif + +static struct dentry *marker_file; + +static DEFINE_MUTEX(mmiotrace_mutex); +static DEFINE_SPINLOCK(trace_lock); +static atomic_t mmiotrace_enabled; +static LIST_HEAD(trace_list); /* struct remap_trace */ + +/* + * Locking in this file: + * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections. + * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex + * and trace_lock. + * - Routines depending on is_enabled() must take trace_lock. + * - trace_list users must hold trace_lock. + * - is_enabled() guarantees that mmio_trace_record is allowed. + * - pre/post callbacks assume the effect of is_enabled() being true. + */ + +/* module parameters */ +static unsigned long filter_offset; +static int nommiotrace; +static int ISA_trace; +static int trace_pc; + +module_param(filter_offset, ulong, 0); +module_param(nommiotrace, bool, 0); +module_param(ISA_trace, bool, 0); +module_param(trace_pc, bool, 0); + +MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); +MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); +MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range."); +MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); + +static bool is_enabled(void) +{ + return atomic_read(&mmiotrace_enabled); +} + +#if 0 /* XXX: needs rewrite */ +/* + * Write callback for the debugfs entry: + * Read a marker and write it to the mmio trace log + */ +static ssize_t write_marker(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + char *event = NULL; + struct mm_io_header *headp; + ssize_t len = (count > 65535) ? 65535 : count; + + event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); + if (!event) + return -ENOMEM; + + headp = (struct mm_io_header *)event; + headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); + headp->data_len = len; + + if (copy_from_user(event + sizeof(*headp), buffer, len)) { + kfree(event); + return -EFAULT; + } + + spin_lock_irq(&trace_lock); +#if 0 /* XXX: convert this to use tracing */ + if (is_enabled()) + relay_write(chan, event, sizeof(*headp) + len); + else +#endif + len = -EINVAL; + spin_unlock_irq(&trace_lock); + kfree(event); + return len; +} +#endif + +static void print_pte(unsigned long address) +{ + int level; + pte_t *pte = lookup_address(address, &level); + + if (!pte) { + pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", + __func__, address); + return; + } + + if (level == PG_LEVEL_2M) { + pr_emerg(NAME "4MB pages are not currently supported: " + "0x%08lx\n", address); + BUG(); + } + pr_info(NAME "pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte), + pte_val(*pte) & _PAGE_PRESENT); +} + +/* + * For some reason the pre/post pairs have been called in an + * unmatched order. Report and die. + */ +static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) +{ + const struct trap_reason *my_reason = &get_cpu_var(pf_reason); + pr_emerg(NAME "unexpected fault for address: 0x%08lx, " + "last fault for address: 0x%08lx\n", + addr, my_reason->addr); + print_pte(addr); + print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); + print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); +#ifdef __i386__ + pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->ax, regs->bx, regs->cx, regs->dx); + pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->si, regs->di, regs->bp, regs->sp); +#else + pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", + regs->ax, regs->cx, regs->dx); + pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", + regs->si, regs->di, regs->bp, regs->sp); +#endif + put_cpu_var(pf_reason); + BUG(); +} + +static void pre(struct kmmio_probe *p, struct pt_regs *regs, + unsigned long addr) +{ + struct trap_reason *my_reason = &get_cpu_var(pf_reason); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); + const unsigned long instptr = instruction_pointer(regs); + const enum reason_type type = get_ins_type(instptr); + struct remap_trace *trace = p->user_data; + + /* it doesn't make sense to have more than one active trace per cpu */ + if (my_reason->active_traces) + die_kmmio_nesting_error(regs, addr); + else + my_reason->active_traces++; + + my_reason->type = type; + my_reason->addr = addr; + my_reason->ip = instptr; + + my_trace->phys = addr - trace->probe.addr + trace->phys; + my_trace->map_id = trace->id; + + /* + * Only record the program counter when requested. + * It may taint clean-room reverse engineering. + */ + if (trace_pc) + my_trace->pc = instptr; + else + my_trace->pc = 0; + + /* + * XXX: the timestamp recorded will be *after* the tracing has been + * done, not at the time we hit the instruction. SMP implications + * on event ordering? + */ + + switch (type) { + case REG_READ: + my_trace->opcode = MMIO_READ; + my_trace->width = get_ins_mem_width(instptr); + break; + case REG_WRITE: + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_reg_val(instptr, regs); + break; + case IMM_WRITE: + my_trace->opcode = MMIO_WRITE; + my_trace->width = get_ins_mem_width(instptr); + my_trace->value = get_ins_imm_val(instptr); + break; + default: + { + unsigned char *ip = (unsigned char *)instptr; + my_trace->opcode = MMIO_UNKNOWN_OP; + my_trace->width = 0; + my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | + *(ip + 2); + } + } + put_cpu_var(cpu_trace); + put_cpu_var(pf_reason); +} + +static void post(struct kmmio_probe *p, unsigned long condition, + struct pt_regs *regs) +{ + struct trap_reason *my_reason = &get_cpu_var(pf_reason); + struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); + + /* this should always return the active_trace count to 0 */ + my_reason->active_traces--; + if (my_reason->active_traces) { + pr_emerg(NAME "unexpected post handler"); + BUG(); + } + + switch (my_reason->type) { + case REG_READ: + my_trace->value = get_ins_reg_val(my_reason->ip, regs); + break; + default: + break; + } + + mmio_trace_rw(my_trace); + put_cpu_var(cpu_trace); + put_cpu_var(pf_reason); +} + +static void ioremap_trace_core(unsigned long offset, unsigned long size, + void __iomem *addr) +{ + static atomic_t next_id; + struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); + struct mmiotrace_map map = { + .phys = offset, + .virt = (unsigned long)addr, + .len = size, + .opcode = MMIO_PROBE + }; + + if (!trace) { + pr_err(NAME "kmalloc failed in ioremap\n"); + return; + } + + *trace = (struct remap_trace) { + .probe = { + .addr = (unsigned long)addr, + .len = size, + .pre_handler = pre, + .post_handler = post, + .user_data = trace + }, + .phys = offset, + .id = atomic_inc_return(&next_id) + }; + map.map_id = trace->id; + + spin_lock_irq(&trace_lock); + if (!is_enabled()) + goto not_enabled; + + mmio_trace_mapping(&map); + list_add_tail(&trace->list, &trace_list); + if (!nommiotrace) + register_kmmio_probe(&trace->probe); + +not_enabled: + spin_unlock_irq(&trace_lock); +} + +void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +{ + if (!is_enabled()) /* recheck and proper locking in *_core() */ + return; + + pr_debug(NAME "ioremap_*(0x%lx, 0x%lx) = %p\n", offset, size, addr); + if ((filter_offset) && (offset != filter_offset)) + return; + ioremap_trace_core(offset, size, addr); +} + +static void iounmap_trace_core(volatile void __iomem *addr) +{ + struct mmiotrace_map map = { + .phys = 0, + .virt = (unsigned long)addr, + .len = 0, + .opcode = MMIO_UNPROBE + }; + struct remap_trace *trace; + struct remap_trace *tmp; + struct remap_trace *found_trace = NULL; + + pr_debug(NAME "Unmapping %p.\n", addr); + + spin_lock_irq(&trace_lock); + if (!is_enabled()) + goto not_enabled; + + list_for_each_entry_safe(trace, tmp, &trace_list, list) { + if ((unsigned long)addr == trace->probe.addr) { + if (!nommiotrace) + unregister_kmmio_probe(&trace->probe); + list_del(&trace->list); + found_trace = trace; + break; + } + } + map.map_id = (found_trace) ? found_trace->id : -1; + mmio_trace_mapping(&map); + +not_enabled: + spin_unlock_irq(&trace_lock); + if (found_trace) { + synchronize_rcu(); /* unregister_kmmio_probe() requirement */ + kfree(found_trace); + } +} + +void mmiotrace_iounmap(volatile void __iomem *addr) +{ + might_sleep(); + if (is_enabled()) /* recheck and proper locking in *_core() */ + iounmap_trace_core(addr); +} + +static void clear_trace_list(void) +{ + struct remap_trace *trace; + struct remap_trace *tmp; + + /* + * No locking required, because the caller ensures we are in a + * critical section via mutex, and is_enabled() is false, + * i.e. nothing can traverse or modify this list. + * Caller also ensures is_enabled() cannot change. + */ + list_for_each_entry(trace, &trace_list, list) { + pr_notice(NAME "purging non-iounmapped " + "trace @0x%08lx, size 0x%lx.\n", + trace->probe.addr, trace->probe.len); + if (!nommiotrace) + unregister_kmmio_probe(&trace->probe); + } + synchronize_rcu(); /* unregister_kmmio_probe() requirement */ + + list_for_each_entry_safe(trace, tmp, &trace_list, list) { + list_del(&trace->list); + kfree(trace); + } +} + +#if 0 /* XXX: out of order */ +static struct file_operations fops_marker = { + .owner = THIS_MODULE, + .write = write_marker +}; +#endif + +void enable_mmiotrace(void) +{ + mutex_lock(&mmiotrace_mutex); + if (is_enabled()) + goto out; + +#if 0 /* XXX: tracing does not support text entries */ + marker_file = debugfs_create_file("marker", 0660, dir, NULL, + &fops_marker); + if (!marker_file) + pr_err(NAME "marker file creation failed.\n"); +#endif + + if (nommiotrace) + pr_info(NAME "MMIO tracing disabled.\n"); + if (ISA_trace) + pr_warning(NAME "Warning! low ISA range will be traced.\n"); + spin_lock_irq(&trace_lock); + atomic_inc(&mmiotrace_enabled); + spin_unlock_irq(&trace_lock); + pr_info(NAME "enabled.\n"); +out: + mutex_unlock(&mmiotrace_mutex); +} + +void disable_mmiotrace(void) +{ + mutex_lock(&mmiotrace_mutex); + if (!is_enabled()) + goto out; + + spin_lock_irq(&trace_lock); + atomic_dec(&mmiotrace_enabled); + BUG_ON(is_enabled()); + spin_unlock_irq(&trace_lock); + + clear_trace_list(); /* guarantees: no more kmmio callbacks */ + if (marker_file) { + debugfs_remove(marker_file); + marker_file = NULL; + } + + pr_info(NAME "disabled.\n"); +out: + mutex_unlock(&mmiotrace_mutex); +} diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c new file mode 100644 index 000000000000..efa1911e20ca --- /dev/null +++ b/arch/x86/mm/pf_in.c @@ -0,0 +1,489 @@ +/* + * Fault Injection Test harness (FI) + * Copyright (C) Intel Crop. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + * + */ + +/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp + * Copyright by Intel Crop., 2002 + * Louis Zhuang (louis.zhuang@intel.com) + * + * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007 + */ + +#include +#include /* struct pt_regs */ +#include "pf_in.h" + +#ifdef __i386__ +/* IA32 Manual 3, 2-1 */ +static unsigned char prefix_codes[] = { + 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, + 0x65, 0x2E, 0x3E, 0x66, 0x67 +}; +/* IA32 Manual 3, 3-432*/ +static unsigned int reg_rop[] = { + 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int imm_wop[] = { 0xC6, 0xC7 }; +/* IA32 Manual 3, 3-432*/ +static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; +static unsigned int rw32[] = { + 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; +static unsigned int mw16[] = { 0xB70F, 0xBF0F }; +static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; +static unsigned int mw64[] = {}; +#else /* not __i386__ */ +static unsigned char prefix_codes[] = { + 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36, + 0xF0, 0xF3, 0xF2, + /* REX Prefixes */ + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f +}; +/* AMD64 Manual 3, Appendix A*/ +static unsigned int reg_rop[] = { + 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int imm_wop[] = { 0xC6, 0xC7 }; +static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; +static unsigned int rw32[] = { + 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F +}; +/* 8 bit only */ +static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; +/* 16 bit only */ +static unsigned int mw16[] = { 0xB70F, 0xBF0F }; +/* 16 or 32 bit */ +static unsigned int mw32[] = { 0xC7 }; +/* 16, 32 or 64 bit */ +static unsigned int mw64[] = { 0x89, 0x8B }; +#endif /* not __i386__ */ + +static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, + int *rexr) +{ + int i; + unsigned char *p = addr; + *shorted = 0; + *enlarged = 0; + *rexr = 0; + +restart: + for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { + if (*p == prefix_codes[i]) { + if (*p == 0x66) + *shorted = 1; +#ifdef __amd64__ + if ((*p & 0xf8) == 0x48) + *enlarged = 1; + if ((*p & 0xf4) == 0x44) + *rexr = 1; +#endif + p++; + goto restart; + } + } + + return (p - addr); +} + +static int get_opcode(unsigned char *addr, unsigned int *opcode) +{ + int len; + + if (*addr == 0x0F) { + /* 0x0F is extension instruction */ + *opcode = *(unsigned short *)addr; + len = 2; + } else { + *opcode = *addr; + len = 1; + } + + return len; +} + +#define CHECK_OP_TYPE(opcode, array, type) \ + for (i = 0; i < ARRAY_SIZE(array); i++) { \ + if (array[i] == opcode) { \ + rv = type; \ + goto exit; \ + } \ + } + +enum reason_type get_ins_type(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int shorted, enlarged, rexr; + int i; + enum reason_type rv = OTHERS; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + CHECK_OP_TYPE(opcode, reg_rop, REG_READ); + CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE); + CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE); + +exit: + return rv; +} +#undef CHECK_OP_TYPE + +static unsigned int get_ins_reg_width(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int i, shorted, enlarged, rexr; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + for (i = 0; i < ARRAY_SIZE(rw8); i++) + if (rw8[i] == opcode) + return 1; + + for (i = 0; i < ARRAY_SIZE(rw32); i++) + if (rw32[i] == opcode) + return (shorted ? 2 : (enlarged ? 8 : 4)); + + printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); + return 0; +} + +unsigned int get_ins_mem_width(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char *p; + int i, shorted, enlarged, rexr; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + + for (i = 0; i < ARRAY_SIZE(mw8); i++) + if (mw8[i] == opcode) + return 1; + + for (i = 0; i < ARRAY_SIZE(mw16); i++) + if (mw16[i] == opcode) + return 2; + + for (i = 0; i < ARRAY_SIZE(mw32); i++) + if (mw32[i] == opcode) + return shorted ? 2 : 4; + + for (i = 0; i < ARRAY_SIZE(mw64); i++) + if (mw64[i] == opcode) + return shorted ? 2 : (enlarged ? 8 : 4); + + printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); + return 0; +} + +/* + * Define register ident in mod/rm byte. + * Note: these are NOT the same as in ptrace-abi.h. + */ +enum { + arg_AL = 0, + arg_CL = 1, + arg_DL = 2, + arg_BL = 3, + arg_AH = 4, + arg_CH = 5, + arg_DH = 6, + arg_BH = 7, + + arg_AX = 0, + arg_CX = 1, + arg_DX = 2, + arg_BX = 3, + arg_SP = 4, + arg_BP = 5, + arg_SI = 6, + arg_DI = 7, +#ifdef __amd64__ + arg_R8 = 8, + arg_R9 = 9, + arg_R10 = 10, + arg_R11 = 11, + arg_R12 = 12, + arg_R13 = 13, + arg_R14 = 14, + arg_R15 = 15 +#endif +}; + +static unsigned char *get_reg_w8(int no, struct pt_regs *regs) +{ + unsigned char *rv = NULL; + + switch (no) { + case arg_AL: + rv = (unsigned char *)®s->ax; + break; + case arg_BL: + rv = (unsigned char *)®s->bx; + break; + case arg_CL: + rv = (unsigned char *)®s->cx; + break; + case arg_DL: + rv = (unsigned char *)®s->dx; + break; + case arg_AH: + rv = 1 + (unsigned char *)®s->ax; + break; + case arg_BH: + rv = 1 + (unsigned char *)®s->bx; + break; + case arg_CH: + rv = 1 + (unsigned char *)®s->cx; + break; + case arg_DH: + rv = 1 + (unsigned char *)®s->dx; + break; +#ifdef __amd64__ + case arg_R8: + rv = (unsigned char *)®s->r8; + break; + case arg_R9: + rv = (unsigned char *)®s->r9; + break; + case arg_R10: + rv = (unsigned char *)®s->r10; + break; + case arg_R11: + rv = (unsigned char *)®s->r11; + break; + case arg_R12: + rv = (unsigned char *)®s->r12; + break; + case arg_R13: + rv = (unsigned char *)®s->r13; + break; + case arg_R14: + rv = (unsigned char *)®s->r14; + break; + case arg_R15: + rv = (unsigned char *)®s->r15; + break; +#endif + default: + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + break; + } + return rv; +} + +static unsigned long *get_reg_w32(int no, struct pt_regs *regs) +{ + unsigned long *rv = NULL; + + switch (no) { + case arg_AX: + rv = ®s->ax; + break; + case arg_BX: + rv = ®s->bx; + break; + case arg_CX: + rv = ®s->cx; + break; + case arg_DX: + rv = ®s->dx; + break; + case arg_SP: + rv = ®s->sp; + break; + case arg_BP: + rv = ®s->bp; + break; + case arg_SI: + rv = ®s->si; + break; + case arg_DI: + rv = ®s->di; + break; +#ifdef __amd64__ + case arg_R8: + rv = ®s->r8; + break; + case arg_R9: + rv = ®s->r9; + break; + case arg_R10: + rv = ®s->r10; + break; + case arg_R11: + rv = ®s->r11; + break; + case arg_R12: + rv = ®s->r12; + break; + case arg_R13: + rv = ®s->r13; + break; + case arg_R14: + rv = ®s->r14; + break; + case arg_R15: + rv = ®s->r15; + break; +#endif + default: + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + } + + return rv; +} + +unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) +{ + unsigned int opcode; + unsigned char mod_rm; + int reg; + unsigned char *p; + int i, shorted, enlarged, rexr; + unsigned long rv; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + for (i = 0; i < ARRAY_SIZE(reg_rop); i++) + if (reg_rop[i] == opcode) { + rv = REG_READ; + goto do_work; + } + + for (i = 0; i < ARRAY_SIZE(reg_wop); i++) + if (reg_wop[i] == opcode) { + rv = REG_WRITE; + goto do_work; + } + + printk(KERN_ERR "mmiotrace: Not a register instruction, opcode " + "0x%02x\n", opcode); + goto err; + +do_work: + mod_rm = *p; + reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); + switch (get_ins_reg_width(ins_addr)) { + case 1: + return *get_reg_w8(reg, regs); + + case 2: + return *(unsigned short *)get_reg_w32(reg, regs); + + case 4: + return *(unsigned int *)get_reg_w32(reg, regs); + +#ifdef __amd64__ + case 8: + return *(unsigned long *)get_reg_w32(reg, regs); +#endif + + default: + printk(KERN_ERR "mmiotrace: Error width# %d\n", reg); + } + +err: + return 0; +} + +unsigned long get_ins_imm_val(unsigned long ins_addr) +{ + unsigned int opcode; + unsigned char mod_rm; + unsigned char mod; + unsigned char *p; + int i, shorted, enlarged, rexr; + unsigned long rv; + + p = (unsigned char *)ins_addr; + p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += get_opcode(p, &opcode); + for (i = 0; i < ARRAY_SIZE(imm_wop); i++) + if (imm_wop[i] == opcode) { + rv = IMM_WRITE; + goto do_work; + } + + printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " + "0x%02x\n", opcode); + goto err; + +do_work: + mod_rm = *p; + mod = mod_rm >> 6; + p++; + switch (mod) { + case 0: + /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */ + /* AMD64: XXX Check for address size prefix? */ + if ((mod_rm & 0x7) == 0x5) + p += 4; + break; + + case 1: + p += 1; + break; + + case 2: + p += 4; + break; + + case 3: + default: + printk(KERN_ERR "mmiotrace: not a memory access instruction " + "at 0x%lx, rm_mod=0x%02x\n", + ins_addr, mod_rm); + } + + switch (get_ins_reg_width(ins_addr)) { + case 1: + return *(unsigned char *)p; + + case 2: + return *(unsigned short *)p; + + case 4: + return *(unsigned int *)p; + +#ifdef __amd64__ + case 8: + return *(unsigned long *)p; +#endif + + default: + printk(KERN_ERR "mmiotrace: Error: width.\n"); + } + +err: + return 0; +} diff --git a/arch/x86/mm/pf_in.h b/arch/x86/mm/pf_in.h new file mode 100644 index 000000000000..e05341a51a27 --- /dev/null +++ b/arch/x86/mm/pf_in.h @@ -0,0 +1,39 @@ +/* + * Fault Injection Test harness (FI) + * Copyright (C) Intel Crop. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + * + */ + +#ifndef __PF_H_ +#define __PF_H_ + +enum reason_type { + NOT_ME, /* page fault is not in regions */ + NOTHING, /* access others point in regions */ + REG_READ, /* read from addr to reg */ + REG_WRITE, /* write from reg to addr */ + IMM_WRITE, /* write from imm to addr */ + OTHERS /* Other instructions can not intercept */ +}; + +enum reason_type get_ins_type(unsigned long ins_addr); +unsigned int get_ins_mem_width(unsigned long ins_addr); +unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs); +unsigned long get_ins_imm_val(unsigned long ins_addr); + +#endif /* __PF_H_ */ diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c new file mode 100644 index 000000000000..cfa60b227c8d --- /dev/null +++ b/arch/x86/mm/testmmiotrace.c @@ -0,0 +1,71 @@ +/* + * Written by Pekka Paalanen, 2008 + */ +#include +#include + +#define MODULE_NAME "testmmiotrace" + +static unsigned long mmio_address; +module_param(mmio_address, ulong, 0); +MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); + +static void do_write_test(void __iomem *p) +{ + unsigned int i; + for (i = 0; i < 256; i++) + iowrite8(i, p + i); + for (i = 1024; i < (5 * 1024); i += 2) + iowrite16(i * 12 + 7, p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) + iowrite32(i * 212371 + 13, p + i); +} + +static void do_read_test(void __iomem *p) +{ + unsigned int i; + for (i = 0; i < 256; i++) + ioread8(p + i); + for (i = 1024; i < (5 * 1024); i += 2) + ioread16(p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) + ioread32(p + i); +} + +static void do_test(void) +{ + void __iomem *p = ioremap_nocache(mmio_address, 0x4000); + if (!p) { + pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); + return; + } + do_write_test(p); + do_read_test(p); + iounmap(p); +} + +static int __init init(void) +{ + if (mmio_address == 0) { + pr_err(MODULE_NAME ": you have to use the module argument " + "mmio_address.\n"); + pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" + " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); + return -ENXIO; + } + + pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " + "in PCI address space, and writing " + "rubbish in there.\n", mmio_address); + do_test(); + return 0; +} + +static void __exit cleanup(void) +{ + pr_debug(MODULE_NAME ": unloaded.\n"); +} + +module_init(init); +module_exit(cleanup); +MODULE_LICENSE("GPL"); -- cgit v1.2.3