From a9322f6488b432ddc1e89be88242c827c633fb63 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 11 Jun 2008 16:35:14 +0200 Subject: x86, pci: introduce pci=noioapicquirk kernel cmdline option Introduce pci=noioapicquirk kernel cmdline option to disable all boot interrupt quirks Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 795c487af8e4..1aebe9dffbaa 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1518,6 +1518,9 @@ and is between 256 and 4096 characters. It is defined in the file nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. + noioapicquirk [APIC] Disable all boot interrupt quirks. + Safety option to keep boot IRQs enabled. This + should never be necessary. biosirq [X86-32] Use PCI BIOS calls to get the interrupt routing table. These calls are known to be buggy on several machines and they hang the machine -- cgit v1.2.3 From 9197979b518573999d52d9e85bce1680682ed85c Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 11 Jun 2008 16:35:15 +0200 Subject: x86, pci: introduce pci=ioapicreroute kernel cmdline option Introduce pci=ioapicreroute kernel cmdline option to enable rerouting of boot interrupts to the primary io-apic. Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 ++++ arch/x86/pci/common.c | 5 +++++ include/asm-x86/io_apic.h | 4 ++++ include/asm-x86/pci.h | 1 + 4 files changed, 14 insertions(+) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1aebe9dffbaa..df262b3c3d6e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1521,6 +1521,10 @@ and is between 256 and 4096 characters. It is defined in the file noioapicquirk [APIC] Disable all boot interrupt quirks. Safety option to keep boot IRQs enabled. This should never be necessary. + ioapicreroute [APIC] Enable rerouting of boot IRQs to the + primary IO-APIC for bridges that cannot disable + boot IRQs. This fixes a source of spurious IRQs + when the system masks IRQs. biosirq [X86-32] Use PCI BIOS calls to get the interrupt routing table. These calls are known to be buggy on several machines and they hang the machine diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index bc6a101ed7ec..0a9eaa736d94 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -23,6 +23,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | static int pci_bf_sort; int pci_routeirq; int noioapicquirk; +int noioapicreroute = 1; int pcibios_last_bus = -1; unsigned long pirq_table_addr; struct pci_bus *pci_root_bus; @@ -499,6 +500,10 @@ char * __devinit pcibios_setup(char *str) } else if (!strcmp(str, "noioapicquirk")) { noioapicquirk = 1; return NULL; + } else if (!strcmp(str, "ioapicreroute")) { + if (noioapicreroute != -1) + noioapicreroute = 0; + return NULL; } return str; } diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 8ca0110819f4..a39670ae17df 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -160,12 +160,16 @@ extern int skip_ioapic_setup; /* 1 if "noapic" boot option passed */ extern int noioapicquirk; +/* -1 if "noapic" boot option passed */ +extern int noioapicreroute; + /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ extern int timer_through_8259; static inline void disable_ioapic_setup(void) { noioapicquirk = 1; + noioapicreroute = -1; skip_ioapic_setup = 1; } diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index 30eec93a845e..52a29f7668ef 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -20,6 +20,7 @@ struct pci_sysdata { extern int pci_routeirq; extern int noioapicquirk; +extern int ioapicreroute; /* scan a bus after allocating a pci_sysdata for it */ extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, -- cgit v1.2.3 From 41b9eb264c8407655db57b60b4457fe1b2ec9977 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Tue, 15 Jul 2008 13:48:55 +0200 Subject: x86, pci: introduce config option for pci reroute quirks (was: [PATCH 0/3] Boot IRQ quirks for Broadcom and AMD/ATI) This is against linux-2.6-tip, branch pci-ioapic-boot-irq-quirks. From: Stefan Assmann Subject: Introduce config option for pci reroute quirks The config option X86_REROUTE_FOR_BROKEN_BOOT_IRQS is introduced to enable (or disable) the redirection of the interrupt handler to the boot interrupt line by default. Depending on the existence of interrupt masking / threaded interrupt handling in the kernel (vanilla, rt, ...) and the maturity of the rerouting patch, users can enable or disable the redirection by default. This means that the reroute quirk can be applied to any kernel without changing it. Interrupt sharing could be increased if this option is enabled. However this option is vital for threaded interrupt handling, as done by the RT kernel. It should simplify the consolidation with the RT kernel. The option can be overridden by either pci=ioapicreroute or pci=noioapicreroute. Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Cc: Jesse Barnes Cc: Jon Masters Cc: Ihno Krumreich Cc: Sven Dietrich Cc: Daniel Gollub Cc: Felix Foerster Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 ++++ arch/x86/Kconfig | 24 ++++++++++++++++++++++++ arch/x86/pci/common.c | 8 ++++++++ drivers/pci/quirks.c | 2 +- include/asm-x86/pci.h | 2 +- 5 files changed, 38 insertions(+), 2 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f5662b7a34d1..62b6e8067a5b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1536,6 +1536,10 @@ and is between 256 and 4096 characters. It is defined in the file primary IO-APIC for bridges that cannot disable boot IRQs. This fixes a source of spurious IRQs when the system masks IRQs. + noioapicreroute [APIC] Disable workaround that uses the + boot IRQ equivalent of an IRQ that connects to + a chipset where boot IRQs cannot be disabled. + The opposite of ioapicreroute. biosirq [X86-32] Use PCI BIOS calls to get the interrupt routing table. These calls are known to be buggy on several machines and they hang the machine diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 96e0c2ebc388..09521332636b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -665,6 +665,30 @@ config X86_VISWS_APIC def_bool y depends on X86_32 && X86_VISWS +config X86_REROUTE_FOR_BROKEN_BOOT_IRQS + bool "Reroute for broken boot IRQs" + default n + depends on X86_IO_APIC + help + This option enables a workaround that fixes a source of + spurious interrupts. This is recommended when threaded + interrupt handling is used on systems where the generation of + superfluous "boot interrupts" cannot be disabled. + + Some chipsets generate a legacy INTx "boot IRQ" when the IRQ + entry in the chipset's IO-APIC is masked (as, e.g. the RT + kernel does during interrupt handling). On chipsets where this + boot IRQ generation cannot be disabled, this workaround keeps + the original IRQ line masked so that only the equivalent "boot + IRQ" is delivered to the CPUs. The workaround also tells the + kernel to set up the IRQ handler on the boot IRQ line. In this + way only one interrupt is delivered to the kernel. Otherwise + the spurious second interrupt may cause the kernel to bring + down (vital) interrupt lines. + + Only affects "broken" chipsets. Interrupt sharing may be + increased on these systems. + config X86_MCE bool "Machine Check Exception" depends on !X86_VOYAGER diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 1485a26ddcef..bb1a01f089e2 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -24,7 +24,11 @@ unsigned int pci_early_dump_regs; static int pci_bf_sort; int pci_routeirq; int noioapicquirk; +#ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS +int noioapicreroute = 0; +#else int noioapicreroute = 1; +#endif int pcibios_last_bus = -1; unsigned long pirq_table_addr; struct pci_bus *pci_root_bus; @@ -528,6 +532,10 @@ char * __devinit pcibios_setup(char *str) if (noioapicreroute != -1) noioapicreroute = 0; return NULL; + } else if (!strcmp(str, "noioapicreroute")) { + if (noioapicreroute != -1) + noioapicreroute = 1; + return NULL; } return str; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 0911b0c60b64..c880dd0bbfb5 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1397,7 +1397,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260b, quirk_intel_pcie_pm); */ static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev) { - if (noioapicquirk) + if (noioapicquirk || noioapicreroute) return; dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT; diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index 52a29f7668ef..9584d6d5eb93 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -20,7 +20,7 @@ struct pci_sysdata { extern int pci_routeirq; extern int noioapicquirk; -extern int ioapicreroute; +extern int noioapicreroute; /* scan a bus after allocating a pci_sysdata for it */ extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, -- cgit v1.2.3 From 395628ef4ea12ff0748099f145363b5e33c69acb Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 24 Oct 2008 17:22:01 -0700 Subject: x86: Skip verification by the watchdog for TSC clocksource. Impact: Changes timekeeping on Vmware (or with tsc=reliable). This is achieved by resetting the CLOCKSOURCE_MUST_VERIFY flag. We add a tsc=reliable commandline option to enable this. This enables legacy hardware without HPET, LAPIC, or ACPI timers to enter high-resolution timer mode. Along with that have extended this to be used in virtualization environement too. Now we also set this flag if the X86_FEATURE_TSC_RELIABLE bit is set. This is important since there is a wrap-around problem with the acpi_pm timer. The acpi_pm counter is just 24bits and this can overflow in ~4 seconds. With the NO_HZ kernels in virtualized environment, there can be situations when the guest is descheduled for longer duration, as a result we may miss the wrap of the acpi counter. When TSC is used as a clocksource and acpi_pm timer is being used as the watchdog clocksource this error in acpi_pm results in TSC being marked as unstable, and essentially results in time dropping in chunks of 4 seconds whenever this wrap is missed. Since the virtualized TSC is reliable on VMware, we should always use the TSCs clocksource on VMware, so we skip the verfication at runtime, by checking for the feature bit. Since we reset the flag for mgeode systems too, i have combined the mgeode case with the feature bit check. Signed-off-by: Jeff Hansen Signed-off-by: Alok N Kataria Signed-off-by: Dan Hecht Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 7 +++++++ arch/x86/kernel/tsc.c | 33 +++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 12 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b6..dc6b06f67fca 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2267,6 +2267,13 @@ and is between 256 and 4096 characters. It is defined in the file Format: ,,,,,,,, + tsc= Disable clocksource-must-verify flag for TSC. + Format: + [x86] reliable: mark tsc clocksource as reliable, this + disables clocksource verification at runtime. + Used to enable high-resolution timer mode on older + hardware, and in virtualized environment. + turbografx.map[2|3]= [HW,JOY] TurboGraFX parallel port interface Format: diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6dbf0bcb44a8..ee01cd96b5e1 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -32,6 +32,7 @@ static int tsc_unstable; erroneous rdtsc usage on !cpu_has_tsc processors */ static int tsc_disabled = -1; +static int tsc_clocksource_reliable; /* * Scheduler clock - returns current time in nanosec units. */ @@ -99,6 +100,15 @@ int __init notsc_setup(char *str) __setup("notsc", notsc_setup); +static int __init tsc_setup(char *str) +{ + if (!strcmp(str, "reliable")) + tsc_clocksource_reliable = 1; + return 1; +} + +__setup("tsc=", tsc_setup); + #define MAX_RETRIES 5 #define SMI_TRESHOLD 50000 @@ -738,24 +748,21 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { {} }; -/* - * Geode_LX - the OLPC CPU has a possibly a very reliable TSC - */ +static void __init check_system_tsc_reliable(void) +{ #ifdef CONFIG_MGEODE_LX -/* RTSC counts during suspend */ + /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 - -static void __init check_geode_tsc_reliable(void) -{ unsigned long res_low, res_high; rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */ if (res_low & RTSC_SUSP) - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; -} -#else -static inline void check_geode_tsc_reliable(void) { } + tsc_clocksource_reliable = 1; #endif + if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) + tsc_clocksource_reliable = 1; +} /* * Make an educated guess if the TSC is trustworthy and synchronized @@ -790,6 +797,8 @@ static void __init init_tsc_clocksource(void) { clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, clocksource_tsc.shift); + if (tsc_clocksource_reliable) + clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; /* lower the rating if we already know its unstable: */ if (check_tsc_unstable()) { clocksource_tsc.rating = 0; @@ -850,7 +859,7 @@ void __init tsc_init(void) if (unsynchronized_tsc()) mark_tsc_unstable("TSCs unsynchronized"); - check_geode_tsc_reliable(); + check_system_tsc_reliable(); init_tsc_clocksource(); } -- cgit v1.2.3 From d9e540762f5cdd89f24e518ad1fd31142d0b9726 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 1 Nov 2008 19:57:37 +0100 Subject: ftrace: ftrace_dump_on_oops=[tracer] Impact: add new (optional) debug boot option In order to facilitate early boot trouble, allow one to specify a tracer on the kernel boot line. Signed-off-by: Peter Zijlstra Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 8 +++++ kernel/trace/trace.c | 58 ++++++++++++++++++++++++------------- 2 files changed, 46 insertions(+), 20 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b6..4862284d3119 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -765,6 +765,14 @@ and is between 256 and 4096 characters. It is defined in the file parameter will force ia64_sal_cache_flush to call ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. + ftrace=[tracer] + [ftrace] will set and start the specified tracer + as early as possible in order to facilitate early + boot debugging. + + ftrace_dump_on_oops + [ftrace] will dump the trace buffers on oops. + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bdb1df00fb10..482583eb8001 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -79,6 +79,15 @@ static int tracing_disabled = 1; */ int ftrace_dump_on_oops; +static int tracing_set_tracer(char *buf); + +static int __init set_ftrace(char *str) +{ + tracing_set_tracer(str); + return 1; +} +__setup("ftrace", set_ftrace); + static int __init set_ftrace_dump_on_oops(char *str) { ftrace_dump_on_oops = 1; @@ -2394,29 +2403,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static ssize_t -tracing_set_trace_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) +static int tracing_set_tracer(char *buf) { struct trace_array *tr = &global_trace; struct tracer *t; - char buf[max_tracer_type_len+1]; - int i; - size_t ret; - - ret = cnt; - - if (cnt > max_tracer_type_len) - cnt = max_tracer_type_len; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - /* strip ending whitespace. */ - for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) - buf[i] = 0; + int ret = 0; mutex_lock(&trace_types_lock); for (t = trace_types; t; t = t->next) { @@ -2440,6 +2431,33 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, out: mutex_unlock(&trace_types_lock); + return ret; +} + +static ssize_t +tracing_set_trace_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[max_tracer_type_len+1]; + int i; + size_t ret; + + if (cnt > max_tracer_type_len) + cnt = max_tracer_type_len; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + /* strip ending whitespace. */ + for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) + buf[i] = 0; + + ret = tracing_set_tracer(buf); + if (!ret) + ret = cnt; + if (ret > 0) filp->f_pos += ret; -- cgit v1.2.3 From 5b9a0e14eb4bf40a7cb780af4723560e06753f2d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 1 Nov 2008 18:06:51 +0300 Subject: x86: nmi - nmi_watchdog boot param docs cleanup Impact: documentation update 1) nmi_watchdog boot parameter is common to 32/64 bit modes. So move it from Documentation/x86/x86_64/boot-options.txt to Documentation/kernel-parameters.txt and integrate with. 2) Also fix [panic] keyword placement -- it ought to be at first position otherwise it will not be recognized. 3) Document lapic and ioapic keywords. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 15 ++++++++++++++- Documentation/x86/x86_64/boot-options.txt | 11 ----------- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 343e0f0f84b6..6246220ac914 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1403,7 +1403,20 @@ and is between 256 and 4096 characters. It is defined in the file when a NMI is triggered. Format: [state][,regs][,debounce][,die] - nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels + nmi_watchdog= [KNL,BUGS=X86-32,X86-64] Debugging features for SMP kernels + Format: [panic,][num] + Valid num: 0,1,2 + 0 - turn nmi_watchdog off + 1 - use the IO-APIC timer for the NMI watchdog + 2 - use the local APIC for the NMI watchdog using + a performance counter. Note: This will use one performance + counter and the local APIC's performance vector. + When panic is specified panic when an NMI watchdog timeout occurs. + This is useful when you use a panic=... timeout and need the box + quickly up again. + Instead of 1 and 2 it is possible to use the following + symbolic names: lapic and ioapic + Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic no387 [BUGS=X86-32] Tells the kernel to use the 387 maths emulation library even if a 387 maths coprocessor diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 72ffb5373ec7..bc8a339c1f27 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -79,17 +79,6 @@ Timing Report when timer interrupts are lost because some code turned off interrupts for too long. - nmi_watchdog=NUMBER[,panic] - NUMBER can be: - 0 don't use an NMI watchdog - 1 use the IO-APIC timer for the NMI watchdog - 2 use the local APIC for the NMI watchdog using a performance counter. Note - This will use one performance counter and the local APIC's performance - vector. - When panic is specified panic when an NMI watchdog timeout occurs. - This is useful when you use a panic=... timeout and need the box - quickly up again. - nohpet Don't use the HPET timer. -- cgit v1.2.3 From 1f29fae29709b4668979e244c09b2fa78ff1ad59 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 5 Nov 2008 16:08:52 -0600 Subject: file capabilities: add no_file_caps switch (v4) Add a no_file_caps boot option when file capabilities are compiled into the kernel (CONFIG_SECURITY_FILE_CAPABILITIES=y). This allows distributions to ship a kernel with file capabilities compiled in, without forcing users to use (and understand and trust) them. When no_file_caps is specified at boot, then when a process executes a file, any file capabilities stored with that file will not be used in the calculation of the process' new capability sets. This means that booting with the no_file_caps boot option will not be the same as booting a kernel with file capabilities compiled out - in particular a task with CAP_SETPCAP will not have any chance of passing capabilities to another task (which isn't "really" possible anyway, and which may soon by killed altogether by David Howells in any case), and it will instead be able to put new capabilities in its pI. However since fI will always be empty and pI is masked with fI, it gains the task nothing. We also support the extra prctl options, setting securebits and dropping capabilities from the per-process bounding set. The other remaining difference is that killpriv, task_setscheduler, setioprio, and setnice will continue to be hooked. That will be noticable in the case where a root task changed its uid while keeping some caps, and another task owned by the new uid tries to change settings for the more privileged task. Changelog: Nov 05 2008: (v4) trivial port on top of always-start-\ with-clear-caps patch Sep 23 2008: nixed file_caps_enabled when file caps are not compiled in as it isn't used. Document no_file_caps in kernel-parameters.txt. Signed-off-by: Serge Hallyn Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- Documentation/kernel-parameters.txt | 4 ++++ include/linux/capability.h | 3 +++ kernel/capability.c | 11 +++++++++++ security/commoncap.c | 3 +++ 4 files changed, 21 insertions(+) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b6..784443acca9c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1459,6 +1459,10 @@ and is between 256 and 4096 characters. It is defined in the file instruction doesn't work correctly and not to use it. + no_file_caps Tells the kernel not to honor file capabilities. The + only way then for a file to be executed with privilege + is to be setuid root or executed by root. + nohalt [IA-64] Tells the kernel not to use the power saving function PAL_HALT_LIGHT when idle. This increases power-consumption. On the positive side, it reduces diff --git a/include/linux/capability.h b/include/linux/capability.h index 9d1fe30b6f6c..5bc145bd759a 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -68,6 +68,9 @@ typedef struct __user_cap_data_struct { #define VFS_CAP_U32 VFS_CAP_U32_2 #define VFS_CAP_REVISION VFS_CAP_REVISION_2 +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES +extern int file_caps_enabled; +#endif struct vfs_cap_data { __le32 magic_etc; /* Little endian */ diff --git a/kernel/capability.c b/kernel/capability.c index 33e51e78c2d8..e13a68535ad5 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -33,6 +33,17 @@ EXPORT_SYMBOL(__cap_empty_set); EXPORT_SYMBOL(__cap_full_set); EXPORT_SYMBOL(__cap_init_eff_set); +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES +int file_caps_enabled = 1; + +static int __init file_caps_disable(char *str) +{ + file_caps_enabled = 0; + return 1; +} +__setup("no_file_caps", file_caps_disable); +#endif + /* * More recent versions of libcap are available from: * diff --git a/security/commoncap.c b/security/commoncap.c index 3976613db829..f88119cb2bc2 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -281,6 +281,9 @@ static int get_file_caps(struct linux_binprm *bprm) bprm_clear_caps(bprm); + if (!file_caps_enabled) + return 0; + if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) return 0; -- cgit v1.2.3 From f38f1d2aa5a3520cf05da7cd6bd12fe2b0c509b7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 16 Dec 2008 23:06:40 -0500 Subject: trace: add a way to enable or disable the stack tracer Impact: enhancement to stack tracer The stack tracer currently is either on when configured in or off when it is not. It can not be disabled when it is configured on. (besides disabling the function tracer that it uses) This patch adds a way to enable or disable the stack tracer at run time. It defaults off on bootup, but a kernel parameter 'stacktrace' has been added to enable it on bootup. A new sysctl has been added "kernel.stack_tracer_enabled" to let the user enable or disable the stack tracer at run time. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 +++ include/linux/ftrace.h | 8 ++++++ kernel/sysctl.c | 10 +++++++ kernel/trace/Kconfig | 13 +++++++--- kernel/trace/trace_stack.c | 52 ++++++++++++++++++++++++++++++++++--- 5 files changed, 79 insertions(+), 8 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2919a2e91938..edab81c13182 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -89,6 +89,7 @@ parameter is applicable: SPARC Sparc architecture is enabled. SWSUSP Software suspend (hibernation) is enabled. SUSPEND System suspend states are enabled. + FTRACE Function tracing enabled. TS Appropriate touchscreen support is enabled. USB USB support is enabled. USBHID USB Human Interface Device support is enabled. @@ -2173,6 +2174,9 @@ and is between 256 and 4096 characters. It is defined in the file st= [HW,SCSI] SCSI tape parameters (buffers, etc.) See Documentation/scsi/st.txt. + stacktrace [FTRACE] + Enabled the stack tracer on boot up. + sti= [PARISC,HW] Format: Set the STI (builtin display/keyboard on the HP-PARISC diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 44020f31bd81..6b0db53caa7d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -86,6 +86,14 @@ static inline void ftrace_stop(void) { } static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_STACK_TRACER +extern int stack_tracer_enabled; +int +stack_trace_sysctl(struct ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *lenp, + loff_t *ppos); +#endif + #ifdef CONFIG_DYNAMIC_FTRACE /* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c83f566e940a..6ac501a2dcc6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -487,6 +487,16 @@ static struct ctl_table kern_table[] = { .proc_handler = &ftrace_enable_sysctl, }, #endif +#ifdef CONFIG_STACK_TRACER + { + .ctl_name = CTL_UNNUMBERED, + .procname = "stack_tracer_enabled", + .data = &stack_tracer_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &stack_trace_sysctl, + }, +#endif #ifdef CONFIG_TRACING { .ctl_name = CTL_UNNUMBERED, diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d8bae6f4219e..e2a4ff6fc3a6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -244,10 +244,15 @@ config STACK_TRACER This tracer works by hooking into every function call that the kernel executes, and keeping a maximum stack depth value and - stack-trace saved. Because this logic has to execute in every - kernel function, all the time, this option can slow down the - kernel measurably and is generally intended for kernel - developers only. + stack-trace saved. If this is configured with DYNAMIC_FTRACE + then it will not have any overhead while the stack tracer + is disabled. + + To enable the stack tracer on bootup, pass in 'stacktrace' + on the kernel command line. + + The stack tracer can also be enabled or disabled via the + sysctl kernel.stack_tracer_enabled Say N if unsure. diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 0b863f2cbc8e..4842c969c785 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include "trace.h" @@ -31,6 +32,10 @@ static raw_spinlock_t max_stack_lock = static int stack_trace_disabled __read_mostly; static DEFINE_PER_CPU(int, trace_active); +static DEFINE_MUTEX(stack_sysctl_mutex); + +int stack_tracer_enabled; +static int last_stack_tracer_enabled; static inline void check_stack(void) { @@ -174,7 +179,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, return count; } -static struct file_operations stack_max_size_fops = { +static const struct file_operations stack_max_size_fops = { .open = tracing_open_generic, .read = stack_max_size_read, .write = stack_max_size_write, @@ -272,7 +277,7 @@ static int t_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations stack_trace_seq_ops = { +static const struct seq_operations stack_trace_seq_ops = { .start = t_start, .next = t_next, .stop = t_stop, @@ -288,12 +293,48 @@ static int stack_trace_open(struct inode *inode, struct file *file) return ret; } -static struct file_operations stack_trace_fops = { +static const struct file_operations stack_trace_fops = { .open = stack_trace_open, .read = seq_read, .llseek = seq_lseek, }; +int +stack_trace_sysctl(struct ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + + mutex_lock(&stack_sysctl_mutex); + + ret = proc_dointvec(table, write, file, buffer, lenp, ppos); + + if (ret || !write || + (last_stack_tracer_enabled == stack_tracer_enabled)) + goto out; + + last_stack_tracer_enabled = stack_tracer_enabled; + + if (stack_tracer_enabled) + register_ftrace_function(&trace_ops); + else + unregister_ftrace_function(&trace_ops); + + out: + mutex_unlock(&stack_sysctl_mutex); + return ret; +} + +static int start_stack_trace __initdata; + +static __init int enable_stacktrace(char *str) +{ + start_stack_trace = 1; + return 1; +} +__setup("stacktrace", enable_stacktrace); + static __init int stack_trace_init(void) { struct dentry *d_tracer; @@ -311,7 +352,10 @@ static __init int stack_trace_init(void) if (!entry) pr_warning("Could not create debugfs 'stack_trace' entry\n"); - register_ftrace_function(&trace_ops); + if (start_stack_trace) { + register_ftrace_function(&trace_ops); + stack_tracer_enabled = 1; + } return 0; } -- cgit v1.2.3 From 2b1a61f0a8c714c96277bf16a823a84bafa1397d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 25 Dec 2008 13:39:23 +0100 Subject: [S390] cpu topology: introduce kernel parameter Introduce a topology=[on|off] kernel parameter which allows to switch cpu topology on/off. Default will be off, since it looks like that for some workloards this doesn't behave very well (on s390). Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- Documentation/kernel-parameters.txt | 8 ++++++++ arch/s390/kernel/topology.c | 12 +++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c9115c1b672c..09ede6c90ad7 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2252,6 +2252,14 @@ and is between 256 and 4096 characters. It is defined in the file See comment before function dc390_setup() in drivers/scsi/tmscsim.c. + topology= [S390] + Format: {off | on} + Specify if the kernel should make use of the cpu + topology informations if the hardware supports these. + The scheduler will make use of these informations and + e.g. base its process migration decisions on it. + Default is off. + tp720= [HW,PS2] trix= [HW,OSS] MediaTrix AudioTrix Pro diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 36faac50e774..71e6f56bfead 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -58,6 +58,7 @@ struct core_info { cpumask_t mask; }; +static int topology_enabled; static void topology_work_fn(struct work_struct *work); static struct tl_info *tl_info; static struct core_info core_info; @@ -78,7 +79,7 @@ cpumask_t cpu_coregroup_map(unsigned int cpu) cpumask_t mask; cpus_clear(mask); - if (!machine_has_topology) + if (!topology_enabled || !machine_has_topology) return cpu_possible_map; spin_lock_irqsave(&topology_lock, flags); while (core) { @@ -263,6 +264,15 @@ static void topology_interrupt(__u16 code) schedule_work(&topology_work); } +static int __init early_parse_topology(char *p) +{ + if (strncmp(p, "on", 2)) + return 0; + topology_enabled = 1; + return 0; +} +early_param("topology", early_parse_topology); + static int __init init_topology_update(void) { int rc; -- cgit v1.2.3 From cef7125def4dd104769f400c941199614da0aca1 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 25 Dec 2008 13:39:55 +0100 Subject: [S390] provide documentation for hvc_iucv kernel parameter. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- Documentation/kernel-parameters.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 09ede6c90ad7..12f0fb6faaf6 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -814,6 +814,9 @@ and is between 256 and 4096 characters. It is defined in the file hlt [BUGS=ARM,SH] + hvc_iucv= [S390] Number of z/VM IUCV Hypervisor console (HVC) + back-ends. Valid parameters: 0..8 + i8042.debug [HW] Toggle i8042 debug mode i8042.direct [HW] Put keyboard port into non-translated mode i8042.dumbkbd [HW] Pretend that controller can only read data from -- cgit v1.2.3 From 07983f0e36eab01a5385117e55154a2aa796eafc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Jan 2009 14:42:41 -0800 Subject: documentation: update header file paths Update several Documentation/ files and a few sub-dir files (only one change in each) to reflect changed header files locations. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DMA-mapping.txt | 2 +- Documentation/ioctl/ioctl-number.txt | 8 ++++---- Documentation/kernel-parameters.txt | 4 ++-- Documentation/magic-number.txt | 6 +++--- Documentation/mips/AU1xxx_IDE.README | 2 +- Documentation/powerpc/cpu_features.txt | 2 +- Documentation/x86/zero-page.txt | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt index c74fec8c2351..b2a4d6d244d9 100644 --- a/Documentation/DMA-mapping.txt +++ b/Documentation/DMA-mapping.txt @@ -26,7 +26,7 @@ mapped only for the time they are actually used and unmapped after the DMA transfer. The following API will work of course even on platforms where no such -hardware exists, see e.g. include/asm-i386/pci.h for how it is implemented on +hardware exists, see e.g. arch/x86/include/asm/pci.h for how it is implemented on top of the virt_to_bus interface. First of all, you should make sure diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 824699174436..f1d639903325 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -84,7 +84,7 @@ Code Seq# Include File Comments 'B' C0-FF advanced bbus 'C' all linux/soundcard.h -'D' all asm-s390/dasd.h +'D' all arch/s390/include/asm/dasd.h 'E' all linux/input.h 'F' all linux/fb.h 'H' all linux/hiddev.h @@ -105,7 +105,7 @@ Code Seq# Include File Comments 'S' 80-81 scsi/scsi_ioctl.h conflict! 'S' 82-FF scsi/scsi.h conflict! 'T' all linux/soundcard.h conflict! -'T' all asm-i386/ioctls.h conflict! +'T' all arch/x86/include/asm/ioctls.h conflict! 'U' 00-EF linux/drivers/usb/usb.h 'V' all linux/vt.h 'W' 00-1F linux/watchdog.h conflict! @@ -120,7 +120,7 @@ Code Seq# Include File Comments 'c' 00-7F linux/comstats.h conflict! 'c' 00-7F linux/coda.h conflict! -'c' 80-9F asm-s390/chsc.h +'c' 80-9F arch/s390/include/asm/chsc.h 'd' 00-FF linux/char/drm/drm/h conflict! 'd' 00-DF linux/video_decoder.h conflict! 'd' F0-FF linux/digi1.h @@ -170,7 +170,7 @@ Code Seq# Include File Comments 0x80 00-1F linux/fb.h 0x81 00-1F linux/videotext.h -0x89 00-06 asm-i386/sockios.h +0x89 00-06 arch/x86/include/asm/sockios.h 0x89 0B-DF linux/sockios.h 0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range 0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a2d8805c03d5..7f0b694e02e8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -469,8 +469,8 @@ and is between 256 and 4096 characters. It is defined in the file clearcpuid=BITNUM [X86] Disable CPUID feature X for the kernel. See - include/asm-x86/cpufeature.h for the valid bit numbers. - Note the Linux specific bits are not necessarily + arch/x86/include/asm/cpufeature.h for the valid bit + numbers. Note the Linux specific bits are not necessarily stable over kernel options, but the vendor specific ones should be. Also note that user programs calling CPUID directly diff --git a/Documentation/magic-number.txt b/Documentation/magic-number.txt index 95070028d15e..505f19607542 100644 --- a/Documentation/magic-number.txt +++ b/Documentation/magic-number.txt @@ -125,14 +125,14 @@ TRIDENT_CARD_MAGIC 0x5072696E trident_card sound/oss/trident.c ROUTER_MAGIC 0x524d4157 wan_device include/linux/wanrouter.h SCC_MAGIC 0x52696368 gs_port drivers/char/scc.h SAVEKMSG_MAGIC1 0x53415645 savekmsg arch/*/amiga/config.c -GDA_MAGIC 0x58464552 gda include/asm-mips64/sn/gda.h +GDA_MAGIC 0x58464552 gda arch/mips/include/asm/sn/gda.h RED_MAGIC1 0x5a2cf071 (any) mm/slab.c STL_PORTMAGIC 0x5a7182c9 stlport include/linux/stallion.h EEPROM_MAGIC_VALUE 0x5ab478d2 lanai_dev drivers/atm/lanai.c HDLCDRV_MAGIC 0x5ac6e778 hdlcdrv_state include/linux/hdlcdrv.h EPCA_MAGIC 0x5c6df104 channel include/linux/epca.h PCXX_MAGIC 0x5c6df104 channel drivers/char/pcxx.h -KV_MAGIC 0x5f4b565f kernel_vars_s include/asm-mips64/sn/klkernvars.h +KV_MAGIC 0x5f4b565f kernel_vars_s arch/mips/include/asm/sn/klkernvars.h I810_STATE_MAGIC 0x63657373 i810_state sound/oss/i810_audio.c TRIDENT_STATE_MAGIC 0x63657373 trient_state sound/oss/trident.c M3_CARD_MAGIC 0x646e6f50 m3_card sound/oss/maestro3.c @@ -158,7 +158,7 @@ CCB_MAGIC 0xf2691ad2 ccb drivers/scsi/ncr53c8xx.c QUEUE_MAGIC_FREE 0xf7e1c9a3 queue_entry drivers/scsi/arm/queue.c QUEUE_MAGIC_USED 0xf7e1cc33 queue_entry drivers/scsi/arm/queue.c HTB_CMAGIC 0xFEFAFEF1 htb_class net/sched/sch_htb.c -NMI_MAGIC 0x48414d4d455201 nmi_s include/asm-mips64/sn/nmi.h +NMI_MAGIC 0x48414d4d455201 nmi_s arch/mips/include/asm/sn/nmi.h Note that there are also defined special per-driver magic numbers in sound memory management. See include/sound/sndmagic.h for complete list of them. Many diff --git a/Documentation/mips/AU1xxx_IDE.README b/Documentation/mips/AU1xxx_IDE.README index 25a6ed1aaa5b..f54962aea84d 100644 --- a/Documentation/mips/AU1xxx_IDE.README +++ b/Documentation/mips/AU1xxx_IDE.README @@ -44,7 +44,7 @@ FILES, CONFIGS AND COMPATABILITY Two files are introduced: - a) 'include/asm-mips/mach-au1x00/au1xxx_ide.h' + a) 'arch/mips/include/asm/mach-au1x00/au1xxx_ide.h' containes : struct _auide_hwif timing parameters for PIO mode 0/1/2/3/4 timing parameters for MWDMA 0/1/2 diff --git a/Documentation/powerpc/cpu_features.txt b/Documentation/powerpc/cpu_features.txt index 472739880e87..ffa4183fdb8b 100644 --- a/Documentation/powerpc/cpu_features.txt +++ b/Documentation/powerpc/cpu_features.txt @@ -31,7 +31,7 @@ anyways). After detecting the processor type, the kernel patches out sections of code that shouldn't be used by writing nop's over it. Using cpufeatures requires -just 2 macros (found in include/asm-ppc/cputable.h), as seen in head.S +just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S transfer_to_handler: #ifdef CONFIG_ALTIVEC diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt index 169ad423a3d1..4f913857b8a2 100644 --- a/Documentation/x86/zero-page.txt +++ b/Documentation/x86/zero-page.txt @@ -3,7 +3,7 @@ protocol of kernel. These should be filled by bootloader or 16-bit real-mode setup code of the kernel. References/settings to it mainly are in: - include/asm-x86/bootparam.h + arch/x86/include/asm/bootparam.h Offset Proto Name Meaning -- cgit v1.2.3 From 7c4be253d3a01ddc92033ec3a3812fddf703ef19 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Jan 2009 14:42:44 -0800 Subject: docs: add more early params to kernel-parameters.txt Add some (more) early_param boot options to kernel-parameters.txt. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7f0b694e02e8..0543370e702b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1117,6 +1117,8 @@ and is between 256 and 4096 characters. It is defined in the file If there are multiple matching configurations changing the same attribute, the last one is used. + lmb=debug [KNL] Enable lmb debug messages. + load_ramdisk= [RAM] List of ramdisks to load from floppy See Documentation/blockdev/ramdisk.txt. @@ -1569,6 +1571,10 @@ and is between 256 and 4096 characters. It is defined in the file nr_uarts= [SERIAL] maximum number of UARTs to be registered. + ohci1394_dma=early [HW] enable debugging via the ohci1394 driver. + See Documentation/debugging-via-ohci1394.txt for more + info. + olpc_ec_timeout= [OLPC] ms delay when issuing EC commands Rather than timing out after 20 ms if an EC command is not properly ACKed, override the length -- cgit v1.2.3 From ecb08d81313a3c015225236775de259d99ab47fe Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 6 Jan 2009 14:42:44 -0800 Subject: doc: reformat some long lines in kernel-parameters.txt Reformat text to (mostly) stay within 80 columns of text. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0543370e702b..3ccf1bc5affe 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1799,10 +1799,10 @@ and is between 256 and 4096 characters. It is defined in the file autoconfiguration. Ranges are in pairs (memory base and size). - dynamic_printk - Enables pr_debug()/dev_dbg() calls if - CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. These can also - be switched on/off via /dynamic_printk/modules + dynamic_printk Enables pr_debug()/dev_dbg() calls if + CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. + These can also be switched on/off via + /dynamic_printk/modules print-fatal-signals= [KNL] debug: print fatal signals @@ -1890,7 +1890,7 @@ and is between 256 and 4096 characters. It is defined in the file reboot= [BUGS=X86-32,BUGS=ARM,BUGS=IA-64] Rebooting mode Format: [,[,...]] - See arch/*/kernel/reboot.c or arch/*/kernel/process.c + See arch/*/kernel/reboot.c or arch/*/kernel/process.c relax_domain_level= [KNL, SMP] Set scheduler's default relax_domain_level. @@ -2438,8 +2438,8 @@ and is between 256 and 4096 characters. It is defined in the file Format: ,,,,,[,[,[,]]] - norandmaps Don't use address space randomization - Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space + norandmaps Don't use address space randomization. Equivalent to + echo 0 > /proc/sys/kernel/randomize_va_space ______________________________________________________________________ -- cgit v1.2.3 From 4cb0e11b15d2badad455fcd538af0cccf05dc012 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Tue, 6 Jan 2009 14:42:47 -0800 Subject: coredump_filter: permit changing of the default filter Introduce a new kernel parameter `coredump_filter'. Setting a value to this parameter causes the default bitmask of coredump_filter to be changed. It is useful for users to change coredump_filter settings for the whole system at boot time. Without this parameter, users have to change coredump_filter settings for each /proc// in an initializing script. Signed-off-by: Hidehiro Kawai Cc: Roland McGrath Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 5 +++++ kernel/fork.c | 15 +++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 3ccf1bc5affe..0b3f6711d2f1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -551,6 +551,11 @@ and is between 256 and 4096 characters. It is defined in the file not work reliably with all consoles, but is known to work with serial and VGA consoles. + coredump_filter= + [KNL] Change the default value for + /proc//coredump_filter. + See also Documentation/filesystems/proc.txt. + cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver Format: ,,,[,] diff --git a/kernel/fork.c b/kernel/fork.c index 23b912116675..7b8f2a78be3d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -400,6 +400,18 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) +static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; + +static int __init coredump_filter_setup(char *s) +{ + default_dump_filter = + (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & + MMF_DUMP_FILTER_MASK; + return 1; +} + +__setup("coredump_filter=", coredump_filter_setup); + #include static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) @@ -408,8 +420,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); - mm->flags = (current->mm) ? current->mm->flags - : MMF_DUMP_FILTER_DEFAULT; + mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; mm->core_state = NULL; mm->nr_ptes = 0; set_mm_counter(mm, file_rss, 0); -- cgit v1.2.3 From d4f373e57d3916814110968c5ea1155a8d972b5a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 10 Nov 2008 14:07:45 -0500 Subject: USB: usb-storage: add "quirks=" module parameter This patch (as1163b) adds a "quirks=" module parameter to usb-storage. This will allow people to make short-term changes to their unusual_devs list without rebuilding the entire driver. Testing will become much easier, and less-sophisticated users will be able to access their buggy devices after a simple config-file change instead of having to wait for a new kernel release. The patch also adds a documentation entry for usb-storage's "delay_use" parameter, which has been around for years but but was never listed among the kernel parameters. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 29 +++++++++ drivers/usb/storage/usb.c | 113 ++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0b3f6711d2f1..8eb6e35405cd 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -91,6 +91,7 @@ parameter is applicable: SUSPEND System suspend states are enabled. FTRACE Function tracing enabled. TS Appropriate touchscreen support is enabled. + UMS USB Mass Storage support is enabled. USB USB support is enabled. USBHID USB Human Interface Device support is enabled. V4L Video For Linux support is enabled. @@ -2383,6 +2384,34 @@ and is between 256 and 4096 characters. It is defined in the file usbhid.mousepoll= [USBHID] The interval which mice are to be polled at. + usb-storage.delay_use= + [UMS] The delay in seconds before a new device is + scanned for Logical Units (default 5). + + usb-storage.quirks= + [UMS] A list of quirks entries to supplement or + override the built-in unusual_devs list. List + entries are separated by commas. Each entry has + the form VID:PID:Flags where VID and PID are Vendor + and Product ID values (4-digit hex numbers) and + Flags is a set of characters, each corresponding + to a common usb-storage quirk flag as follows: + c = FIX_CAPACITY (decrease the reported + device capacity by one sector); + i = IGNORE_DEVICE (don't bind to this + device); + l = NOT_LOCKABLE (don't try to lock and + unlock ejectable media); + m = MAX_SECTORS_64 (don't transfer more + than 64 sectors = 32 KB at a time); + r = IGNORE_RESIDUE (the device reports + bogus residue values); + s = SINGLE_LUN (the device has only one + Logical Unit); + w = NO_WP_DETECT (don't test whether the + medium is write-protected). + Example: quirks=0419:aaf5:rl,0421:0433:rc + add_efi_memmap [EFI; x86-32,X86-64] Include EFI memory map in kernel's map of available physical RAM. diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 27016fd2cad1..eb1a53a3e5ca 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -113,6 +113,16 @@ static unsigned int delay_use = 5; module_param(delay_use, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(delay_use, "seconds to delay before using a new device"); +static char *quirks; +module_param(quirks, charp, S_IRUGO); +MODULE_PARM_DESC(quirks, "supplemental list of device IDs and their quirks"); + +struct quirks_entry { + u16 vid, pid; + u32 fflags; +}; +static struct quirks_entry *quirks_list, *quirks_end; + /* * The entries in this table correspond, line for line, @@ -473,6 +483,30 @@ static int associate_dev(struct us_data *us, struct usb_interface *intf) return 0; } +/* Adjust device flags based on the "quirks=" module parameter */ +static void adjust_quirks(struct us_data *us) +{ + u16 vid, pid; + struct quirks_entry *q; + unsigned int mask = (US_FL_FIX_CAPACITY | US_FL_IGNORE_DEVICE | + US_FL_NOT_LOCKABLE | US_FL_MAX_SECTORS_64 | + US_FL_IGNORE_RESIDUE | US_FL_SINGLE_LUN | + US_FL_NO_WP_DETECT); + + vid = le16_to_cpu(us->pusb_dev->descriptor.idVendor); + pid = le16_to_cpu(us->pusb_dev->descriptor.idProduct); + + for (q = quirks_list; q != quirks_end; ++q) { + if (q->vid == vid && q->pid == pid) { + us->fflags = (us->fflags & ~mask) | q->fflags; + dev_info(&us->pusb_intf->dev, "Quirks match for " + "vid %04x pid %04x: %x\n", + vid, pid, q->fflags); + break; + } + } +} + /* Find an unusual_dev descriptor (always succeeds in the current code) */ static struct us_unusual_dev *find_unusual(const struct usb_device_id *id) { @@ -497,6 +531,7 @@ static int get_device_info(struct us_data *us, const struct usb_device_id *id) idesc->bInterfaceProtocol : unusual_dev->useTransport; us->fflags = USB_US_ORIG_FLAGS(id->driver_info); + adjust_quirks(us); if (us->fflags & US_FL_IGNORE_DEVICE) { printk(KERN_INFO USB_STORAGE "device ignored\n"); @@ -1061,10 +1096,88 @@ static struct usb_driver usb_storage_driver = { .soft_unbind = 1, }; +/* Works only for digits and letters, but small and fast */ +#define TOLOWER(x) ((x) | 0x20) + +static void __init parse_quirks(void) +{ + int n, i; + char *p; + + if (!quirks) + return; + + /* Count the ':' characters to get 2 * the number of entries */ + n = 0; + for (p = quirks; *p; ++p) { + if (*p == ':') + ++n; + } + n /= 2; + if (n == 0) + return; /* Don't allocate 0 bytes */ + + quirks_list = kmalloc(n * sizeof(*quirks_list), GFP_KERNEL); + if (!quirks_list) + return; + + p = quirks; + quirks_end = quirks_list; + for (i = 0; i < n && *p; ++i) { + unsigned f = 0; + + /* Each entry consists of VID:PID:flags */ + quirks_end->vid = simple_strtoul(p, &p, 16); + if (*p != ':') + goto skip_to_next; + quirks_end->pid = simple_strtoul(p+1, &p, 16); + if (*p != ':') + goto skip_to_next; + + while (*++p && *p != ',') { + switch (TOLOWER(*p)) { + case 'c': + f |= US_FL_FIX_CAPACITY; + break; + case 'i': + f |= US_FL_IGNORE_DEVICE; + break; + case 'l': + f |= US_FL_NOT_LOCKABLE; + break; + case 'm': + f |= US_FL_MAX_SECTORS_64; + break; + case 'r': + f |= US_FL_IGNORE_RESIDUE; + break; + case 's': + f |= US_FL_SINGLE_LUN; + break; + case 'w': + f |= US_FL_NO_WP_DETECT; + break; + /* Ignore unrecognized flag characters */ + } + } + quirks_end->fflags = f; + ++quirks_end; + + skip_to_next: + /* Entries are separated by commas */ + while (*p) { + if (*p++ == ',') + break; + } + } /* for (i = 0; ...) */ +} + static int __init usb_stor_init(void) { int retval; + printk(KERN_INFO "Initializing USB Mass Storage driver...\n"); + parse_quirks(); /* register the driver, return usb_register return code if error */ retval = usb_register(&usb_storage_driver); -- cgit v1.2.3 From c838ea4626d6e982489ff519f9ecf5e1649ca90b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Dec 2008 10:40:06 -0500 Subject: USB: storage: make the "quirks=" module parameter writable This patch (as1190) makes usb-storage's "quirks=" module parameter writable, so that users can add entries for their devices at runtime with no need to reboot or reload usb-storage. New codes are added for the SANE_SENSE, CAPACITY_HEURISTICS, and CAPACITY_OK flags. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 7 ++ drivers/usb/storage/usb.c | 169 +++++++++++++++--------------------- 2 files changed, 76 insertions(+), 100 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8eb6e35405cd..a58fc8b73398 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2396,14 +2396,21 @@ and is between 256 and 4096 characters. It is defined in the file and Product ID values (4-digit hex numbers) and Flags is a set of characters, each corresponding to a common usb-storage quirk flag as follows: + a = SANE_SENSE (collect more than 18 bytes + of sense data); c = FIX_CAPACITY (decrease the reported device capacity by one sector); + h = CAPACITY_HEURISTICS (decrease the + reported device capacity by one + sector if the number is odd); i = IGNORE_DEVICE (don't bind to this device); l = NOT_LOCKABLE (don't try to lock and unlock ejectable media); m = MAX_SECTORS_64 (don't transfer more than 64 sectors = 32 KB at a time); + o = CAPACITY_OK (accept the capacity + reported by the device); r = IGNORE_RESIDUE (the device reports bogus residue values); s = SINGLE_LUN (the device has only one diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 80e234bf4e50..4becf495ca2d 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -111,16 +111,10 @@ static unsigned int delay_use = 5; module_param(delay_use, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(delay_use, "seconds to delay before using a new device"); -static char *quirks; -module_param(quirks, charp, S_IRUGO); +static char quirks[128]; +module_param_string(quirks, quirks, sizeof(quirks), S_IRUGO | S_IWUSR); MODULE_PARM_DESC(quirks, "supplemental list of device IDs and their quirks"); -struct quirks_entry { - u16 vid, pid; - u32 fflags; -}; -static struct quirks_entry *quirks_list, *quirks_end; - /* * The entries in this table correspond, line for line, @@ -481,28 +475,80 @@ static int associate_dev(struct us_data *us, struct usb_interface *intf) return 0; } +/* Works only for digits and letters, but small and fast */ +#define TOLOWER(x) ((x) | 0x20) + /* Adjust device flags based on the "quirks=" module parameter */ static void adjust_quirks(struct us_data *us) { - u16 vid, pid; - struct quirks_entry *q; - unsigned int mask = (US_FL_FIX_CAPACITY | US_FL_IGNORE_DEVICE | + char *p; + u16 vid = le16_to_cpu(us->pusb_dev->descriptor.idVendor); + u16 pid = le16_to_cpu(us->pusb_dev->descriptor.idProduct); + unsigned f = 0; + unsigned int mask = (US_FL_SANE_SENSE | US_FL_FIX_CAPACITY | + US_FL_CAPACITY_HEURISTICS | US_FL_IGNORE_DEVICE | US_FL_NOT_LOCKABLE | US_FL_MAX_SECTORS_64 | - US_FL_IGNORE_RESIDUE | US_FL_SINGLE_LUN | - US_FL_NO_WP_DETECT); - - vid = le16_to_cpu(us->pusb_dev->descriptor.idVendor); - pid = le16_to_cpu(us->pusb_dev->descriptor.idProduct); - - for (q = quirks_list; q != quirks_end; ++q) { - if (q->vid == vid && q->pid == pid) { - us->fflags = (us->fflags & ~mask) | q->fflags; - dev_info(&us->pusb_intf->dev, "Quirks match for " - "vid %04x pid %04x: %x\n", - vid, pid, q->fflags); + US_FL_CAPACITY_OK | US_FL_IGNORE_RESIDUE | + US_FL_SINGLE_LUN | US_FL_NO_WP_DETECT); + + p = quirks; + while (*p) { + /* Each entry consists of VID:PID:flags */ + if (vid == simple_strtoul(p, &p, 16) && + *p == ':' && + pid == simple_strtoul(p+1, &p, 16) && + *p == ':') break; + + /* Move forward to the next entry */ + while (*p) { + if (*p++ == ',') + break; } } + if (!*p) /* No match */ + return; + + /* Collect the flags */ + while (*++p && *p != ',') { + switch (TOLOWER(*p)) { + case 'a': + f |= US_FL_SANE_SENSE; + break; + case 'c': + f |= US_FL_FIX_CAPACITY; + break; + case 'h': + f |= US_FL_CAPACITY_HEURISTICS; + break; + case 'i': + f |= US_FL_IGNORE_DEVICE; + break; + case 'l': + f |= US_FL_NOT_LOCKABLE; + break; + case 'm': + f |= US_FL_MAX_SECTORS_64; + break; + case 'o': + f |= US_FL_CAPACITY_OK; + break; + case 'r': + f |= US_FL_IGNORE_RESIDUE; + break; + case 's': + f |= US_FL_SINGLE_LUN; + break; + case 'w': + f |= US_FL_NO_WP_DETECT; + break; + /* Ignore unrecognized flag characters */ + } + } + us->fflags = (us->fflags & ~mask) | f; + dev_info(&us->pusb_intf->dev, "Quirks match for " + "vid %04x pid %04x: %x\n", + vid, pid, f); } /* Find an unusual_dev descriptor (always succeeds in the current code) */ @@ -1092,88 +1138,11 @@ static struct usb_driver usb_storage_driver = { .soft_unbind = 1, }; -/* Works only for digits and letters, but small and fast */ -#define TOLOWER(x) ((x) | 0x20) - -static void __init parse_quirks(void) -{ - int n, i; - char *p; - - if (!quirks) - return; - - /* Count the ':' characters to get 2 * the number of entries */ - n = 0; - for (p = quirks; *p; ++p) { - if (*p == ':') - ++n; - } - n /= 2; - if (n == 0) - return; /* Don't allocate 0 bytes */ - - quirks_list = kmalloc(n * sizeof(*quirks_list), GFP_KERNEL); - if (!quirks_list) - return; - - p = quirks; - quirks_end = quirks_list; - for (i = 0; i < n && *p; ++i) { - unsigned f = 0; - - /* Each entry consists of VID:PID:flags */ - quirks_end->vid = simple_strtoul(p, &p, 16); - if (*p != ':') - goto skip_to_next; - quirks_end->pid = simple_strtoul(p+1, &p, 16); - if (*p != ':') - goto skip_to_next; - - while (*++p && *p != ',') { - switch (TOLOWER(*p)) { - case 'c': - f |= US_FL_FIX_CAPACITY; - break; - case 'i': - f |= US_FL_IGNORE_DEVICE; - break; - case 'l': - f |= US_FL_NOT_LOCKABLE; - break; - case 'm': - f |= US_FL_MAX_SECTORS_64; - break; - case 'r': - f |= US_FL_IGNORE_RESIDUE; - break; - case 's': - f |= US_FL_SINGLE_LUN; - break; - case 'w': - f |= US_FL_NO_WP_DETECT; - break; - /* Ignore unrecognized flag characters */ - } - } - quirks_end->fflags = f; - ++quirks_end; - - skip_to_next: - /* Entries are separated by commas */ - while (*p) { - if (*p++ == ',') - break; - } - } /* for (i = 0; ...) */ -} - static int __init usb_stor_init(void) { int retval; printk(KERN_INFO "Initializing USB Mass Storage driver...\n"); - parse_quirks(); /* register the driver, return usb_register return code if error */ retval = usb_register(&usb_storage_driver); -- cgit v1.2.3 From e8de1481fd7126ee9e93d6889da6f00c05e1e019 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 22 Oct 2008 19:55:31 -0700 Subject: resource: allow MMIO exclusivity for device drivers Device drivers that use pci_request_regions() (and similar APIs) have a reasonable expectation that they are the only ones accessing their device. As part of the e1000e hunt, we were afraid that some userland (X or some bootsplash stuff) was mapping the MMIO region that the driver thought it had exclusively via /dev/mem or via various sysfs resource mappings. This patch adds the option for device drivers to cause their reserved regions to the "banned from /dev/mem use" list, so now both kernel memory and device-exclusive MMIO regions are banned. NOTE: This is only active when CONFIG_STRICT_DEVMEM is set. In addition to the config option, a kernel parameter iomem=relaxed is provided for the cases where developers want to diagnose, in the field, drivers issues from userspace. Reviewed-by: Matthew Wilcox Signed-off-by: Arjan van de Ven Signed-off-by: Jesse Barnes --- Documentation/kernel-parameters.txt | 4 ++ arch/x86/mm/init_32.c | 2 + arch/x86/mm/init_64.c | 2 + drivers/net/e1000e/netdev.c | 2 +- drivers/pci/pci-sysfs.c | 3 + drivers/pci/pci.c | 107 ++++++++++++++++++++++++++++++++---- include/linux/ioport.h | 11 +++- include/linux/pci.h | 3 + kernel/resource.c | 61 +++++++++++++++++++- 9 files changed, 176 insertions(+), 19 deletions(-) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0b3f6711d2f1..0072fabb1dd1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -918,6 +918,10 @@ and is between 256 and 4096 characters. It is defined in the file inttest= [IA64] + iomem= Disable strict checking of access to MMIO memory + strict regions from userspace. + relaxed + iommu= [x86] off force diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 544d724caeee..88f1b10de3be 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -328,6 +328,8 @@ int devmem_is_allowed(unsigned long pagenr) { if (pagenr <= 256) return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; if (!page_is_ram(pagenr)) return 1; return 0; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 54c437e96541..23f68e77ad1f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -888,6 +888,8 @@ int devmem_is_allowed(unsigned long pagenr) { if (pagenr <= 256) return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; if (!page_is_ram(pagenr)) return 1; return 0; diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index d4639facd1bd..91817d0afcaf 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -4807,7 +4807,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev, } } - err = pci_request_selected_regions(pdev, + err = pci_request_selected_regions_exclusive(pdev, pci_select_bars(pdev, IORESOURCE_MEM), e1000e_driver_name); if (err) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 388440e0d222..d5cdccf27a69 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -620,6 +620,9 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, vma->vm_pgoff += start >> PAGE_SHIFT; mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io; + if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(start)) + return -EINVAL; + return pci_mmap_page_range(pdev, vma, mmap_type, write_combine); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2cfa41e367a7..47663dc0daf7 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1395,7 +1395,8 @@ void pci_release_region(struct pci_dev *pdev, int bar) * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. */ -int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) +static int __pci_request_region(struct pci_dev *pdev, int bar, const char *res_name, + int exclusive) { struct pci_devres *dr; @@ -1408,8 +1409,9 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) goto err_out; } else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { - if (!request_mem_region(pci_resource_start(pdev, bar), - pci_resource_len(pdev, bar), res_name)) + if (!__request_mem_region(pci_resource_start(pdev, bar), + pci_resource_len(pdev, bar), res_name, + exclusive)) goto err_out; } @@ -1427,6 +1429,47 @@ err_out: return -EBUSY; } +/** + * pci_request_region - Reserved PCI I/O and memory resource + * @pdev: PCI device whose resources are to be reserved + * @bar: BAR to be reserved + * @res_name: Name to be associated with resource. + * + * Mark the PCI region associated with PCI device @pdev BR @bar as + * being reserved by owner @res_name. Do not access any + * address inside the PCI regions unless this call returns + * successfully. + * + * Returns 0 on success, or %EBUSY on error. A warning + * message is also printed on failure. + */ +int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) +{ + return __pci_request_region(pdev, bar, res_name, 0); +} + +/** + * pci_request_region_exclusive - Reserved PCI I/O and memory resource + * @pdev: PCI device whose resources are to be reserved + * @bar: BAR to be reserved + * @res_name: Name to be associated with resource. + * + * Mark the PCI region associated with PCI device @pdev BR @bar as + * being reserved by owner @res_name. Do not access any + * address inside the PCI regions unless this call returns + * successfully. + * + * Returns 0 on success, or %EBUSY on error. A warning + * message is also printed on failure. + * + * The key difference that _exclusive makes it that userspace is + * explicitly not allowed to map the resource via /dev/mem or + * sysfs. + */ +int pci_request_region_exclusive(struct pci_dev *pdev, int bar, const char *res_name) +{ + return __pci_request_region(pdev, bar, res_name, IORESOURCE_EXCLUSIVE); +} /** * pci_release_selected_regions - Release selected PCI I/O and memory resources * @pdev: PCI device whose resources were previously reserved @@ -1444,20 +1487,14 @@ void pci_release_selected_regions(struct pci_dev *pdev, int bars) pci_release_region(pdev, i); } -/** - * pci_request_selected_regions - Reserve selected PCI I/O and memory resources - * @pdev: PCI device whose resources are to be reserved - * @bars: Bitmask of BARs to be requested - * @res_name: Name to be associated with resource - */ -int pci_request_selected_regions(struct pci_dev *pdev, int bars, - const char *res_name) +int __pci_request_selected_regions(struct pci_dev *pdev, int bars, + const char *res_name, int excl) { int i; for (i = 0; i < 6; i++) if (bars & (1 << i)) - if(pci_request_region(pdev, i, res_name)) + if (__pci_request_region(pdev, i, res_name, excl)) goto err_out; return 0; @@ -1469,6 +1506,26 @@ err_out: return -EBUSY; } + +/** + * pci_request_selected_regions - Reserve selected PCI I/O and memory resources + * @pdev: PCI device whose resources are to be reserved + * @bars: Bitmask of BARs to be requested + * @res_name: Name to be associated with resource + */ +int pci_request_selected_regions(struct pci_dev *pdev, int bars, + const char *res_name) +{ + return __pci_request_selected_regions(pdev, bars, res_name, 0); +} + +int pci_request_selected_regions_exclusive(struct pci_dev *pdev, + int bars, const char *res_name) +{ + return __pci_request_selected_regions(pdev, bars, res_name, + IORESOURCE_EXCLUSIVE); +} + /** * pci_release_regions - Release reserved PCI I/O and memory resources * @pdev: PCI device whose resources were previously reserved by pci_request_regions @@ -1501,6 +1558,29 @@ int pci_request_regions(struct pci_dev *pdev, const char *res_name) return pci_request_selected_regions(pdev, ((1 << 6) - 1), res_name); } +/** + * pci_request_regions_exclusive - Reserved PCI I/O and memory resources + * @pdev: PCI device whose resources are to be reserved + * @res_name: Name to be associated with resource. + * + * Mark all PCI regions associated with PCI device @pdev as + * being reserved by owner @res_name. Do not access any + * address inside the PCI regions unless this call returns + * successfully. + * + * pci_request_regions_exclusive() will mark the region so that + * /dev/mem and the sysfs MMIO access will not be allowed. + * + * Returns 0 on success, or %EBUSY on error. A warning + * message is also printed on failure. + */ +int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name) +{ + return pci_request_selected_regions_exclusive(pdev, + ((1 << 6) - 1), res_name); +} + + /** * pci_set_master - enables bus-mastering for device dev * @dev: the PCI device to enable @@ -2149,10 +2229,13 @@ EXPORT_SYMBOL(pci_find_capability); EXPORT_SYMBOL(pci_bus_find_capability); EXPORT_SYMBOL(pci_release_regions); EXPORT_SYMBOL(pci_request_regions); +EXPORT_SYMBOL(pci_request_regions_exclusive); EXPORT_SYMBOL(pci_release_region); EXPORT_SYMBOL(pci_request_region); +EXPORT_SYMBOL(pci_request_region_exclusive); EXPORT_SYMBOL(pci_release_selected_regions); EXPORT_SYMBOL(pci_request_selected_regions); +EXPORT_SYMBOL(pci_request_selected_regions_exclusive); EXPORT_SYMBOL(pci_set_master); EXPORT_SYMBOL(pci_set_mwi); EXPORT_SYMBOL(pci_try_set_mwi); diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 041e95aac2bf..f6bb2ca8e3ba 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -49,6 +49,7 @@ struct resource_list { #define IORESOURCE_SIZEALIGN 0x00020000 /* size indicates alignment */ #define IORESOURCE_STARTALIGN 0x00040000 /* start field is alignment */ +#define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 #define IORESOURCE_AUTO 0x40000000 @@ -133,13 +134,16 @@ static inline unsigned long resource_type(struct resource *res) } /* Convenience shorthand with allocation */ -#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) -#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name)) +#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) +#define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) +#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) +#define request_mem_region_exclusive(start,n,name) \ + __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) #define rename_region(region, newname) do { (region)->name = (newname); } while (0) extern struct resource * __request_region(struct resource *, resource_size_t start, - resource_size_t n, const char *name); + resource_size_t n, const char *name, int relaxed); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) @@ -175,6 +179,7 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); +extern int iomem_is_exclusive(u64 addr); #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 59a3dc2059d3..bfcb39ca8879 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -686,10 +686,13 @@ void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(struct pci_dev *, u8, u8)); #define HAVE_PCI_REQ_REGIONS 2 int __must_check pci_request_regions(struct pci_dev *, const char *); +int __must_check pci_request_regions_exclusive(struct pci_dev *, const char *); void pci_release_regions(struct pci_dev *); int __must_check pci_request_region(struct pci_dev *, int, const char *); +int __must_check pci_request_region_exclusive(struct pci_dev *, int, const char *); void pci_release_region(struct pci_dev *, int); int pci_request_selected_regions(struct pci_dev *, int, const char *); +int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *); void pci_release_selected_regions(struct pci_dev *, int); /* drivers/pci/bus.c */ diff --git a/kernel/resource.c b/kernel/resource.c index e633106b12f6..ca6a1536b205 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -623,7 +623,7 @@ resource_size_t resource_alignment(struct resource *res) */ struct resource * __request_region(struct resource *parent, resource_size_t start, resource_size_t n, - const char *name) + const char *name, int flags) { struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); @@ -634,6 +634,7 @@ struct resource * __request_region(struct resource *parent, res->start = start; res->end = start + n - 1; res->flags = IORESOURCE_BUSY; + res->flags |= flags; write_lock(&resource_lock); @@ -679,7 +680,7 @@ int __check_region(struct resource *parent, resource_size_t start, { struct resource * res; - res = __request_region(parent, start, n, "check-region"); + res = __request_region(parent, start, n, "check-region", 0); if (!res) return -EBUSY; @@ -776,7 +777,7 @@ struct resource * __devm_request_region(struct device *dev, dr->start = start; dr->n = n; - res = __request_region(parent, start, n, name); + res = __request_region(parent, start, n, name, 0); if (res) devres_add(dev, dr); else @@ -876,3 +877,57 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size) return err; } + +#ifdef CONFIG_STRICT_DEVMEM +static int strict_iomem_checks = 1; +#else +static int strict_iomem_checks; +#endif + +/* + * check if an address is reserved in the iomem resource tree + * returns 1 if reserved, 0 if not reserved. + */ +int iomem_is_exclusive(u64 addr) +{ + struct resource *p = &iomem_resource; + int err = 0; + loff_t l; + int size = PAGE_SIZE; + + if (!strict_iomem_checks) + return 0; + + addr = addr & PAGE_MASK; + + read_lock(&resource_lock); + for (p = p->child; p ; p = r_next(NULL, p, &l)) { + /* + * We can probably skip the resources without + * IORESOURCE_IO attribute? + */ + if (p->start >= addr + size) + break; + if (p->end < addr) + continue; + if (p->flags & IORESOURCE_BUSY && + p->flags & IORESOURCE_EXCLUSIVE) { + err = 1; + break; + } + } + read_unlock(&resource_lock); + + return err; +} + +static int __init strict_iomem(char *str) +{ + if (strstr(str, "relaxed")) + strict_iomem_checks = 0; + if (strstr(str, "strict")) + strict_iomem_checks = 1; + return 1; +} + +__setup("iomem=", strict_iomem); -- cgit v1.2.3 From c077719be8e9e6b55702117513d1b5f41d80404a Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 7 Jan 2009 18:07:57 -0800 Subject: memcg: mem+swap controller Kconfig Config and control variable for mem+swap controller. This patch adds CONFIG_CGROUP_MEM_RES_CTLR_SWAP (memory resource controller swap extension.) For accounting swap, it's obvious that we have to use additional memory to remember "who uses swap". This adds more overhead. So, it's better to offer "choice" to users. This patch adds 2 choices. This patch adds 2 parameters to enable swap extension or not. - CONFIG - boot option Reviewed-by: Daisuke Nishimura Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 +++ include/linux/memcontrol.h | 3 +++ init/Kconfig | 17 +++++++++++++++++ mm/memcontrol.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+) (limited to 'Documentation/kernel-parameters.txt') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 532eacbbed62..fb849020aea9 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1562,6 +1562,9 @@ and is between 256 and 4096 characters. It is defined in the file nosoftlockup [KNL] Disable the soft-lockup detector. + noswapaccount [KNL] Disable accounting of swap in memory resource + controller. (See Documentation/controllers/memory.txt) + nosync [HW,M68K] Disables sync negotiation for all devices. notsc [BUGS=X86-32] Disable Time Stamp Counter diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b095f5f6ecf7..41b46cc9d1f1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -77,6 +77,9 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +extern int do_swap_account; +#endif #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; diff --git a/init/Kconfig b/init/Kconfig index 7cbe1f43ca22..a724a149bf3f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -428,6 +428,23 @@ config CGROUP_MEM_RES_CTLR config MM_OWNER bool +config CGROUP_MEM_RES_CTLR_SWAP + bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)" + depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL + help + Add swap management feature to memory resource controller. When you + enable this, you can limit mem+swap usage per cgroup. In other words, + when you disable this, memory resource controller has no cares to + usage of swap...a process can exhaust all of the swap. This extension + is useful when you want to avoid exhaustion swap but this itself + adds more overheads and consumes memory for remembering information. + Especially if you use 32bit system or small memory system, please + be careful about enabling this. When memory resource controller + is disabled by boot option, this will be automatically disabled and + there will be no overhead from this. Even when you set this config=y, + if boot option "noswapaccount" is set, swap will not be accounted. + + endmenu config SYSFS_DEPRECATED diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7288e9d85ca7..59dd8c116372 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -41,6 +41,15 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly; #define MEM_CGROUP_RECLAIM_RETRIES 5 +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +/* Turned on only when memory cgroup is enabled && really_do_swap_account = 0 */ +int do_swap_account __read_mostly; +static int really_do_swap_account __initdata = 1; /* for remember boot option*/ +#else +#define do_swap_account (0) +#endif + + /* * Statistics for memory cgroup. */ @@ -1404,6 +1413,18 @@ static void mem_cgroup_free(struct mem_cgroup *mem) } +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +static void __init enable_swap_cgroup(void) +{ + if (!mem_cgroup_subsys.disabled && really_do_swap_account) + do_swap_account = 1; +} +#else +static void __init enable_swap_cgroup(void) +{ +} +#endif + static struct cgroup_subsys_state * mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) { @@ -1419,6 +1440,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) for_each_node_state(node, N_POSSIBLE) if (alloc_mem_cgroup_per_zone_info(mem, node)) goto free_out; + /* root ? */ + if (cont->parent == NULL) + enable_swap_cgroup(); return &mem->css; free_out: @@ -1490,3 +1514,13 @@ struct cgroup_subsys mem_cgroup_subsys = { .attach = mem_cgroup_move_task, .early_init = 0, }; + +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP + +static int __init disable_swap_account(char *s) +{ + really_do_swap_account = 0; + return 1; +} +__setup("noswapaccount", disable_swap_account); +#endif -- cgit v1.2.3