From 0dc8335aa3e59f1adbb0fce7c9c0b342146cd036 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 7 May 2009 15:09:33 +0200 Subject: oprofile: remove irq_flags in struct op_entry This became obsolete with this commit: 304cc6a ring_buffer: remove unused flags parameter, fix Signed-off-by: Robert Richter --- include/linux/oprofile.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 1d9518bc4c58..dbbe2dbc4418 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -171,7 +171,6 @@ struct op_sample; struct op_entry { struct ring_buffer_event *event; struct op_sample *sample; - unsigned long irq_flags; unsigned long size; unsigned long *data; }; -- cgit v1.2.3 From 51563a0e5650d0d76539625388d72d62b34c726e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 3 Jun 2009 20:54:56 +0200 Subject: x86/oprofile: introduce oprofile_add_data64() The IBS implementation writes 64 bit register values to the cpu buffer by writing two 32 bit values using oprofile_add_data(). This patch introduces oprofile_add_data64() to write a single 64 bit value to the buffer. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 27 +++++++++------------------ drivers/oprofile/cpu_buffer.c | 15 +++++++++++++++ include/linux/oprofile.h | 1 + 3 files changed, 25 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 6493ef7ae9ad..cc930467575d 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -140,13 +140,10 @@ op_amd_handle_ibs(struct pt_regs * const regs, rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); oprofile_write_reserve(&entry, regs, val, IBS_FETCH_CODE, IBS_FETCH_SIZE); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); - oprofile_add_data(&entry, (u32)ctl); - oprofile_add_data(&entry, (u32)(ctl >> 32)); + oprofile_add_data64(&entry, val); + oprofile_add_data64(&entry, ctl); rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ @@ -162,23 +159,17 @@ op_amd_handle_ibs(struct pt_regs * const regs, rdmsrl(MSR_AMD64_IBSOPRIP, val); oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, IBS_OP_SIZE); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA2, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA3, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCLINAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 50640cc5eef2..a7aae24f2889 100644 --- a/drivers/oprofile/cpu_buffer.c +++
b/drivers/oprofile/cpu_buffer.c @@ -406,6 +406,21 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val) return op_cpu_buffer_add_data(entry, val); } +int oprofile_add_data64(struct op_entry *entry, u64 val) +{ + if (!entry->event) + return 0; + if (op_cpu_buffer_get_size(entry) < 2) + /* + * the function returns 0 to indicate a too small + * buffer, even if there is some space left + */ + return 0; + if (!op_cpu_buffer_add_data(entry, (u32)val)) + return 0; + return op_cpu_buffer_add_data(entry, (u32)(val >> 32)); +} + int oprofile_write_commit(struct op_entry *entry) { if (!entry->event) diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index dbbe2dbc4418..d68d2ed94f15 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -179,6 +179,7 @@ void oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs, unsigned long pc, int code, int size); int oprofile_add_data(struct op_entry *entry, unsigned long val); +int oprofile_add_data64(struct op_entry *entry, u64 val); int oprofile_write_commit(struct op_entry *entry); #endif /* OPROFILE_H */ -- cgit v1.2.3 From a635f9dd83f3382577f4544a96df12356e951a40 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 12 Jun 2009 15:20:55 +0200 Subject: HID: use debugfs for report descriptor dumping It is a little bit inconvenient for people who have some non-standard HID hardware (usually violating the HID specification) to have to recompile the kernel with CONFIG_HID_DEBUG to be able to see the kernel's perspective of the HID report descriptor and observe the parsed events. Plus the messages are then mixed up inconveniently with the rest of the dmesg stuff. This patch implements a /sys/kernel/debug/hid/<device>/rdesc file, which represents the kernel's view of the report descriptor (both the raw report descriptor data and the parsed contents). With all the device-specific debug data being available through debugfs, there is no need for keeping CONFIG_HID_DEBUG, as the 'debug' parameter to the hid module will now only enable driver-specific debugging output, which has an absolutely minimal memory footprint: just a few error messages and one global flag (hid_debug). We use the current set of output formatting functions. The ones that need to be used both for the one-shot rdesc seq_file and for the continuous flow of data (individual reports, as sent by the device) distinguish according to the passed seq_file parameter: if it is NULL, they still output to the kernel ring buffer, otherwise the corresponding seq_file is used for output. The format of the output is preserved. Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 15 --- drivers/hid/Makefile | 5 +- drivers/hid/hid-core.c | 13 ++- drivers/hid/hid-debug.c | 227 +++++++++++++++++++++++++++++------------- drivers/hid/hid-input.c | 13 +-- drivers/hid/usbhid/hid-core.c | 8 +- include/linux/hid-debug.h | 32 +++--- include/linux/hid.h | 4 + 8 files changed, 195 insertions(+), 122 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 7e67dcb3d4f6..05950a783560 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -31,21 +31,6 @@ config HID If unsure, say Y. -config HID_DEBUG - bool "HID debugging support" - default y - depends on HID - ---help--- - This option lets the HID layer output diagnostics about its internal - state, resolve HID usages, dump HID fields, etc. Individual HID drivers - use this debugging facility to output information about individual HID - devices, etc.
- - This feature is useful for those who are either debugging the HID parser - or any HID hardware device. - - If unsure, say Y. - config HIDRAW bool "/dev/hidraw raw HID device support" depends on HID diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile index 1f7cb0fd4505..cf3687d1ed63 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -3,9 +3,12 @@ # hid-objs := hid-core.o hid-input.o +ifdef CONFIG_DEBUG_FS + hid-objs += hid-debug.o +endif + obj-$(CONFIG_HID) += hid.o -hid-$(CONFIG_HID_DEBUG) += hid-debug.o hid-$(CONFIG_HIDRAW) += hidraw.o hid-logitech-objs := hid-lg.o diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 8551693d645f..d4317db85b54 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -44,12 +44,10 @@ #define DRIVER_DESC "HID core driver" #define DRIVER_LICENSE "GPL" -#ifdef CONFIG_HID_DEBUG int hid_debug = 0; module_param_named(debug, hid_debug, int, 0600); MODULE_PARM_DESC(debug, "HID debugging (0=off, 1=probing info, 2=continuous data dumping)"); EXPORT_SYMBOL_GPL(hid_debug); -#endif /* * Register a new report for a device. @@ -987,7 +985,6 @@ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value) if (offset >= field->report_count) { dbg_hid("offset (%d) exceeds report_count (%d)\n", offset, field->report_count); - hid_dump_field(field, 8); return -1; } if (field->logical_minimum < 0) { @@ -1721,6 +1718,8 @@ int hid_add_device(struct hid_device *hdev) if (!ret) hdev->status |= HID_STAT_ADDED; + hid_debug_register(hdev, dev_name(&hdev->dev)); + return ret; } EXPORT_SYMBOL_GPL(hid_add_device); @@ -1768,6 +1767,7 @@ static void hid_remove_device(struct hid_device *hdev) { if (hdev->status & HID_STAT_ADDED) { device_del(&hdev->dev); + hid_debug_unregister(hdev); hdev->status &= ~HID_STAT_ADDED; } } @@ -1843,6 +1843,10 @@ static int __init hid_init(void) { int ret; + if (hid_debug) + printk(KERN_WARNING "HID: hid_debug parameter has been deprecated. " + "Debugging data are now provided via debugfs\n"); + ret = bus_register(&hid_bus_type); if (ret) { printk(KERN_ERR "HID: can't register hid bus\n"); @@ -1853,6 +1857,8 @@ static int __init hid_init(void) if (ret) goto err_bus; + hid_debug_init(); + return 0; err_bus: bus_unregister(&hid_bus_type); @@ -1862,6 +1868,7 @@ err: static void __exit hid_exit(void) { + hid_debug_exit(); hidraw_exit(); bus_unregister(&hid_bus_type); } diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 47ac1a7d66e1..067e173aa3e4 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -1,9 +1,9 @@ /* * (c) 1999 Andreas Gal * (c) 2000-2001 Vojtech Pavlik - * (c) 2007 Jiri Kosina + * (c) 2007-2009 Jiri Kosina * - * Some debug stuff for the HID parser. 
+ * HID debugging support */ /* @@ -26,9 +26,13 @@ * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic */ +#include +#include #include #include +static struct dentry *hid_debug_root; + struct hid_usage_entry { unsigned page; unsigned usage; @@ -331,72 +335,83 @@ static const struct hid_usage_entry hid_usage_table[] = { { 0, 0, NULL } }; -static void resolv_usage_page(unsigned page) { +static void resolv_usage_page(unsigned page, struct seq_file *f) { const struct hid_usage_entry *p; for (p = hid_usage_table; p->description; p++) if (p->page == page) { - printk("%s", p->description); + if (!f) + printk("%s", p->description); + else + seq_printf(f, "%s", p->description); return; } - printk("%04x", page); + if (!f) + printk("%04x", page); + else + seq_printf(f, "%04x", page); } -void hid_resolv_usage(unsigned usage) { +void hid_resolv_usage(unsigned usage, struct seq_file *f) { const struct hid_usage_entry *p; - if (!hid_debug) - return; - - resolv_usage_page(usage >> 16); - printk("."); + resolv_usage_page(usage >> 16, f); + if (!f) + printk("."); + else + seq_printf(f, "."); for (p = hid_usage_table; p->description; p++) if (p->page == (usage >> 16)) { for(++p; p->description && p->usage != 0; p++) if (p->usage == (usage & 0xffff)) { - printk("%s", p->description); + if (!f) + printk("%s", p->description); + else + seq_printf(f, + "%s", + p->description); return; } break; } - printk("%04x", usage & 0xffff); + if (!f) + printk("%04x", usage & 0xffff); + else + seq_printf(f, "%04x", usage & 0xffff); } EXPORT_SYMBOL_GPL(hid_resolv_usage); -static void tab(int n) { - printk(KERN_DEBUG "%*s", n, ""); +static void tab(int n, struct seq_file *f) { + seq_printf(f, "%*s", n, ""); } -void hid_dump_field(struct hid_field *field, int n) { +void hid_dump_field(struct hid_field *field, int n, struct seq_file *f) { int j; - if (!hid_debug) - return; - if (field->physical) { - tab(n); - printk("Physical("); - hid_resolv_usage(field->physical); printk(")\n"); + tab(n, f); + seq_printf(f, "Physical("); + hid_resolv_usage(field->physical, f); seq_printf(f, ")\n"); } if (field->logical) { - tab(n); - printk("Logical("); - hid_resolv_usage(field->logical); printk(")\n"); + tab(n, f); + seq_printf(f, "Logical("); + hid_resolv_usage(field->logical, f); seq_printf(f, ")\n"); } - tab(n); printk("Usage(%d)\n", field->maxusage); + tab(n, f); seq_printf(f, "Usage(%d)\n", field->maxusage); for (j = 0; j < field->maxusage; j++) { - tab(n+2); hid_resolv_usage(field->usage[j].hid); printk("\n"); + tab(n+2, f); hid_resolv_usage(field->usage[j].hid, f); seq_printf(f, "\n"); } if (field->logical_minimum != field->logical_maximum) { - tab(n); printk("Logical Minimum(%d)\n", field->logical_minimum); - tab(n); printk("Logical Maximum(%d)\n", field->logical_maximum); + tab(n, f); seq_printf(f, "Logical Minimum(%d)\n", field->logical_minimum); + tab(n, f); seq_printf(f, "Logical Maximum(%d)\n", field->logical_maximum); } if (field->physical_minimum != field->physical_maximum) { - tab(n); printk("Physical Minimum(%d)\n", field->physical_minimum); - tab(n); printk("Physical Maximum(%d)\n", field->physical_maximum); + tab(n, f); seq_printf(f, "Physical Minimum(%d)\n", field->physical_minimum); + tab(n, f); seq_printf(f, "Physical Maximum(%d)\n", field->physical_maximum); } if (field->unit_exponent) { - tab(n); printk("Unit Exponent(%d)\n", field->unit_exponent); + tab(n, f); seq_printf(f, "Unit Exponent(%d)\n", field->unit_exponent); } if (field->unit) { static const char *systems[5] = { "None", "SI Linear", "SI 
Rotation", "English Linear", "English Rotation" }; @@ -417,77 +432,75 @@ void hid_dump_field(struct hid_field *field, int n) { data >>= 4; if(sys > 4) { - tab(n); printk("Unit(Invalid)\n"); + tab(n, f); seq_printf(f, "Unit(Invalid)\n"); } else { int earlier_unit = 0; - tab(n); printk("Unit(%s : ", systems[sys]); + tab(n, f); seq_printf(f, "Unit(%s : ", systems[sys]); for (i=1 ; i>= 4; if (nibble != 0) { if(earlier_unit++ > 0) - printk("*"); - printk("%s", units[sys][i]); + seq_printf(f, "*"); + seq_printf(f, "%s", units[sys][i]); if(nibble != 1) { /* This is a _signed_ nibble(!) */ int val = nibble & 0x7; if(nibble & 0x08) val = -((0x7 & ~val) +1); - printk("^%d", val); + seq_printf(f, "^%d", val); } } } - printk(")\n"); + seq_printf(f, ")\n"); } } - tab(n); printk("Report Size(%u)\n", field->report_size); - tab(n); printk("Report Count(%u)\n", field->report_count); - tab(n); printk("Report Offset(%u)\n", field->report_offset); + tab(n, f); seq_printf(f, "Report Size(%u)\n", field->report_size); + tab(n, f); seq_printf(f, "Report Count(%u)\n", field->report_count); + tab(n, f); seq_printf(f, "Report Offset(%u)\n", field->report_offset); - tab(n); printk("Flags( "); + tab(n, f); seq_printf(f, "Flags( "); j = field->flags; - printk("%s", HID_MAIN_ITEM_CONSTANT & j ? "Constant " : ""); - printk("%s", HID_MAIN_ITEM_VARIABLE & j ? "Variable " : "Array "); - printk("%s", HID_MAIN_ITEM_RELATIVE & j ? "Relative " : "Absolute "); - printk("%s", HID_MAIN_ITEM_WRAP & j ? "Wrap " : ""); - printk("%s", HID_MAIN_ITEM_NONLINEAR & j ? "NonLinear " : ""); - printk("%s", HID_MAIN_ITEM_NO_PREFERRED & j ? "NoPreferredState " : ""); - printk("%s", HID_MAIN_ITEM_NULL_STATE & j ? "NullState " : ""); - printk("%s", HID_MAIN_ITEM_VOLATILE & j ? "Volatile " : ""); - printk("%s", HID_MAIN_ITEM_BUFFERED_BYTE & j ? "BufferedByte " : ""); - printk(")\n"); + seq_printf(f, "%s", HID_MAIN_ITEM_CONSTANT & j ? "Constant " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_VARIABLE & j ? "Variable " : "Array "); + seq_printf(f, "%s", HID_MAIN_ITEM_RELATIVE & j ? "Relative " : "Absolute "); + seq_printf(f, "%s", HID_MAIN_ITEM_WRAP & j ? "Wrap " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_NONLINEAR & j ? "NonLinear " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_NO_PREFERRED & j ? "NoPreferredState " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_NULL_STATE & j ? "NullState " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_VOLATILE & j ? "Volatile " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_BUFFERED_BYTE & j ? 
"BufferedByte " : ""); + seq_printf(f, ")\n"); } EXPORT_SYMBOL_GPL(hid_dump_field); -void hid_dump_device(struct hid_device *device) { +void hid_dump_device(struct hid_device *device, struct seq_file *f) +{ struct hid_report_enum *report_enum; struct hid_report *report; struct list_head *list; unsigned i,k; static const char *table[] = {"INPUT", "OUTPUT", "FEATURE"}; - if (!hid_debug) - return; - for (i = 0; i < HID_REPORT_TYPES; i++) { report_enum = device->report_enum + i; list = report_enum->report_list.next; while (list != &report_enum->report_list) { report = (struct hid_report *) list; - tab(2); - printk("%s", table[i]); + tab(2, f); + seq_printf(f, "%s", table[i]); if (report->id) - printk("(%d)", report->id); - printk("[%s]", table[report->type]); - printk("\n"); + seq_printf(f, "(%d)", report->id); + seq_printf(f, "[%s]", table[report->type]); + seq_printf(f, "\n"); for (k = 0; k < report->maxfield; k++) { - tab(4); - printk("Field(%d)\n", k); - hid_dump_field(report->field[k], 6); + tab(4, f); + seq_printf(f, "Field(%d)\n", k); + hid_dump_field(report->field[k], 6, f); } list = list->next; } @@ -500,7 +513,7 @@ void hid_dump_input(struct hid_usage *usage, __s32 value) { return; printk(KERN_DEBUG "hid-debug: input "); - hid_resolv_usage(usage->hid); + hid_resolv_usage(usage->hid, NULL); printk(" = %d\n", value); } EXPORT_SYMBOL_GPL(hid_dump_input); @@ -767,12 +780,84 @@ static const char **names[EV_MAX + 1] = { [EV_SND] = sounds, [EV_REP] = repeats, }; -void hid_resolv_event(__u8 type, __u16 code) { - - if (!hid_debug) - return; +void hid_resolv_event(__u8 type, __u16 code, struct seq_file *f) { - printk("%s.%s", events[type] ? events[type] : "?", + seq_printf(f, "%s.%s", events[type] ? events[type] : "?", names[type] ? (names[type][code] ? 
names[type][code] : "?") : "?"); } -EXPORT_SYMBOL_GPL(hid_resolv_event); + +void hid_dump_input_mapping(struct hid_device *hid, struct seq_file *f) +{ + int i, j, k; + struct hid_report *report; + struct hid_usage *usage; + + for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { + list_for_each_entry(report, &hid->report_enum[k].report_list, list) { + for (i = 0; i < report->maxfield; i++) { + for ( j = 0; j < report->field[i]->maxusage; j++) { + usage = report->field[i]->usage + j; + hid_resolv_usage(usage->hid, f); + seq_printf(f, " ---> "); + hid_resolv_event(usage->type, usage->code, f); + seq_printf(f, "\n"); + } + } + } + } + +} + +static int hid_debug_rdesc_show(struct seq_file *f, void *p) +{ + struct hid_device *hdev = f->private; + int i; + + /* dump HID report descriptor */ + for (i = 0; i < hdev->rsize; i++) + seq_printf(f, "%02x ", hdev->rdesc[i]); + seq_printf(f, "\n\n"); + + /* dump parsed data and input mappings */ + hid_dump_device(hdev, f); + seq_printf(f, "\n"); + hid_dump_input_mapping(hdev, f); + + return 0; +} + +static int hid_debug_rdesc_open(struct inode *inode, struct file *file) +{ + return single_open(file, hid_debug_rdesc_show, inode->i_private); +} + +static const struct file_operations hid_debug_rdesc_fops = { + .open = hid_debug_rdesc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void hid_debug_register(struct hid_device *hdev, const char *name) +{ + hdev->debug_dir = debugfs_create_dir(name, hid_debug_root); + hdev->debug_rdesc = debugfs_create_file("rdesc", 0400, + hdev->debug_dir, hdev, &hid_debug_rdesc_fops); +} + +void hid_debug_unregister(struct hid_device *hdev) +{ + debugfs_remove(hdev->debug_rdesc); + debugfs_remove(hdev->debug_dir); +} + +void hid_debug_init(void) +{ + hid_debug_root = debugfs_create_dir("hid", NULL); +} + +void hid_debug_exit(void) +{ + debugfs_remove_recursive(hid_debug_root); +} + diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 7f183b7147e1..5862b0f3b55d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -159,17 +159,12 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel field->hidinput = hidinput; - dbg_hid("Mapping: "); - hid_resolv_usage(usage->hid); - dbg_hid_line(" ---> "); - if (field->flags & HID_MAIN_ITEM_CONSTANT) goto ignore; /* only LED usages are supported in output fields */ if (field->report_type == HID_OUTPUT_REPORT && (usage->hid & HID_USAGE_PAGE) != HID_UP_LED) { - dbg_hid_line(" [non-LED output field] "); goto ignore; } @@ -561,15 +556,9 @@ mapped: set_bit(MSC_SCAN, input->mscbit); } - hid_resolv_event(usage->type, usage->code); - - dbg_hid_line("\n"); - - return; - ignore: - dbg_hid_line("IGNORED\n"); return; + } void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index ac8049b5f1e9..708aa52d0753 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -4,8 +4,8 @@ * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik * Copyright (c) 2005 Michael Haboustak for Concept2, Inc - * Copyright (c) 2006-2008 Jiri Kosina * Copyright (c) 2007-2008 Oliver Neukum + * Copyright (c) 2006-2009 Jiri Kosina */ /* @@ -886,11 +886,6 @@ static int usbhid_parse(struct hid_device *hid) goto err; } - dbg_hid("report descriptor (size %u, read %d) = ", rsize, n); - for (n = 0; n < rsize; n++) - dbg_hid_line(" %02x", (unsigned char) rdesc[n]); - 
dbg_hid_line("\n"); - ret = hid_parse_report(hid, rdesc, rsize); kfree(rdesc); if (ret) { @@ -1005,7 +1000,6 @@ static int usbhid_start(struct hid_device *hid) usbhid->urbctrl->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_NO_SETUP_DMA_MAP); usbhid_init_reports(hid); - hid_dump_device(hid); set_bit(HID_STARTED, &usbhid->iofl); diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index 50d568ec178a..516e12c33235 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -2,7 +2,7 @@ #define __HID_DEBUG_H /* - * Copyright (c) 2007 Jiri Kosina + * Copyright (c) 2007-2009 Jiri Kosina */ /* @@ -22,24 +22,30 @@ * */ -#ifdef CONFIG_HID_DEBUG +#ifdef CONFIG_DEBUG_FS void hid_dump_input(struct hid_usage *, __s32); -void hid_dump_device(struct hid_device *); -void hid_dump_field(struct hid_field *, int); -void hid_resolv_usage(unsigned); -void hid_resolv_event(__u8, __u16); +void hid_dump_device(struct hid_device *, struct seq_file *); +void hid_dump_field(struct hid_field *, int, struct seq_file *); +void hid_resolv_usage(unsigned, struct seq_file *); +void hid_debug_register(struct hid_device *, const char *); +void hid_debug_unregister(struct hid_device *); +void hid_debug_init(void); +void hid_debug_exit(void); #else -#define hid_dump_input(a,b) do { } while (0) -#define hid_dump_device(c) do { } while (0) -#define hid_dump_field(a,b) do { } while (0) -#define hid_resolv_usage(a) do { } while (0) -#define hid_resolv_event(a,b) do { } while (0) - -#endif /* CONFIG_HID_DEBUG */ +#define hid_dump_input(a,b) do { } while (0) +#define hid_dump_device(c) do { } while (0) +#define hid_dump_field(a,b) do { } while (0) +#define hid_resolv_usage(a) do { } while (0) +#define hid_resolv_event(a,b) do { } while (0) +#define hid_debug_register(a, b) do { } while (0) +#define hid_debug_unregister(a) do { } while (0) +#define hid_debug_init() do { } while (0) +#define hid_debug_exit() do { } while (0) +#endif #endif diff --git a/include/linux/hid.h b/include/linux/hid.h index a72876e43589..da09ab140ef1 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -451,6 +451,10 @@ struct hid_device { /* device report descriptor */ char phys[64]; /* Device physical location */ char uniq[64]; /* Device unique identifier (serial #) */ + /* debugfs */ + struct dentry *debug_dir; + struct dentry *debug_rdesc; + void *driver_data; /* temporary hid_ff handling (until moved to the drivers) */ -- cgit v1.2.3 From cd667ce24796700e1a0e6e7528efc61c96ff832e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 12 Jun 2009 15:20:57 +0200 Subject: HID: use debugfs for events/reports dumping This is a followup patch to the one implemeting rdesc representation in debugfs rather than being dependent on compile-time CONFIG_HID_DEBUG setting. The API of the appropriate formatting functions is slightly modified -- if they are passed seq_file pointer, the one-shot output for 'rdesc' file mode is used, and therefore the message is formatted into the corresponding seq_file immediately. Otherwise the called function allocated a new buffer, formats the text into the buffer and returns the pointer to it, so that it can be queued into the ring-buffer of the processess blocked waiting on input on 'events' file in debugfs. 'debug' parameter to the 'hid' module is now used solely for the prupose of inetrnal driver state debugging (parser, transport, etc). 
Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 42 ++++++-- drivers/hid/hid-debug.c | 239 ++++++++++++++++++++++++++++++++++++++++++---- include/linux/hid-debug.h | 18 +++- include/linux/hid.h | 26 ++--- 4 files changed, 276 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index d4317db85b54..449bd747d116 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -46,7 +46,7 @@ int hid_debug = 0; module_param_named(debug, hid_debug, int, 0600); -MODULE_PARM_DESC(debug, "HID debugging (0=off, 1=probing info, 2=continuous data dumping)"); +MODULE_PARM_DESC(debug, "toggle HID debugging messages"); EXPORT_SYMBOL_GPL(hid_debug); /* @@ -859,7 +859,7 @@ static void hid_process_event(struct hid_device *hid, struct hid_field *field, struct hid_driver *hdrv = hid->driver; int ret; - hid_dump_input(usage, value); + hid_dump_input(hid, usage, value); if (hdrv && hdrv->event && hid_match_usage(hid, usage)) { ret = hdrv->event(hid, field, usage, value); @@ -981,7 +981,7 @@ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value) { unsigned size = field->report_size; - hid_dump_input(field->usage + offset, value); + hid_dump_input(field->report->device, field->usage + offset, value); if (offset >= field->report_count) { dbg_hid("offset (%d) exceeds report_count (%d)\n", offset, field->report_count); @@ -1075,6 +1075,7 @@ int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int i struct hid_report_enum *report_enum = hid->report_enum + type; struct hid_driver *hdrv = hid->driver; struct hid_report *report; + char *buf; unsigned int i; int ret; @@ -1086,18 +1087,36 @@ int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int i return -1; } - dbg_hid("report (size %u) (%snumbered)\n", size, report_enum->numbered ? "" : "un"); + buf = kmalloc(sizeof(char) * HID_DEBUG_BUFSIZE, + interrupt ? GFP_ATOMIC : GFP_KERNEL); + + if (!buf) { + report = hid_get_report(report_enum, data); + goto nomem; + } + + snprintf(buf, HID_DEBUG_BUFSIZE - 1, + "\nreport (size %u) (%snumbered)\n", size, report_enum->numbered ? "" : "un"); + hid_debug_event(hid, buf); report = hid_get_report(report_enum, data); if (!report) return -1; /* dump the report */ - dbg_hid("report %d (size %u) = ", report->id, size); - for (i = 0; i < size; i++) - dbg_hid_line(" %02x", data[i]); - dbg_hid_line("\n"); + snprintf(buf, HID_DEBUG_BUFSIZE - 1, + "report %d (size %u) = ", report->id, size); + hid_debug_event(hid, buf); + for (i = 0; i < size; i++) { + snprintf(buf, HID_DEBUG_BUFSIZE - 1, + " %02x", data[i]); + hid_debug_event(hid, buf); + } + hid_debug_event(hid, "\n"); + + kfree(buf); +nomem: if (hdrv && hdrv->raw_event && hid_match_report(hid, report)) { ret = hdrv->raw_event(hid, report, data, size); if (ret != 0) @@ -1756,6 +1775,9 @@ struct hid_device *hid_allocate_device(void) for (i = 0; i < HID_REPORT_TYPES; i++) INIT_LIST_HEAD(&hdev->report_enum[i].report_list); + init_waitqueue_head(&hdev->debug_wait); + INIT_LIST_HEAD(&hdev->debug_list); + return hdev; err: put_device(&hdev->dev); @@ -1844,8 +1866,8 @@ static int __init hid_init(void) int ret; if (hid_debug) - printk(KERN_WARNING "HID: hid_debug parameter has been deprecated. 
" - "Debugging data are now provided via debugfs\n"); + printk(KERN_WARNING "HID: hid_debug is now used solely for parser and driver debugging.\n" + "HID: debugfs is now used for inspecting the device (report descriptor, reports)\n"); ret = bus_register(&hid_bus_type); if (ret) { diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 067e173aa3e4..a331a1821e85 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -28,6 +28,10 @@ #include #include +#include +#include +#include + #include #include @@ -335,49 +339,86 @@ static const struct hid_usage_entry hid_usage_table[] = { { 0, 0, NULL } }; -static void resolv_usage_page(unsigned page, struct seq_file *f) { +/* Either output directly into simple seq_file, or (if f == NULL) + * allocate a separate buffer that will then be passed to the 'events' + * ringbuffer. + * + * This is because these functions can be called both for "one-shot" + * "rdesc" while resolving, or for blocking "events". + * + * This holds both for resolv_usage_page() and hid_resolv_usage(). + */ +static char *resolv_usage_page(unsigned page, struct seq_file *f) { const struct hid_usage_entry *p; + char *buf = NULL; + + if (!f) { + buf = kzalloc(sizeof(char) * HID_DEBUG_BUFSIZE, GFP_ATOMIC); + if (!buf) + return ERR_PTR(-ENOMEM); + } for (p = hid_usage_table; p->description; p++) if (p->page == page) { - if (!f) - printk("%s", p->description); - else + if (!f) { + snprintf(buf, HID_DEBUG_BUFSIZE, "%s", + p->description); + return buf; + } + else { seq_printf(f, "%s", p->description); - return; + return NULL; + } } if (!f) - printk("%04x", page); + snprintf(buf, HID_DEBUG_BUFSIZE, "%04x", page); else seq_printf(f, "%04x", page); + return buf; } -void hid_resolv_usage(unsigned usage, struct seq_file *f) { +char *hid_resolv_usage(unsigned usage, struct seq_file *f) { const struct hid_usage_entry *p; + char *buf = NULL; + int len = 0; - resolv_usage_page(usage >> 16, f); - if (!f) - printk("."); - else + buf = resolv_usage_page(usage >> 16, f); + if (IS_ERR(buf)) { + printk(KERN_ERR "error allocating HID debug buffer\n"); + return NULL; + } + + + if (!f) { + len = strlen(buf); + snprintf(buf+len, max(0, HID_DEBUG_BUFSIZE - len), "."); + len++; + } + else { seq_printf(f, "."); + } for (p = hid_usage_table; p->description; p++) if (p->page == (usage >> 16)) { for(++p; p->description && p->usage != 0; p++) if (p->usage == (usage & 0xffff)) { if (!f) - printk("%s", p->description); + snprintf(buf + len, + max(0,HID_DEBUG_BUFSIZE - len - 1), + "%s", p->description); else seq_printf(f, "%s", p->description); - return; + return buf; } break; } if (!f) - printk("%04x", usage & 0xffff); + snprintf(buf + len, max(0, HID_DEBUG_BUFSIZE - len - 1), + "%04x", usage & 0xffff); else seq_printf(f, "%04x", usage & 0xffff); + return buf; } EXPORT_SYMBOL_GPL(hid_resolv_usage); @@ -508,13 +549,37 @@ void hid_dump_device(struct hid_device *device, struct seq_file *f) } EXPORT_SYMBOL_GPL(hid_dump_device); -void hid_dump_input(struct hid_usage *usage, __s32 value) { - if (hid_debug < 2) +/* enqueue string to 'events' ring buffer */ +void hid_debug_event(struct hid_device *hdev, char *buf) +{ + int i; + struct hid_debug_list *list; + + list_for_each_entry(list, &hdev->debug_list, node) { + for (i = 0; i <= strlen(buf); i++) + list->hid_debug_buf[(list->tail + i) % (HID_DEBUG_BUFSIZE - 1)] = + buf[i]; + list->tail = (list->tail + i) % (HID_DEBUG_BUFSIZE - 1); + } +} +EXPORT_SYMBOL_GPL(hid_debug_event); + +void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, 
__s32 value) +{ + char *buf; + int len; + + buf = hid_resolv_usage(usage->hid, NULL); + if (!buf) return; + len = strlen(buf); + snprintf(buf + len, HID_DEBUG_BUFSIZE - len - 1, " = %d\n", value); + + hid_debug_event(hdev, buf); + + kfree(buf); + wake_up_interruptible(&hdev->debug_wait); - printk(KERN_DEBUG "hid-debug: input "); - hid_resolv_usage(usage->hid, NULL); - printk(" = %d\n", value); } EXPORT_SYMBOL_GPL(hid_dump_input); @@ -808,6 +873,7 @@ void hid_dump_input_mapping(struct hid_device *hid, struct seq_file *f) } + static int hid_debug_rdesc_show(struct seq_file *f, void *p) { struct hid_device *hdev = f->private; @@ -831,6 +897,126 @@ static int hid_debug_rdesc_open(struct inode *inode, struct file *file) return single_open(file, hid_debug_rdesc_show, inode->i_private); } +static int hid_debug_events_open(struct inode *inode, struct file *file) +{ + int err = 0; + struct hid_debug_list *list; + + if (!(list = kzalloc(sizeof(struct hid_debug_list), GFP_KERNEL))) { + err = -ENOMEM; + goto out; + } + + if (!(list->hid_debug_buf = kzalloc(sizeof(char) * HID_DEBUG_BUFSIZE, GFP_KERNEL))) { + err = -ENOMEM; + goto out; + } + list->hdev = (struct hid_device *) inode->i_private; + file->private_data = list; + mutex_init(&list->read_mutex); + + list_add_tail(&list->node, &list->hdev->debug_list); + +out: + return err; +} + +static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct hid_debug_list *list = file->private_data; + int ret = 0, len; + DECLARE_WAITQUEUE(wait, current); + + while (ret == 0) { + mutex_lock(&list->read_mutex); + if (list->head == list->tail) { + add_wait_queue(&list->hdev->debug_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + while (list->head == list->tail) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + if (!list->hdev || !list->hdev->debug) { + ret = -EIO; + break; + } + + /* allow O_NONBLOCK from other threads */ + mutex_unlock(&list->read_mutex); + schedule(); + mutex_lock(&list->read_mutex); + set_current_state(TASK_INTERRUPTIBLE); + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(&list->hdev->debug_wait, &wait); + } + + if (ret) + goto out; + + /* pass the ringbuffer contents to userspace */ +copy_rest: + if (list->tail == list->head) + goto out; + if (list->tail > list->head) { + len = list->tail - list->head; + + if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) { + ret = -EFAULT; + goto out; + } + ret += len; + list->head += len; + } else { + len = HID_DEBUG_BUFSIZE - list->head; + + if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) { + ret = -EFAULT; + goto out; + } + list->head = 0; + ret += len; + goto copy_rest; + } + + } +out: + mutex_unlock(&list->read_mutex); + return ret; +} + +static unsigned int hid_debug_events_poll(struct file *file, poll_table *wait) +{ + struct hid_debug_list *list = file->private_data; + + poll_wait(file, &list->hdev->debug_wait, wait); + if (list->head != list->tail) + return POLLIN | POLLRDNORM; + if (!list->hdev->debug) + return POLLERR | POLLHUP; + return 0; +} + +static int hid_debug_events_release(struct inode *inode, struct file *file) +{ + struct hid_debug_list *list = file->private_data; + + list_del(&list->node); + kfree(list->hid_debug_buf); + kfree(list); + + return 0; +} + static const struct file_operations hid_debug_rdesc_fops = { .open = hid_debug_rdesc_open, .read = seq_read, @@ -838,16 +1024,31 @@ 
static const struct file_operations hid_debug_rdesc_fops = { .release = single_release, }; +static const struct file_operations hid_debug_events_fops = { + .owner = THIS_MODULE, + .open = hid_debug_events_open, + .read = hid_debug_events_read, + .poll = hid_debug_events_poll, + .release = hid_debug_events_release, +}; + + void hid_debug_register(struct hid_device *hdev, const char *name) { hdev->debug_dir = debugfs_create_dir(name, hid_debug_root); hdev->debug_rdesc = debugfs_create_file("rdesc", 0400, hdev->debug_dir, hdev, &hid_debug_rdesc_fops); + hdev->debug_events = debugfs_create_file("events", 0400, + hdev->debug_dir, hdev, &hid_debug_events_fops); + hdev->debug = 1; } void hid_debug_unregister(struct hid_device *hdev) { + hdev->debug = 0; + wake_up_interruptible(&hdev->debug_wait); debugfs_remove(hdev->debug_rdesc); + debugfs_remove(hdev->debug_events); debugfs_remove(hdev->debug_dir); } diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index 516e12c33235..ec08ac1ad687 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -24,14 +24,27 @@ #ifdef CONFIG_DEBUG_FS -void hid_dump_input(struct hid_usage *, __s32); +void hid_dump_input(struct hid_device *, struct hid_usage *, __s32); void hid_dump_device(struct hid_device *, struct seq_file *); void hid_dump_field(struct hid_field *, int, struct seq_file *); -void hid_resolv_usage(unsigned, struct seq_file *); +char *hid_resolv_usage(unsigned, struct seq_file *); void hid_debug_register(struct hid_device *, const char *); void hid_debug_unregister(struct hid_device *); void hid_debug_init(void); void hid_debug_exit(void); +void hid_debug_event(struct hid_device *, char *); + +#define HID_DEBUG_BUFSIZE 512 + +struct hid_debug_list { + char *hid_debug_buf; + int head; + int tail; + struct fasync_struct *fasync; + struct hid_device *hdev; + struct list_head node; + struct mutex read_mutex; +}; #else @@ -44,6 +57,7 @@ void hid_debug_exit(void); #define hid_debug_unregister(a) do { } while (0) #define hid_debug_init() do { } while (0) #define hid_debug_exit() do { } while (0) +#define hid_debug_event(a,b) do { } while (0) #endif diff --git a/include/linux/hid.h b/include/linux/hid.h index da09ab140ef1..60fa52913f89 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -451,10 +451,6 @@ struct hid_device { /* device report descriptor */ char phys[64]; /* Device physical location */ char uniq[64]; /* Device unique identifier (serial #) */ - /* debugfs */ - struct dentry *debug_dir; - struct dentry *debug_rdesc; - void *driver_data; /* temporary hid_ff handling (until moved to the drivers) */ @@ -468,6 +464,14 @@ struct hid_device { /* device report descriptor */ /* handler for raw output data, used by hidraw */ int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t); + + /* debugging support via debugfs */ + unsigned short debug; + struct dentry *debug_dir; + struct dentry *debug_rdesc; + struct dentry *debug_events; + struct list_head debug_list; + wait_queue_head_t debug_wait; }; static inline void *hid_get_drvdata(struct hid_device *hdev) @@ -625,9 +629,7 @@ struct hid_ll_driver { /* HID core API */ -#ifdef CONFIG_HID_DEBUG extern int hid_debug; -#endif extern int hid_add_device(struct hid_device *); extern void hid_destroy_device(struct hid_device *); @@ -783,21 +785,9 @@ int hid_pidff_init(struct hid_device *hid); #define hid_pidff_init NULL #endif -#ifdef CONFIG_HID_DEBUG #define dbg_hid(format, arg...) 
if (hid_debug) \ printk(KERN_DEBUG "%s: " format ,\ __FILE__ , ## arg) -#define dbg_hid_line(format, arg...) if (hid_debug) \ - printk(format, ## arg) -#else -static inline int __attribute__((format(printf, 1, 2))) -dbg_hid(const char *fmt, ...) -{ - return 0; -} -#define dbg_hid_line dbg_hid -#endif /* HID_DEBUG */ - #define err_hid(format, arg...) printk(KERN_ERR "%s: " format "\n" , \ __FILE__ , ## arg) #endif /* HID_FF */ -- cgit v1.2.3 From ca94c442535a44d508c99a77e54f21a59f4fc462 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 15 Jun 2009 17:17:47 +0200 Subject: sched: Introduce SCHED_RESET_ON_FORK scheduling policy flag This patch introduces a new flag SCHED_RESET_ON_FORK which can be passed to the kernel via sched_setscheduler(), ORed in the policy parameter. If set this will make sure that when the process forks a) the scheduling priority is reset to DEFAULT_PRIO if it was higher and b) the scheduling policy is reset to SCHED_NORMAL if it was either SCHED_FIFO or SCHED_RR. Why have this? Currently, if a process is real-time scheduled this will 'leak' to all its child processes. For security reasons it is often (always?) a good idea to make sure that if a process acquires RT scheduling this is confined to this process and only this process. More specifically this makes the per-process resource limit RLIMIT_RTTIME useful for security purposes, because it makes it impossible to use a fork bomb to circumvent the per-process RLIMIT_RTTIME accounting. This feature is also useful for tools like 'renice' which can then change the nice level of a process without having this spill to all its child processes. Why expose this via sched_setscheduler() and not other syscalls such as prctl() or sched_setparam()? prctl() does not take a pid parameter. Due to that it would be impossible to modify this flag for other processes than the current one. The struct passed to sched_setparam() can unfortunately not be extended without breaking compatibility, since sched_setparam() lacks a size parameter. How to use this from userspace? 
In your RT program simply replace this: sched_setscheduler(pid, SCHED_FIFO, &param); by this: sched_setscheduler(pid, SCHED_FIFO|SCHED_RESET_ON_FORK, &param); Signed-off-by: Lennart Poettering Acked-by: Peter Zijlstra LKML-Reference: <20090615152714.GA29092@tango.0pointer.de> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++++ kernel/sched.c | 49 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4896fdfec913..d4a2c6662f7d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -38,6 +38,8 @@ #define SCHED_BATCH 3 /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 +/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ +#define SCHED_RESET_ON_FORK 0x40000000 #ifdef __KERNEL__ @@ -1209,6 +1211,10 @@ struct task_struct { unsigned did_exec:1; unsigned in_execve:1; /* Tell the LSMs that the process is doing an * execve */ + + /* Revert to default priority/policy when forking */ + unsigned sched_reset_on_fork:1; + pid_t pid; pid_t tgid; diff --git a/kernel/sched.c b/kernel/sched.c index 8ec9d13140be..32e6ede85255 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2613,12 +2613,28 @@ void sched_fork(struct task_struct *p, int clone_flags) set_task_cpu(p, cpu); /* - * Make sure we do not leak PI boosting priority to the child: + * Revert to default priority/policy on fork if requested. Make sure we + * do not leak PI boosting priority to the child. */ - p->prio = current->normal_prio; + if (current->sched_reset_on_fork && + (p->policy == SCHED_FIFO || p->policy == SCHED_RR)) + p->policy = SCHED_NORMAL; + + if (current->sched_reset_on_fork && + (current->normal_prio < DEFAULT_PRIO)) + p->prio = DEFAULT_PRIO; + else + p->prio = current->normal_prio; + if (!rt_prio(p->prio)) p->sched_class = &fair_sched_class; + /* + * We don't need the reset flag anymore after the fork.
It has + * fulfilled its duty: */ + p->sched_reset_on_fork = 0; + #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); @@ -6094,17 +6110,25 @@ static int __sched_setscheduler(struct task_struct *p, int policy, unsigned long flags; const struct sched_class *prev_class = p->sched_class; struct rq *rq; + int reset_on_fork; /* may grab non-irq protected spin_locks */ BUG_ON(in_interrupt()); recheck: /* double check policy once rq lock held */ - if (policy < 0) + if (policy < 0) { + reset_on_fork = p->sched_reset_on_fork; policy = oldpolicy = p->policy; - else if (policy != SCHED_FIFO && policy != SCHED_RR && - policy != SCHED_NORMAL && policy != SCHED_BATCH && - policy != SCHED_IDLE) - return -EINVAL; + } else { + reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); + policy &= ~SCHED_RESET_ON_FORK; + + if (policy != SCHED_FIFO && policy != SCHED_RR && + policy != SCHED_NORMAL && policy != SCHED_BATCH && + policy != SCHED_IDLE) + return -EINVAL; + } + /* * Valid priorities for SCHED_FIFO and SCHED_RR are * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL, @@ -6148,6 +6172,10 @@ recheck: /* can't change other user's priorities */ if (!check_same_owner(p)) return -EPERM; + + /* Normal users shall not reset the sched_reset_on_fork flag */ + if (p->sched_reset_on_fork && !reset_on_fork) + return -EPERM; } if (user) { @@ -6191,6 +6219,8 @@ recheck: if (running) p->sched_class->put_prev_task(rq, p); + p->sched_reset_on_fork = reset_on_fork; + oldprio = p->prio; __setscheduler(rq, p, policy, param->sched_priority); @@ -6307,14 +6337,15 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) if (p) { retval = security_task_getscheduler(p); if (!retval) - retval = p->policy; + retval = p->policy + | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); } read_unlock(&tasklist_lock); return retval; } /** - * sys_sched_getscheduler - get the RT priority of a thread + * sys_sched_getparam - get the RT priority of a thread * @pid: the pid in question. * @param: structure containing the RT priority. */ -- cgit v1.2.3 From 5b739ef8a4e8cf5201d21abff897e292c232477b Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 18 Jun 2009 19:50:21 +0800 Subject: random: Add optional continuous repetition test to entropy store based rngs FIPS-140 requires that all random number generators implement continuous self tests in which each extracted block of data is compared against the last block for repetition. The ansi_cprng implements such a test, but it would be nice if the hw rng's did the same thing. Obviously it's not something that's always needed, but it seems like it would be a nice feature to have on occasion. I've written the below patch which allows individual entropy stores to be flagged as desiring a continuous test to be run on them as data is extracted. By default this option is off, but is enabled in the event that fips mode is selected during bootup.
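The core of the idea, reduced to a self-contained sketch (illustrative only -- the names and block size are made up, this is not the kernel code itself):

#include <string.h>

#define BLOCK_SIZE 10	/* length of one extracted block, EXTRACT_SIZE-like */

static unsigned char last_block[BLOCK_SIZE];
static int have_last;

/* Compare each freshly extracted block against the previous one and
 * report a failure when two consecutive blocks are identical. */
int continuous_test(const unsigned char *block)
{
	if (have_last && !memcmp(block, last_block, BLOCK_SIZE))
		return -1;	/* repetition detected -> FIPS failure */
	memcpy(last_block, block, BLOCK_SIZE);
	have_last = 1;
	return 0;
}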
Signed-off-by: Neil Horman Acked-by: Matt Mackall Signed-off-by: Herbert Xu --- crypto/internal.h | 7 +------ drivers/char/random.c | 14 ++++++++++++++ include/linux/fips.h | 10 ++++++++++ 3 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 include/linux/fips.h (limited to 'include/linux') diff --git a/crypto/internal.h b/crypto/internal.h index 113579a82dff..95baaea21fbc 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -25,12 +25,7 @@ #include #include #include - -#ifdef CONFIG_CRYPTO_FIPS -extern int fips_enabled; -#else -#define fips_enabled 0 -#endif +#include /* Crypto notification events. */ enum { diff --git a/drivers/char/random.c b/drivers/char/random.c index 8c7444857a4b..d8a9255e1a3f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -240,6 +240,7 @@ #include #include #include +#include #ifdef CONFIG_GENERIC_HARDIRQS # include @@ -413,6 +414,7 @@ struct entropy_store { unsigned add_ptr; int entropy_count; int input_rotate; + __u8 *last_data; }; static __u32 input_pool_data[INPUT_POOL_WORDS]; @@ -852,12 +854,21 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, { ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; + unsigned long flags; xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, min, reserved); while (nbytes) { extract_buf(r, tmp); + + if (r->last_data) { + spin_lock_irqsave(&r->lock, flags); + if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) + panic("Hardware RNG duplicated output!\n"); + memcpy(r->last_data, tmp, EXTRACT_SIZE); + spin_unlock_irqrestore(&r->lock, flags); + } i = min_t(int, nbytes, EXTRACT_SIZE); memcpy(buf, tmp, i); nbytes -= i; @@ -940,6 +951,9 @@ static void init_std_data(struct entropy_store *r) now = ktime_get_real(); mix_pool_bytes(r, &now, sizeof(now)); mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); + /* Enable continuous test in fips mode */ + if (fips_enabled) + r->last_data = kmalloc(EXTRACT_SIZE, GFP_KERNEL); } static int rand_initialize(void) diff --git a/include/linux/fips.h b/include/linux/fips.h new file mode 100644 index 000000000000..f8fb07b0b6b8 --- /dev/null +++ b/include/linux/fips.h @@ -0,0 +1,10 @@ +#ifndef _FIPS_H +#define _FIPS_H + +#ifdef CONFIG_CRYPTO_FIPS +extern int fips_enabled; +#else +#define fips_enabled 0 +#endif + +#endif -- cgit v1.2.3 From e74e396204bfcb67570ba4517b08f5918e69afea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 30 Mar 2009 19:07:44 +0900 Subject: percpu: use dynamic percpu allocator as the default percpu allocator This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use dynamic percpu allocator. The first chunk is allocated using embedding helper and 8k is reserved for modules. This ensures that the new allocator behaves almost identically to the original allocator as long as static percpu variables are concerned, so it shouldn't introduce much breakage. s390 and alpha use custom SHIFT_PERCPU_PTR() to work around addressing range limit the addressing model imposes. Unfortunately, this breaks if the address is specified using a variable, so for now, the two archs aren't converted. The following architectures are affected by this change. * sh * arm * cris * mips * sparc(32) * blackfin * avr32 * parisc (broken, under investigation) * m32r * powerpc(32) As this change makes the dynamic allocator the default one, CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its invert - CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to yet-to-be converted archs. 
These archs implement their own setup_per_cpu_areas() and the conversion is not trivial. * powerpc(64) * sparc(64) * ia64 * alpha * s390 Boot and batch alloc/free tests on x86_32 with debug code (x86_32 doesn't use default first chunk initialization). Compile tested on sparc(32), powerpc(32), arm and alpha. Kyle McMartin reported that this change breaks parisc. The problem is still under investigation and he is okay with pushing this patch forward and fixing parisc later. [ Impact: use dynamic allocator for most archs w/o custom percpu setup ] Signed-off-by: Tejun Heo Acked-by: Rusty Russell Acked-by: David S. Miller Acked-by: Benjamin Herrenschmidt Acked-by: Martin Schwidefsky Reviewed-by: Christoph Lameter Cc: Paul Mundt Cc: Russell King Cc: Mikael Starvik Cc: Ralf Baechle Cc: Bryan Wu Cc: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Cc: Hirokazu Takata Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Heiko Carstens Cc: Ingo Molnar --- arch/alpha/Kconfig | 3 +++ arch/ia64/Kconfig | 3 +++ arch/powerpc/Kconfig | 3 +++ arch/s390/Kconfig | 3 +++ arch/sparc/Kconfig | 3 +++ arch/x86/Kconfig | 3 --- include/linux/percpu.h | 12 +++++++++--- init/main.c | 24 ------------------------ kernel/module.c | 6 +++--- mm/Makefile | 2 +- mm/allocpercpu.c | 28 ++++++++++++++++++++++++++++ mm/percpu.c | 40 +++++++++++++++++++++++++++++++++++++++- 12 files changed, 95 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 9fb8aae5c391..05d86407188c 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -70,6 +70,9 @@ config AUTO_IRQ_AFFINITY depends on SMP default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..328d2f8b8c3f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -89,6 +89,9 @@ config GENERIC_TIME_VSYSCALL bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + config HAVE_SETUP_PER_CPU_AREA def_bool y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bf6cedfa05db..a774c2acbe69 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -46,6 +46,9 @@ config GENERIC_HARDIRQS_NO__DO_IRQ bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool PPC64 + config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a14dba0e4d67..f4a3cc62d28f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -75,6 +75,9 @@ config VIRT_CPU_ACCOUNTING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + mainmenu "Linux Kernel Configuration" config S390 diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 3f8b6a92eabd..7a8698b913fe 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -92,6 +92,9 @@ config AUDIT_ARCH bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y if SPARC64 + config HAVE_SETUP_PER_CPU_AREA def_bool y if SPARC64 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d1430ef6b4f9..a48a90076d83 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -149,9 +149,6 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y -config HAVE_DYNAMIC_PER_CPU_AREA - def_bool y - config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 26fd9d12f050..e5000343dd61 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -34,7 +34,7 @@ #ifdef CONFIG_SMP -#ifdef 
CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) @@ -80,7 +80,7 @@ extern ssize_t __init pcpu_embed_first_chunk( extern void *__alloc_reserved_percpu(size_t size, size_t align); -#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ struct percpu_data { void *ptrs[1]; @@ -99,11 +99,15 @@ struct percpu_data { (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ extern void *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void *__pdata); +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +extern void __init setup_per_cpu_areas(void); +#endif + #else /* CONFIG_SMP */ #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) @@ -124,6 +128,8 @@ static inline void free_percpu(void *p) kfree(p); } +static inline void __init setup_per_cpu_areas(void) { } + #endif /* CONFIG_SMP */ #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ diff --git a/init/main.c b/init/main.c index 09131ec090c1..602d724afa5c 100644 --- a/init/main.c +++ b/init/main.c @@ -357,7 +357,6 @@ static void __init smp_init(void) #define smp_init() do { } while (0) #endif -static inline void setup_per_cpu_areas(void) { } static inline void setup_nr_cpu_ids(void) { } static inline void smp_prepare_cpus(unsigned int maxcpus) { } @@ -378,29 +377,6 @@ static void __init setup_nr_cpu_ids(void) nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; } -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; - -EXPORT_SYMBOL(__per_cpu_offset); - -static void __init setup_per_cpu_areas(void) -{ - unsigned long size, i; - char *ptr; - unsigned long nr_possible_cpus = num_possible_cpus(); - - /* Copy section for each CPU (we discard the original) */ - size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); - ptr = alloc_bootmem_pages(size * nr_possible_cpus); - - for_each_possible_cpu(i) { - __per_cpu_offset[i] = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - ptr += size; - } -} -#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ - /* Called by boot processor to activate the rest. */ static void __init smp_init(void) { diff --git a/kernel/module.c b/kernel/module.c index 38928fcaff2b..f5934954fa99 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -364,7 +364,7 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA static void *percpu_modalloc(unsigned long size, unsigned long align, const char *name) @@ -389,7 +389,7 @@ static void percpu_modfree(void *freeme) free_percpu(freeme); } -#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */ /* Number of blocks used and allocated. 
*/ static unsigned int pcpu_num_used, pcpu_num_allocated; @@ -535,7 +535,7 @@ static int percpu_modinit(void) } __initcall(percpu_modinit); -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ static unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, diff --git a/mm/Makefile b/mm/Makefile index 5e0bd6426693..c77c6487552f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o -ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA obj-$(CONFIG_SMP) += percpu.o else obj-$(CONFIG_SMP) += allocpercpu.o diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index dfdee6a47359..df34ceae0c67 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -5,6 +5,8 @@ */ #include #include +#include +#include #ifndef cache_line_size #define cache_line_size() L1_CACHE_BYTES @@ -147,3 +149,29 @@ void free_percpu(void *__pdata) kfree(__percpu_disguise(__pdata)); } EXPORT_SYMBOL_GPL(free_percpu); + +/* + * Generic percpu area setup. + */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; + +EXPORT_SYMBOL(__per_cpu_offset); + +void __init setup_per_cpu_areas(void) +{ + unsigned long size, i; + char *ptr; + unsigned long nr_possible_cpus = num_possible_cpus(); + + /* Copy section for each CPU (we discard the original) */ + size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); + ptr = alloc_bootmem_pages(size * nr_possible_cpus); + + for_each_possible_cpu(i) { + __per_cpu_offset[i] = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + ptr += size; + } +} +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ diff --git a/mm/percpu.c b/mm/percpu.c index b70f2acd8853..b14984566f5a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -43,7 +43,7 @@ * * To use this allocator, arch code should do the followings. * - * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA + * - drop CONFIG_HAVE_LEGACY_PER_CPU_AREA * * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate * regular address to percpu pointer and back if they need to be @@ -1275,3 +1275,41 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, reserved_size, dyn_size, pcpue_unit_size, pcpue_ptr, NULL); } + +/* + * Generic percpu area setup. + * + * The embedding helper is used because its behavior closely resembles + * the original non-dynamic generic percpu area setup. This is + * important because many archs have addressing restrictions and might + * fail if the percpu area is located far away from the previous + * location. As an added bonus, in non-NUMA cases, embedding is + * generally a good idea TLB-wise because percpu area can piggy back + * on the physical linear memory mapping which uses large page + * mappings on applicable archs. + */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +void __init setup_per_cpu_areas(void) +{ + size_t static_size = __per_cpu_end - __per_cpu_start; + ssize_t unit_size; + unsigned long delta; + unsigned int cpu; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. 
+ */ + unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, -1); + if (unit_size < 0) + panic("Failed to initialized percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + cpu * unit_size; +} +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ -- cgit v1.2.3 From 7c756e6e19e71f0327760d8955f7077118ebb2b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:50 +0900 Subject: percpu: implement optional weak percpu definitions Some archs (alpha and s390) need to use weak definitions for percpu variables in modules so that the compiler generates external references for them. This patch implements weak percpu definitions which arch can enable by defining ARCH_NEEDS_WEAK_PER_CPU in arch percpu header file. This weak definition adds the following two restrictions on percpu variable definitions. 1. percpu symbols must be unique whether static or not 2. percpu variables can't be defined inside a function To ensure that these restrictions are observed in generic code, config option DEBUG_FORCE_WEAK_PER_CPU enables weak percpu definitions for all cases. This patch is inspired by Ivan Kokshaysky's alpha percpu patch. [ Impact: stricter rules for percpu variables, one more debug config option ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Howells Cc: Ivan Kokshaysky --- include/linux/percpu-defs.h | 65 ++++++++++++++++++++++++++++++++++++++------- lib/Kconfig.debug | 15 +++++++++++ 2 files changed, 71 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8f921d74f49f..cf32838ad0fa 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -10,21 +10,68 @@ /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the - * 'section' argument. This may be used to affect the parameters governing the + * 'sec' argument. This may be used to affect the parameters governing the * variable's storage. * * NOTE! The sections for the DECLARE and for the DEFINE must match, lest * linkage errors occur due the compiler generating the wrong code to access * that section. */ -#define DECLARE_PER_CPU_SECTION(type, name, section) \ - extern \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SECTION(type, name, section) \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name +#define __PCPU_ATTRS(sec) \ + __attribute__((section(PER_CPU_BASE_SECTION sec))) \ + PER_CPU_ATTRIBUTES + +#define __PCPU_DUMMY_ATTRS \ + __attribute__((section(".discard"), unused)) + +/* + * s390 and alpha modules require percpu variables to be defined as + * weak to force the compiler to generate GOT based external + * references for them. This is necessary because percpu sections + * will be located outside of the usually addressable area. + * + * This definition puts the following two extra restrictions when + * defining percpu variables. + * + * 1. The symbol must be globally unique, even the static ones. + * 2. Static percpu variables cannot be defined inside a function. + * + * Archs which need weak percpu definitions should define + * ARCH_NEEDS_WEAK_PER_CPU in asm/percpu.h when necessary. 
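For context on how the macros below are consumed: a variable is declared in a shared header with DECLARE_PER_CPU() and defined in exactly one translation unit with DEFINE_PER_CPU(). A typical (hypothetical) pairing, using the 2.6.31-era accessor:

#include <linux/percpu.h>

/* In a shared header. */
DECLARE_PER_CPU(int, my_counter);

/* In exactly one .c file.  With weak percpu definitions enabled this
 * also drops the __pcpu_scope_my_counter and __pcpu_unique_my_counter
 * dummies into .discard, so symbol collisions and static/extern
 * mismatches are caught at build time rather than silently merged. */
DEFINE_PER_CPU(int, my_counter);

static void bump_local_counter(void)
{
	/* Caller must keep the task from migrating, e.g. via get_cpu(). */
	__get_cpu_var(my_counter)++;
}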
+ * + * To ensure that the generic code observes the above two + * restrictions, if CONFIG_DEBUG_FORCE_WEAK_PER_CPU is set weak + * definition is used for all cases. + */ +#if defined(ARCH_NEEDS_WEAK_PER_CPU) || defined(CONFIG_DEBUG_FORCE_WEAK_PER_CPU) +/* + * __pcpu_scope_* dummy variable is used to enforce scope. It + * receives the static modifier when it's used in front of + * DEFINE_PER_CPU() and will trigger build failure if + * DECLARE_PER_CPU() is used for the same variable. + * + * __pcpu_unique_* dummy variable is used to enforce symbol uniqueness + * such that hidden weak symbol collision, which will cause unrelated + * variables to share the same address, can be detected during build. + */ +#define DECLARE_PER_CPU_SECTION(type, name, sec) \ + extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ + extern __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SECTION(type, name, sec) \ + __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ + __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ + __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name +#else +/* + * Normal declaration and definition macros. + */ +#define DECLARE_PER_CPU_SECTION(type, name, sec) \ + extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SECTION(type, name, sec) \ + __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name +#endif /* * Variant on the per-CPU variable declaration/definition theme used for diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 23067ab1a73c..77e0d8b1b7c5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -777,6 +777,21 @@ config DEBUG_BLOCK_EXT_DEVT Say N if you are unsure. +config DEBUG_FORCE_WEAK_PER_CPU + bool "Force weak per-cpu definitions" + depends on DEBUG_KERNEL + help + s390 and alpha require percpu variables in modules to be + defined weak to work around addressing range issue which + puts the following two restrictions on percpu variable + definitions. + + 1. percpu symbols must be unique whether static or not + 2. percpu variables can't be defined inside a function + + To ensure that generic code follows the above rules, this + option forces all percpu variables to be defined as weak. + config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_KERNEL -- cgit v1.2.3 From c17ef45342cc033fdf7bdd5b28615e0090f8d2e7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 23 Jun 2009 17:12:47 -0700 Subject: rcu: Remove Classic RCU Remove Classic RCU, given that the combination of Tree RCU and the proposed Bloatwatch RCU do everything that Classic RCU can with fewer bugs. Tree RCU has been default in x86 builds for almost six months, and seems to be quite reliable, so there does not seem to be much justification for keeping the Classic RCU code and config complexity around anymore. Signed-off-by: Paul E. 
McKenney Cc: akpm@linux-foundation.org Cc: niv@us.ibm.com Cc: dvhltc@us.ibm.com Cc: dipankar@in.ibm.com Cc: dhowells@redhat.com Cc: lethal@linux-sh.org Cc: kernel@wantstofly.org Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 178 ---------- include/linux/rcupdate.h | 4 +- init/Kconfig | 12 +- kernel/Makefile | 1 - kernel/rcuclassic.c | 807 --------------------------------------------- 5 files changed, 3 insertions(+), 999 deletions(-) delete mode 100644 include/linux/rcuclassic.h delete mode 100644 kernel/rcuclassic.c (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h deleted file mode 100644 index bfd92e1e5d2c..000000000000 --- a/include/linux/rcuclassic.h +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion (classic version) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright IBM Corporation, 2001 - * - * Author: Dipankar Sarma - * - * Based on the original work by Paul McKenney - * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. - * Papers: - * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf - * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU - * - */ - -#ifndef __LINUX_RCUCLASSIC_H -#define __LINUX_RCUCLASSIC_H - -#include -#include -#include -#include -#include - -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */ -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - long cur; /* Current batch number. */ - long completed; /* Number of the last completed batch */ - long pending; /* Number of the last pending batch */ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - unsigned long gp_start; /* Time at which GP started in jiffies. */ - unsigned long jiffies_stall; - /* Time at which to check for CPU stalls. */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - - int signaled; - - spinlock_t lock ____cacheline_internodealigned_in_smp; - DECLARE_BITMAP(cpumask, NR_CPUS); /* CPUs that need to switch for */ - /* current batch to proceed. */ -} ____cacheline_internodealigned_in_smp; - -/* Is batch a before batch b ? */ -static inline int rcu_batch_before(long a, long b) -{ - return (a - b) < 0; -} - -/* Is batch a after batch b ? */ -static inline int rcu_batch_after(long a, long b) -{ - return (a - b) > 0; -} - -/* Per-CPU data for Read-Copy UPdate. */ -struct rcu_data { - /* 1) quiescent state handling : */ - long quiescbatch; /* Batch # for grace period */ - int passed_quiesc; /* User-mode/idle loop etc. 
*/ - int qs_pending; /* core waits for quiesc state */ - - /* 2) batch handling */ - /* - * if nxtlist is not NULL, then: - * batch: - * The batch # for the last entry of nxtlist - * [*nxttail[1], NULL = *nxttail[2]): - * Entries that batch # <= batch - * [*nxttail[0], *nxttail[1]): - * Entries that batch # <= batch - 1 - * [nxtlist, *nxttail[0]): - * Entries that batch # <= batch - 2 - * The grace period for these entries has completed, and - * the other grace-period-completed entries may be moved - * here temporarily in rcu_process_callbacks(). - */ - long batch; - struct rcu_head *nxtlist; - struct rcu_head **nxttail[3]; - long qlen; /* # of queued callbacks */ - struct rcu_head *donelist; - struct rcu_head **donetail; - long blimit; /* Upper limit on a processed batch */ - int cpu; - struct rcu_head barrier; -}; - -/* - * Increment the quiescent state counter. - * The counter is a bit degenerated: We do not need to know - * how many quiescent states passed, just if there was at least - * one since the start of the grace period. Thus just a flag. - */ -extern void rcu_qsctr_inc(int cpu); -extern void rcu_bh_qsctr_inc(int cpu); - -extern int rcu_pending(int cpu); -extern int rcu_needs_cpu(int cpu); - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -extern struct lockdep_map rcu_lock_map; -# define rcu_read_acquire() \ - lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) -# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) -#else -# define rcu_read_acquire() do { } while (0) -# define rcu_read_release() do { } while (0) -#endif - -#define __rcu_read_lock() \ - do { \ - preempt_disable(); \ - __acquire(RCU); \ - rcu_read_acquire(); \ - } while (0) -#define __rcu_read_unlock() \ - do { \ - rcu_read_release(); \ - __release(RCU); \ - preempt_enable(); \ - } while (0) -#define __rcu_read_lock_bh() \ - do { \ - local_bh_disable(); \ - __acquire(RCU_BH); \ - rcu_read_acquire(); \ - } while (0) -#define __rcu_read_unlock_bh() \ - do { \ - rcu_read_release(); \ - __release(RCU_BH); \ - local_bh_enable(); \ - } while (0) - -#define __synchronize_sched() synchronize_rcu() - -#define call_rcu_sched(head, func) call_rcu(head, func) - -extern void __rcu_init(void); -#define rcu_init_sched() do { } while (0) -extern void rcu_check_callbacks(int cpu, int user); -extern void rcu_restart_cpu(int cpu); - -extern long rcu_batches_completed(void); -extern long rcu_batches_completed_bh(void); - -#define rcu_enter_nohz() do { } while (0) -#define rcu_exit_nohz() do { } while (0) - -/* A context switch is a grace period for rcuclassic. */ -static inline int rcu_blocking_is_gp(void) -{ - return num_online_cpus() == 1; -} - -#endif /* __LINUX_RCUCLASSIC_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 15fbb3ca634d..0cdfdb622faa 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -54,9 +54,7 @@ struct rcu_head { /* Internal to kernel, but needed by rcupreempt.h. */ extern int rcu_scheduler_active; -#if defined(CONFIG_CLASSIC_RCU) -#include -#elif defined(CONFIG_TREE_RCU) +#if defined(CONFIG_TREE_RCU) #include #elif defined(CONFIG_PREEMPT_RCU) #include diff --git a/init/Kconfig b/init/Kconfig index 1ce05a4cb5f6..d10f31dfa0b2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -316,21 +316,13 @@ choice prompt "RCU Implementation" default TREE_RCU -config CLASSIC_RCU - bool "Classic RCU" - help - This option selects the classic RCU implementation that is - designed for best read-side performance on non-realtime - systems. - - Select this option if you are unsure. 
- config TREE_RCU bool "Tree-based hierarchical RCU" help This option selects the RCU implementation that is designed for very large SMP system with hundreds or - thousands of CPUs. + thousands of CPUs. It also scales down nicely to + smaller systems. config PREEMPT_RCU bool "Preemptible RCU" diff --git a/kernel/Makefile b/kernel/Makefile index 0a32cb21ec97..d6042bcc9e99 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -80,7 +80,6 @@ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o -obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c deleted file mode 100644 index 0f2b0b311304..000000000000 --- a/kernel/rcuclassic.c +++ /dev/null @@ -1,807 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright IBM Corporation, 2001 - * - * Authors: Dipankar Sarma - * Manfred Spraul - * - * Based on the original work by Paul McKenney - * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. - * Papers: - * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf - * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -static struct lock_class_key rcu_lock_key; -struct lockdep_map rcu_lock_map = - STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); -EXPORT_SYMBOL_GPL(rcu_lock_map); -#endif - - -/* Definition for rcupdate control block. */ -static struct rcu_ctrlblk rcu_ctrlblk = { - .cur = -300, - .completed = -300, - .pending = -300, - .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock), - .cpumask = CPU_BITS_NONE, -}; - -static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .cur = -300, - .completed = -300, - .pending = -300, - .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock), - .cpumask = CPU_BITS_NONE, -}; - -static DEFINE_PER_CPU(struct rcu_data, rcu_data); -static DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); - -/* - * Increment the quiescent state counter. - * The counter is a bit degenerated: We do not need to know - * how many quiescent states passed, just if there was at least - * one since the start of the grace period. Thus just a flag. 
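A note on the counters in the file being deleted: rcu_batch_before()/rcu_batch_after() (and the jiffies_stall checks further down) compare via signed subtraction so the ordering survives counter wraparound, which is presumably also why the control blocks start at -300, putting a wrap within easy reach of testing. A self-contained illustration, with the subtraction done unsigned to keep the overflow well defined in standard C (the kernel gets the same effect from its compilation flags, assuming the usual two's-complement conversion):

#include <assert.h>
#include <limits.h>

/* Is batch a before batch b?  Works across wraparound as long as the
 * two counters stay within LONG_MAX of each other. */
static int batch_before(long a, long b)
{
	return (long)((unsigned long)a - (unsigned long)b) < 0;
}

int main(void)
{
	assert(batch_before(-300, -299));		/* the initial state */
	assert(batch_before(LONG_MAX, LONG_MIN));	/* across the wrap */
	return 0;
}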
- */ -void rcu_qsctr_inc(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - rdp->passed_quiesc = 1; -} - -void rcu_bh_qsctr_inc(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - rdp->passed_quiesc = 1; -} - -static int blimit = 10; -static int qhimark = 10000; -static int qlowmark = 100; - -#ifdef CONFIG_SMP -static void force_quiescent_state(struct rcu_data *rdp, - struct rcu_ctrlblk *rcp) -{ - int cpu; - unsigned long flags; - - set_need_resched(); - spin_lock_irqsave(&rcp->lock, flags); - if (unlikely(!rcp->signaled)) { - rcp->signaled = 1; - /* - * Don't send IPI to itself. With irqs disabled, - * rdp->cpu is the current cpu. - * - * cpu_online_mask is updated by the _cpu_down() - * using __stop_machine(). Since we're in irqs disabled - * section, __stop_machine() is not exectuting, hence - * the cpu_online_mask is stable. - * - * However, a cpu might have been offlined _just_ before - * we disabled irqs while entering here. - * And rcu subsystem might not yet have handled the CPU_DEAD - * notification, leading to the offlined cpu's bit - * being set in the rcp->cpumask. - * - * Hence cpumask = (rcp->cpumask & cpu_online_mask) to prevent - * sending smp_reschedule() to an offlined CPU. - */ - for_each_cpu_and(cpu, - to_cpumask(rcp->cpumask), cpu_online_mask) { - if (cpu != rdp->cpu) - smp_send_reschedule(cpu); - } - } - spin_unlock_irqrestore(&rcp->lock, flags); -} -#else -static inline void force_quiescent_state(struct rcu_data *rdp, - struct rcu_ctrlblk *rcp) -{ - set_need_resched(); -} -#endif - -static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - long batch; - - head->next = NULL; - smp_mb(); /* Read of rcu->cur must happen after any change by caller. */ - - /* - * Determine the batch number of this callback. - * - * Using ACCESS_ONCE to avoid the following error when gcc eliminates - * local variable "batch" and emits codes like this: - * 1) rdp->batch = rcp->cur + 1 # gets old value - * ...... - * 2)rcu_batch_after(rcp->cur + 1, rdp->batch) # gets new value - * then [*nxttail[0], *nxttail[1]) may contain callbacks - * that batch# = rdp->batch, see the comment of struct rcu_data. - */ - batch = ACCESS_ONCE(rcp->cur) + 1; - - if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) { - /* process callbacks */ - rdp->nxttail[0] = rdp->nxttail[1]; - rdp->nxttail[1] = rdp->nxttail[2]; - if (rcu_batch_after(batch - 1, rdp->batch)) - rdp->nxttail[0] = rdp->nxttail[2]; - } - - rdp->batch = batch; - *rdp->nxttail[2] = head; - rdp->nxttail[2] = &head->next; - - if (unlikely(++rdp->qlen > qhimark)) { - rdp->blimit = INT_MAX; - force_quiescent_state(rdp, &rcu_ctrlblk); - } -} - -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - -static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp) -{ - rcp->gp_start = jiffies; - rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; -} - -static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) -{ - int cpu; - long delta; - unsigned long flags; - - /* Only let one CPU complain about others per time interval. */ - - spin_lock_irqsave(&rcp->lock, flags); - delta = jiffies - rcp->jiffies_stall; - if (delta < 2 || rcp->cur != rcp->completed) { - spin_unlock_irqrestore(&rcp->lock, flags); - return; - } - rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; - spin_unlock_irqrestore(&rcp->lock, flags); - - /* OK, time to rat on our buddy... 
*/ - - printk(KERN_ERR "INFO: RCU detected CPU stalls:"); - for_each_possible_cpu(cpu) { - if (cpumask_test_cpu(cpu, to_cpumask(rcp->cpumask))) - printk(" %d", cpu); - } - printk(" (detected by %d, t=%ld jiffies)\n", - smp_processor_id(), (long)(jiffies - rcp->gp_start)); -} - -static void print_cpu_stall(struct rcu_ctrlblk *rcp) -{ - unsigned long flags; - - printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", - smp_processor_id(), jiffies, - jiffies - rcp->gp_start); - dump_stack(); - spin_lock_irqsave(&rcp->lock, flags); - if ((long)(jiffies - rcp->jiffies_stall) >= 0) - rcp->jiffies_stall = - jiffies + RCU_SECONDS_TILL_STALL_RECHECK; - spin_unlock_irqrestore(&rcp->lock, flags); - set_need_resched(); /* kick ourselves to get things going. */ -} - -static void check_cpu_stall(struct rcu_ctrlblk *rcp) -{ - long delta; - - delta = jiffies - rcp->jiffies_stall; - if (cpumask_test_cpu(smp_processor_id(), to_cpumask(rcp->cpumask)) && - delta >= 0) { - - /* We haven't checked in, so go dump stack. */ - print_cpu_stall(rcp); - - } else if (rcp->cur != rcp->completed && delta >= 2) { - - /* They had two seconds to dump stack, so complain. */ - print_other_cpu_stall(rcp); - } -} - -#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp) -{ -} - -static inline void check_cpu_stall(struct rcu_ctrlblk *rcp) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -/** - * call_rcu - Queue an RCU callback for invocation after a grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period - * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. - */ -void call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) -{ - unsigned long flags; - - head->func = func; - local_irq_save(flags); - __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data)); - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(call_rcu); - -/** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period - * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_bh() assumes - * that the read-side critical sections end on completion of a softirq - * handler. This means that read-side critical sections in process - * context must not be interrupted by softirqs. This interface is to be - * used when most of the read-side critical sections are in softirq context. - * RCU read-side critical sections are delimited by rcu_read_lock() and - * rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh() - * and rcu_read_unlock_bh(), if in process context. These may be nested. - */ -void call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) -{ - unsigned long flags; - - head->func = func; - local_irq_save(flags); - __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(call_rcu_bh); - -/* - * Return the number of RCU batches processed thus far. 
Useful - * for debug and statistics. - */ -long rcu_batches_completed(void) -{ - return rcu_ctrlblk.completed; -} -EXPORT_SYMBOL_GPL(rcu_batches_completed); - -/* - * Return the number of RCU batches processed thus far. Useful - * for debug and statistics. - */ -long rcu_batches_completed_bh(void) -{ - return rcu_bh_ctrlblk.completed; -} -EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); - -/* Raises the softirq for processing rcu_callbacks. */ -static inline void raise_rcu_softirq(void) -{ - raise_softirq(RCU_SOFTIRQ); -} - -/* - * Invoke the completed RCU callbacks. They are expected to be in - * a per-cpu list. - */ -static void rcu_do_batch(struct rcu_data *rdp) -{ - unsigned long flags; - struct rcu_head *next, *list; - int count = 0; - - list = rdp->donelist; - while (list) { - next = list->next; - prefetch(next); - list->func(list); - list = next; - if (++count >= rdp->blimit) - break; - } - rdp->donelist = list; - - local_irq_save(flags); - rdp->qlen -= count; - local_irq_restore(flags); - if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) - rdp->blimit = blimit; - - if (!rdp->donelist) - rdp->donetail = &rdp->donelist; - else - raise_rcu_softirq(); -} - -/* - * Grace period handling: - * The grace period handling consists out of two steps: - * - A new grace period is started. - * This is done by rcu_start_batch. The start is not broadcasted to - * all cpus, they must pick this up by comparing rcp->cur with - * rdp->quiescbatch. All cpus are recorded in the - * rcu_ctrlblk.cpumask bitmap. - * - All cpus must go through a quiescent state. - * Since the start of the grace period is not broadcasted, at least two - * calls to rcu_check_quiescent_state are required: - * The first call just notices that a new grace period is running. The - * following calls check if there was a quiescent state since the beginning - * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If - * the bitmap is empty, then the grace period is completed. - * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace - * period (if necessary). - */ - -/* - * Register a new batch of callbacks, and start it up if there is currently no - * active batch and the batch to be registered has not already occurred. - * Caller must hold rcu_ctrlblk.lock. - */ -static void rcu_start_batch(struct rcu_ctrlblk *rcp) -{ - if (rcp->cur != rcp->pending && - rcp->completed == rcp->cur) { - rcp->cur++; - record_gp_stall_check_time(rcp); - - /* - * Accessing nohz_cpu_mask before incrementing rcp->cur needs a - * Barrier Otherwise it can cause tickless idle CPUs to be - * included in rcp->cpumask, which will extend graceperiods - * unnecessarily. - */ - smp_mb(); - cpumask_andnot(to_cpumask(rcp->cpumask), - cpu_online_mask, nohz_cpu_mask); - - rcp->signaled = 0; - } -} - -/* - * cpu went through a quiescent state since the beginning of the grace period. - * Clear it from the cpu mask and complete the grace period if it was the last - * cpu. Start another grace period if someone has further entries pending - */ -static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp) -{ - cpumask_clear_cpu(cpu, to_cpumask(rcp->cpumask)); - if (cpumask_empty(to_cpumask(rcp->cpumask))) { - /* batch completed ! */ - rcp->completed = rcp->cur; - rcu_start_batch(rcp); - } -} - -/* - * Check if the cpu has gone through a quiescent state (say context - * switch). If so and if it already hasn't done so in this RCU - * quiescent cycle, then indicate that it has done so. 
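The callback lists manipulated throughout this file (nxtlist/nxttail, donelist/donetail) are classic singly linked queues kept with a tail pointer that addresses the terminating NULL slot, giving O(1) append and O(1) splice. Reduced to a standalone sketch:

#include <stddef.h>

struct cb {
	struct cb *next;
	void (*func)(struct cb *);
};

struct cb_list {
	struct cb *head;
	struct cb **tail;	/* address of the NULL that ends the list */
};

static void cb_list_init(struct cb_list *l)
{
	l->head = NULL;
	l->tail = &l->head;
}

/* The  *tail = node; tail = &node->next;  dance from __call_rcu() above. */
static void cb_enqueue(struct cb_list *l, struct cb *node)
{
	node->next = NULL;
	*l->tail = node;
	l->tail = &node->next;
}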
- */ -static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - unsigned long flags; - - if (rdp->quiescbatch != rcp->cur) { - /* start new grace period: */ - rdp->qs_pending = 1; - rdp->passed_quiesc = 0; - rdp->quiescbatch = rcp->cur; - return; - } - - /* Grace period already completed for this cpu? - * qs_pending is checked instead of the actual bitmap to avoid - * cacheline trashing. - */ - if (!rdp->qs_pending) - return; - - /* - * Was there a quiescent state since the beginning of the grace - * period? If no, then exit and wait for the next call. - */ - if (!rdp->passed_quiesc) - return; - rdp->qs_pending = 0; - - spin_lock_irqsave(&rcp->lock, flags); - /* - * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync - * during cpu startup. Ignore the quiescent state. - */ - if (likely(rdp->quiescbatch == rcp->cur)) - cpu_quiet(rdp->cpu, rcp); - - spin_unlock_irqrestore(&rcp->lock, flags); -} - - -#ifdef CONFIG_HOTPLUG_CPU - -/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing - * locking requirements, the list it's pulling from has to belong to a cpu - * which is dead and hence not processing interrupts. - */ -static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, - struct rcu_head **tail, long batch) -{ - unsigned long flags; - - if (list) { - local_irq_save(flags); - this_rdp->batch = batch; - *this_rdp->nxttail[2] = list; - this_rdp->nxttail[2] = tail; - local_irq_restore(flags); - } -} - -static void __rcu_offline_cpu(struct rcu_data *this_rdp, - struct rcu_ctrlblk *rcp, struct rcu_data *rdp) -{ - unsigned long flags; - - /* - * if the cpu going offline owns the grace period - * we can block indefinitely waiting for it, so flush - * it here - */ - spin_lock_irqsave(&rcp->lock, flags); - if (rcp->cur != rcp->completed) - cpu_quiet(rdp->cpu, rcp); - rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1); - rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1); - spin_unlock(&rcp->lock); - - this_rdp->qlen += rdp->qlen; - local_irq_restore(flags); -} - -static void rcu_offline_cpu(int cpu) -{ - struct rcu_data *this_rdp = &get_cpu_var(rcu_data); - struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data); - - __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, - &per_cpu(rcu_data, cpu)); - __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, - &per_cpu(rcu_bh_data, cpu)); - put_cpu_var(rcu_data); - put_cpu_var(rcu_bh_data); -} - -#else - -static void rcu_offline_cpu(int cpu) -{ -} - -#endif - -/* - * This does the RCU processing work from softirq context. 
- */ -static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - unsigned long flags; - long completed_snap; - - if (rdp->nxtlist) { - local_irq_save(flags); - completed_snap = ACCESS_ONCE(rcp->completed); - - /* - * move the other grace-period-completed entries to - * [rdp->nxtlist, *rdp->nxttail[0]) temporarily - */ - if (!rcu_batch_before(completed_snap, rdp->batch)) - rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2]; - else if (!rcu_batch_before(completed_snap, rdp->batch - 1)) - rdp->nxttail[0] = rdp->nxttail[1]; - - /* - * the grace period for entries in - * [rdp->nxtlist, *rdp->nxttail[0]) has completed and - * move these entries to donelist - */ - if (rdp->nxttail[0] != &rdp->nxtlist) { - *rdp->donetail = rdp->nxtlist; - rdp->donetail = rdp->nxttail[0]; - rdp->nxtlist = *rdp->nxttail[0]; - *rdp->donetail = NULL; - - if (rdp->nxttail[1] == rdp->nxttail[0]) - rdp->nxttail[1] = &rdp->nxtlist; - if (rdp->nxttail[2] == rdp->nxttail[0]) - rdp->nxttail[2] = &rdp->nxtlist; - rdp->nxttail[0] = &rdp->nxtlist; - } - - local_irq_restore(flags); - - if (rcu_batch_after(rdp->batch, rcp->pending)) { - unsigned long flags2; - - /* and start it/schedule start if it's a new batch */ - spin_lock_irqsave(&rcp->lock, flags2); - if (rcu_batch_after(rdp->batch, rcp->pending)) { - rcp->pending = rdp->batch; - rcu_start_batch(rcp); - } - spin_unlock_irqrestore(&rcp->lock, flags2); - } - } - - rcu_check_quiescent_state(rcp, rdp); - if (rdp->donelist) - rcu_do_batch(rdp); -} - -static void rcu_process_callbacks(struct softirq_action *unused) -{ - /* - * Memory references from any prior RCU read-side critical sections - * executed by the interrupted code must be see before any RCU - * grace-period manupulations below. - */ - - smp_mb(); /* See above block comment. */ - - __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); - __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); - - /* - * Memory references from any later RCU read-side critical sections - * executed by the interrupted code must be see after any RCU - * grace-period manupulations above. - */ - - smp_mb(); /* See above block comment. */ -} - -static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) -{ - /* Check for CPU stalls, if enabled. */ - check_cpu_stall(rcp); - - if (rdp->nxtlist) { - long completed_snap = ACCESS_ONCE(rcp->completed); - - /* - * This cpu has pending rcu entries and the grace period - * for them has completed. - */ - if (!rcu_batch_before(completed_snap, rdp->batch)) - return 1; - if (!rcu_batch_before(completed_snap, rdp->batch - 1) && - rdp->nxttail[0] != rdp->nxttail[1]) - return 1; - if (rdp->nxttail[0] != &rdp->nxtlist) - return 1; - - /* - * This cpu has pending rcu entries and the new batch - * for then hasn't been started nor scheduled start - */ - if (rcu_batch_after(rdp->batch, rcp->pending)) - return 1; - } - - /* This cpu has finished callbacks to invoke */ - if (rdp->donelist) - return 1; - - /* The rcu core waits for a quiescent state from the cpu */ - if (rdp->quiescbatch != rcp->cur || rdp->qs_pending) - return 1; - - /* nothing to do */ - return 0; -} - -/* - * Check to see if there is any immediate RCU-related work to be done - * by the current CPU, returning 1 if so. This function is part of the - * RCU implementation; it is -not- an exported member of the RCU API. 
- */ -int rcu_pending(int cpu) -{ - return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || - __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); -} - -/* - * Check to see if any future RCU-related work will need to be done - * by the current CPU, even if none need be done immediately, returning - * 1 if so. This function is part of the RCU implementation; it is -not- - * an exported member of the RCU API. - */ -int rcu_needs_cpu(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); - - return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu); -} - -/* - * Top-level function driving RCU grace-period detection, normally - * invoked from the scheduler-clock interrupt. This function simply - * increments counters that are read only from softirq by this same - * CPU, so there are no memory barriers required. - */ -void rcu_check_callbacks(int cpu, int user) -{ - if (user || - (idle_cpu(cpu) && rcu_scheduler_active && - !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { - - /* - * Get here if this CPU took its interrupt from user - * mode or from the idle loop, and if this is not a - * nested interrupt. In this case, the CPU is in - * a quiescent state, so count it. - * - * Also do a memory barrier. This is needed to handle - * the case where writes from a preempt-disable section - * of code get reordered into schedule() by this CPU's - * write buffer. The memory barrier makes sure that - * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are see - * by other CPUs to happen after any such write. - */ - - smp_mb(); /* See above block comment. */ - rcu_qsctr_inc(cpu); - rcu_bh_qsctr_inc(cpu); - - } else if (!in_softirq()) { - - /* - * Get here if this CPU did not take its interrupt from - * softirq, in other words, if it is not interrupting - * a rcu_bh read-side critical section. This is an _bh - * critical section, so count it. The memory barrier - * is needed for the same reason as is the above one. - */ - - smp_mb(); /* See above block comment. */ - rcu_bh_qsctr_inc(cpu); - } - raise_rcu_softirq(); -} - -static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - unsigned long flags; - - spin_lock_irqsave(&rcp->lock, flags); - memset(rdp, 0, sizeof(*rdp)); - rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist; - rdp->donetail = &rdp->donelist; - rdp->quiescbatch = rcp->completed; - rdp->qs_pending = 0; - rdp->cpu = cpu; - rdp->blimit = blimit; - spin_unlock_irqrestore(&rcp->lock, flags); -} - -static void __cpuinit rcu_online_cpu(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu); - - rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp); - rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp); - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); -} - -static int __cpuinit rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - long cpu = (long)hcpu; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - rcu_online_cpu(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - rcu_offline_cpu(cpu); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata rcu_nb = { - .notifier_call = rcu_cpu_notify, -}; - -/* - * Initializes rcu mechanism. Assumed to be called early. - * That is before local timer(SMP) or jiffie timer (uniproc) is setup. 
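The registration code being deleted above follows the standard 2.6-era CPU hotplug pattern, worth keeping on record since every per-CPU subsystem of this vintage repeats it. A skeletal version (subsystem name hypothetical):

#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>

static void __cpuinit mysub_online_cpu(int cpu)
{
	/* set up this CPU's state here */
}

static int __cpuinit mysub_cpu_notify(struct notifier_block *self,
				      unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		mysub_online_cpu(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* tear down or migrate this CPU's state */
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata mysub_nb = {
	.notifier_call = mysub_cpu_notify,
};

static int __init mysub_init(void)
{
	/* cover the boot CPU by hand, then watch the rest come up */
	mysub_cpu_notify(&mysub_nb, CPU_UP_PREPARE,
			 (void *)(long)smp_processor_id());
	register_cpu_notifier(&mysub_nb);
	return 0;
}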
- * Note that rcu_qsctr and friends are implicitly - * initialized due to the choice of ``0'' for RCU_CTR_INVALID. - */ -void __init __rcu_init(void) -{ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - /* Register notifier for non-boot CPUs */ - register_cpu_notifier(&rcu_nb); -} - -module_param(blimit, int, 0); -module_param(qhimark, int, 0); -module_param(qlowmark, int, 0); -- cgit v1.2.3 From 9e48858f7d36a6a3849f1d1b40c3bf5624b4ee7c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 7 May 2009 19:26:19 +1000 Subject: security: rename ptrace_may_access => ptrace_access_check The ->ptrace_may_access() methods are named confusingly - the real ptrace_may_access() returns a bool, while these security checks have a retval convention. Rename it to ptrace_access_check, to reduce the confusion factor. [ Impact: cleanup, no code changed ] Signed-off-by: Ingo Molnar Signed-off-by: James Morris --- include/linux/security.h | 14 +++++++------- kernel/ptrace.c | 2 +- security/capability.c | 2 +- security/commoncap.c | 4 ++-- security/security.c | 4 ++-- security/selinux/hooks.c | 6 +++--- security/smack/smack_lsm.c | 8 ++++---- 7 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 5eff459b3833..145909165dbf 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -52,7 +52,7 @@ struct audit_krule; extern int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap, int audit); extern int cap_settime(struct timespec *ts, struct timezone *tz); -extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); +extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern int cap_capset(struct cred *new, const struct cred *old, @@ -1209,7 +1209,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @alter contains the flag indicating whether changes are to be made. * Return 0 if permission is granted. * - * @ptrace_may_access: + * @ptrace_access_check: * Check permission before allowing the current process to trace the * @child process. * Security modules may also want to perform a process tracing check @@ -1224,7 +1224,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Check that the @parent process has sufficient permission to trace the * current process before allowing the current process to present itself * to the @parent process for tracing. - * The parent process will still have to undergo the ptrace_may_access + * The parent process will still have to undergo the ptrace_access_check * checks before it is allowed to trace this one. * @parent contains the task_struct structure for debugger process. * Return 0 if permission is granted. 
@@ -1336,7 +1336,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) struct security_operations { char name[SECURITY_NAME_MAX + 1]; - int (*ptrace_may_access) (struct task_struct *child, unsigned int mode); + int (*ptrace_access_check) (struct task_struct *child, unsigned int mode); int (*ptrace_traceme) (struct task_struct *parent); int (*capget) (struct task_struct *target, kernel_cap_t *effective, @@ -1617,7 +1617,7 @@ extern int security_module_enable(struct security_operations *ops); extern int register_security(struct security_operations *ops); /* Security operations */ -int security_ptrace_may_access(struct task_struct *child, unsigned int mode); +int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, kernel_cap_t *effective, @@ -1798,10 +1798,10 @@ static inline int security_init(void) return 0; } -static inline int security_ptrace_may_access(struct task_struct *child, +static inline int security_ptrace_access_check(struct task_struct *child, unsigned int mode) { - return cap_ptrace_may_access(child, mode); + return cap_ptrace_access_check(child, mode); } static inline int security_ptrace_traceme(struct task_struct *parent) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 61c78b2c07ba..9a4184e04f29 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -152,7 +152,7 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) if (!dumpable && !capable(CAP_SYS_PTRACE)) return -EPERM; - return security_ptrace_may_access(task, mode); + return security_ptrace_access_check(task, mode); } bool ptrace_may_access(struct task_struct *task, unsigned int mode) diff --git a/security/capability.c b/security/capability.c index 21b6cead6a8e..f218dd361647 100644 --- a/security/capability.c +++ b/security/capability.c @@ -863,7 +863,7 @@ struct security_operations default_security_ops = { void security_fixup_ops(struct security_operations *ops) { - set_to_cap_if_null(ops, ptrace_may_access); + set_to_cap_if_null(ops, ptrace_access_check); set_to_cap_if_null(ops, ptrace_traceme); set_to_cap_if_null(ops, capget); set_to_cap_if_null(ops, capset); diff --git a/security/commoncap.c b/security/commoncap.c index 48b7e0228fa3..aa97704564d4 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -101,7 +101,7 @@ int cap_settime(struct timespec *ts, struct timezone *tz) } /** - * cap_ptrace_may_access - Determine whether the current process may access + * cap_ptrace_access_check - Determine whether the current process may access * another * @child: The process to be accessed * @mode: The mode of attachment. @@ -109,7 +109,7 @@ int cap_settime(struct timespec *ts, struct timezone *tz) * Determine whether a process may access another, returning 0 if permission * granted, -ve if denied. 
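The SELinux and Smack hunks further down show the standard composition pattern for this hook: delegate to cap_ptrace_access_check() first, then layer the module's own policy on top. A hedged sketch of a module wired to the renamed hook (module name hypothetical, 2.6.31-era struct security_operations assumed):

#include <linux/sched.h>
#include <linux/security.h>

static int mylsm_ptrace_access_check(struct task_struct *child,
				     unsigned int mode)
{
	int rc;

	/* Capability checks come first; any denial is final. */
	rc = cap_ptrace_access_check(child, mode);
	if (rc)
		return rc;

	/* module-specific policy decision would go here */
	return 0;
}

static struct security_operations mylsm_ops = {
	.name			= "mylsm",
	.ptrace_access_check	= mylsm_ptrace_access_check,
};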
*/ -int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) +int cap_ptrace_access_check(struct task_struct *child, unsigned int mode) { int ret = 0; diff --git a/security/security.c b/security/security.c index dc7674fbfc7a..4501c5e1f988 100644 --- a/security/security.c +++ b/security/security.c @@ -124,9 +124,9 @@ int register_security(struct security_operations *ops) /* Security operations */ -int security_ptrace_may_access(struct task_struct *child, unsigned int mode) +int security_ptrace_access_check(struct task_struct *child, unsigned int mode) { - return security_ops->ptrace_may_access(child, mode); + return security_ops->ptrace_access_check(child, mode); } int security_ptrace_traceme(struct task_struct *parent) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d6f64783acd1..e3b4f3083dd7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1854,12 +1854,12 @@ static inline u32 open_file_to_av(struct file *file) /* Hook functions begin here. */ -static int selinux_ptrace_may_access(struct task_struct *child, +static int selinux_ptrace_access_check(struct task_struct *child, unsigned int mode) { int rc; - rc = cap_ptrace_may_access(child, mode); + rc = cap_ptrace_access_check(child, mode); if (rc) return rc; @@ -5315,7 +5315,7 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer) static struct security_operations selinux_ops = { .name = "selinux", - .ptrace_may_access = selinux_ptrace_may_access, + .ptrace_access_check = selinux_ptrace_access_check, .ptrace_traceme = selinux_ptrace_traceme, .capget = selinux_capget, .capset = selinux_capset, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 0023182078c7..1c9bdbcbe3d2 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -91,7 +91,7 @@ struct inode_smack *new_inode_smack(char *smack) */ /** - * smack_ptrace_may_access - Smack approval on PTRACE_ATTACH + * smack_ptrace_access_check - Smack approval on PTRACE_ATTACH * @ctp: child task pointer * @mode: ptrace attachment mode * @@ -99,13 +99,13 @@ struct inode_smack *new_inode_smack(char *smack) * * Do the capability checks, and require read and write. */ -static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) +static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode) { int rc; struct smk_audit_info ad; char *sp, *tsp; - rc = cap_ptrace_may_access(ctp, mode); + rc = cap_ptrace_access_check(ctp, mode); if (rc != 0) return rc; @@ -3032,7 +3032,7 @@ static void smack_release_secctx(char *secdata, u32 seclen) struct security_operations smack_ops = { .name = "smack", - .ptrace_may_access = smack_ptrace_may_access, + .ptrace_access_check = smack_ptrace_access_check, .ptrace_traceme = smack_ptrace_traceme, .syslog = smack_syslog, -- cgit v1.2.3 From 38b7f49a0654cb52cac61c6455807248eee3059d Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 26 Jun 2009 10:48:34 +0200 Subject: HID: fix debugfs build with !CONFIG_DEBUG_FS Fix the debug function prototypes to be correct even in the !CONFIG_DEBUG_FS case. 
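The underlying pitfall is that do-nothing macro stubs are not type-checked, so their parameter lists can silently drift away from the real prototypes until a !CONFIG_DEBUG_FS build breaks at the call sites, which is exactly what happened here. Empty static inline stubs sidestep this because the compiler verifies arity and types even when the body does nothing; a sketch of that alternative style for one of the helpers:

#ifdef CONFIG_DEBUG_FS
void hid_dump_input(struct hid_device *hdev,
		    struct hid_usage *usage, __s32 value);
#else
/* An empty static inline keeps the signature honest. */
static inline void hid_dump_input(struct hid_device *hdev,
				  struct hid_usage *usage, __s32 value) { }
#endif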
Reported-by: Stephen Rothwell Signed-off-by: Jiri Kosina --- include/linux/hid-debug.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index ec08ac1ad687..53744fa1c8b7 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -22,6 +22,8 @@ * */ +#define HID_DEBUG_BUFSIZE 512 + #ifdef CONFIG_DEBUG_FS void hid_dump_input(struct hid_device *, struct hid_usage *, __s32); @@ -34,7 +36,6 @@ void hid_debug_init(void); void hid_debug_exit(void); void hid_debug_event(struct hid_device *, char *); -#define HID_DEBUG_BUFSIZE 512 struct hid_debug_list { char *hid_debug_buf; @@ -48,11 +49,10 @@ struct hid_debug_list { #else -#define hid_dump_input(a,b) do { } while (0) -#define hid_dump_device(c) do { } while (0) -#define hid_dump_field(a,b) do { } while (0) -#define hid_resolv_usage(a) do { } while (0) -#define hid_resolv_event(a,b) do { } while (0) +#define hid_dump_input(a,b,c) do { } while (0) +#define hid_dump_device(a,b) do { } while (0) +#define hid_dump_field(a,b,c) do { } while (0) +#define hid_resolv_usage(a,b) do { } while (0) #define hid_debug_register(a, b) do { } while (0) #define hid_debug_unregister(a) do { } while (0) #define hid_debug_init() do { } while (0) -- cgit v1.2.3 From 1a8dd307cc0a2119be4e578c517795464e6dabba Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 29 Jun 2009 17:45:39 +0900 Subject: percpu: use __weak only in the definition of weak percpu variables __weak is necessary only for definition and might even not work in declaration. Drop it from declaration. This change was suggested by Ivan Kokshaysky. Signed-off-by: Tejun Heo Acked-by: Ivan Kokshaysky --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index cf32838ad0fa..9b7a53cc16eb 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -56,7 +56,7 @@ */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ - extern __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ -- cgit v1.2.3 From b294a290d24d1196d68399cc3a9b8c50bfb55abd Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Tue, 30 Jun 2009 02:13:01 -0400 Subject: Revert "power: remove POWER_SUPPLY_PROP_CAPACITY_LEVEL" This reverts commit 8efe444038a205e79b38b7ad03878824901849a8 and 4cbc76eadf56399cd11fb736b33c53aec9caab8c. Richard@laptop.org was apparently using CAPACITY_LEVEL for debugging battery/EC problems, and was upset that it was removed. This readds it. Conflicts: Documentation/power_supply_class.txt Signed-off-by: Andres Salomon Signed-off-by: Anton Vorontsov --- Documentation/power/power_supply_class.txt | 2 ++ drivers/power/olpc_battery.c | 9 +++++++++ drivers/power/power_supply_sysfs.c | 6 ++++++ include/linux/power_supply.h | 10 ++++++++++ 4 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index c6cd4956047c..709d95571d7b 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -108,6 +108,8 @@ relative, time-based measurements. ENERGY_FULL, ENERGY_EMPTY - same as above but for energy. 
CAPACITY - capacity in percents. +CAPACITY_LEVEL - capacity level. This corresponds to +POWER_SUPPLY_CAPACITY_LEVEL_*. TEMP - temperature of the power supply. TEMP_AMBIENT - ambient temperature. diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 58e419299cd6..3a589df09376 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -276,6 +276,14 @@ static int olpc_bat_get_property(struct power_supply *psy, return ret; val->intval = ec_byte; break; + case POWER_SUPPLY_PROP_CAPACITY_LEVEL: + if (ec_byte & BAT_STAT_FULL) + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_FULL; + else if (ec_byte & BAT_STAT_LOW) + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_LOW; + else + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; + break; case POWER_SUPPLY_PROP_TEMP: ret = olpc_ec_cmd(EC_BAT_TEMP, NULL, 0, (void *)&ec_word, 2); if (ret) @@ -321,6 +329,7 @@ static enum power_supply_property olpc_bat_props[] = { POWER_SUPPLY_PROP_VOLTAGE_AVG, POWER_SUPPLY_PROP_CURRENT_AVG, POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_AMBIENT, POWER_SUPPLY_PROP_MANUFACTURER, diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index da73591017f9..9deabbde6fd6 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -51,6 +51,9 @@ static ssize_t power_supply_show_property(struct device *dev, "Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe", "NiCd", "LiMn" }; + static char *capacity_level_text[] = { + "Unknown", "Critical", "Low", "Normal", "High", "Full" + }; ssize_t ret; struct power_supply *psy = dev_get_drvdata(dev); const ptrdiff_t off = attr - power_supply_attrs; @@ -71,6 +74,8 @@ static ssize_t power_supply_show_property(struct device *dev, return sprintf(buf, "%s\n", health_text[value.intval]); else if (off == POWER_SUPPLY_PROP_TECHNOLOGY) return sprintf(buf, "%s\n", technology_text[value.intval]); + else if (off == POWER_SUPPLY_PROP_CAPACITY_LEVEL) + return sprintf(buf, "%s\n", capacity_level_text[value.intval]); else if (off >= POWER_SUPPLY_PROP_MODEL_NAME) return sprintf(buf, "%s\n", value.strval); @@ -109,6 +114,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(energy_now), POWER_SUPPLY_ATTR(energy_avg), POWER_SUPPLY_ATTR(capacity), + POWER_SUPPLY_ATTR(capacity_level), POWER_SUPPLY_ATTR(temp), POWER_SUPPLY_ATTR(temp_ambient), POWER_SUPPLY_ATTR(time_to_empty_now), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 594c494ac3f0..0ab6aa171241 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -58,6 +58,15 @@ enum { POWER_SUPPLY_TECHNOLOGY_LiMn, }; +enum { + POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN = 0, + POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL, + POWER_SUPPLY_CAPACITY_LEVEL_LOW, + POWER_SUPPLY_CAPACITY_LEVEL_NORMAL, + POWER_SUPPLY_CAPACITY_LEVEL_HIGH, + POWER_SUPPLY_CAPACITY_LEVEL_FULL, +}; + enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, @@ -89,6 +98,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_ENERGY_NOW, POWER_SUPPLY_PROP_ENERGY_AVG, POWER_SUPPLY_PROP_CAPACITY, /* in percents! 
*/ + POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_AMBIENT, POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, -- cgit v1.2.3 From ee8076ed3e1cdd0cd1e61318386932669c90b92f Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Thu, 2 Jul 2009 09:45:18 -0400 Subject: power_supply: Add a charge_type property, and use it for olpc driver This adds a new sysfs file called 'charge_type' which displays the type of charging (unknown, n/a, trickle charge, or fast charging). This allows things like battery diagnostics to determine what the battery/EC is doing without resorting to changing the 'status' sysfs output. Signed-off-by: Andres Salomon Acked-by: Mark Brown Signed-off-by: Anton Vorontsov --- Documentation/power/power_supply_class.txt | 5 +++++ drivers/power/olpc_battery.c | 9 +++++++++ drivers/power/power_supply_sysfs.c | 6 ++++++ include/linux/power_supply.h | 8 ++++++++ 4 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index 709d95571d7b..9f16c5178b66 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -76,6 +76,11 @@ STATUS - this attribute represents operating status (charging, full, discharging (i.e. powering a load), etc.). This corresponds to BATTERY_STATUS_* values, as defined in battery.h. +CHARGE_TYPE - batteries can typically charge at different rates. +This defines trickle and fast charges. For batteries that +are already charged or discharging, 'n/a' can be displayed (or +'unknown', if the status is not known). + HEALTH - represents health of the battery, values corresponds to POWER_SUPPLY_HEALTH_*, defined in battery.h. diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 602bbd008f78..8fefe5a73558 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -233,6 +233,14 @@ static int olpc_bat_get_property(struct power_supply *psy, if (ret) return ret; break; + case POWER_SUPPLY_PROP_CHARGE_TYPE: + if (ec_byte & BAT_STAT_TRICKLE) + val->intval = POWER_SUPPLY_CHARGE_TYPE_TRICKLE; + else if (ec_byte & BAT_STAT_CHARGING) + val->intval = POWER_SUPPLY_CHARGE_TYPE_FAST; + else + val->intval = POWER_SUPPLY_CHARGE_TYPE_NONE; + break; case POWER_SUPPLY_PROP_PRESENT: val->intval = !!(ec_byte & (BAT_STAT_PRESENT | BAT_STAT_TRICKLE)); @@ -325,6 +333,7 @@ static int olpc_bat_get_property(struct power_supply *psy, static enum power_supply_property olpc_bat_props[] = { POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_TECHNOLOGY, diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index 9deabbde6fd6..08144393d64b 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -43,6 +43,9 @@ static ssize_t power_supply_show_property(struct device *dev, static char *status_text[] = { "Unknown", "Charging", "Discharging", "Not charging", "Full" }; + static char *charge_type[] = { + "Unknown", "N/A", "Trickle", "Fast" + }; static char *health_text[] = { "Unknown", "Good", "Overheat", "Dead", "Over voltage", "Unspecified failure", "Cold", @@ -70,6 +73,8 @@ static ssize_t power_supply_show_property(struct device *dev, if (off == POWER_SUPPLY_PROP_STATUS) return sprintf(buf, "%s\n", status_text[value.intval]); + else if (off == POWER_SUPPLY_PROP_CHARGE_TYPE) + return sprintf(buf, "%s\n", charge_type[value.intval]); else if (off == 
POWER_SUPPLY_PROP_HEALTH) return sprintf(buf, "%s\n", health_text[value.intval]); else if (off == POWER_SUPPLY_PROP_TECHNOLOGY) @@ -86,6 +91,7 @@ static ssize_t power_supply_show_property(struct device *dev, static struct device_attribute power_supply_attrs[] = { /* Properties of type `int' */ POWER_SUPPLY_ATTR(status), + POWER_SUPPLY_ATTR(charge_type), POWER_SUPPLY_ATTR(health), POWER_SUPPLY_ATTR(present), POWER_SUPPLY_ATTR(online), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 0ab6aa171241..4c7c6fc35487 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -38,6 +38,13 @@ enum { POWER_SUPPLY_STATUS_FULL, }; +enum { + POWER_SUPPLY_CHARGE_TYPE_UNKNOWN = 0, + POWER_SUPPLY_CHARGE_TYPE_NONE, + POWER_SUPPLY_CHARGE_TYPE_TRICKLE, + POWER_SUPPLY_CHARGE_TYPE_FAST, +}; + enum { POWER_SUPPLY_HEALTH_UNKNOWN = 0, POWER_SUPPLY_HEALTH_GOOD, @@ -70,6 +77,7 @@ enum { enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, + POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, -- cgit v1.2.3 From 03b042bf1dc14a268a3d65d38b4ec2a4261e8477 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 Jun 2009 09:08:16 -0700 Subject: rcu: Add synchronize_sched_expedited() primitive This adds the synchronize_sched_expedited() primitive that implements the "big hammer" expedited RCU grace periods. This primitive is placed in kernel/sched.c rather than kernel/rcupdate.c due to its need to interact closely with the migration_thread() kthread. The idea is to wake up this kthread with req->task set to NULL, in response to which the kthread reports the quiescent state resulting from the kthread having been scheduled. Because this patch needs to fallback to the slow versions of the primitives in response to some races with CPU onlining and offlining, a new synchronize_rcu_bh() primitive is added as well. Signed-off-by: Paul E. McKenney Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Cc: davem@davemloft.net Cc: dada1@cosmosbay.com Cc: zbr@ioremap.net Cc: jeff.chua.linux@gmail.com Cc: paulus@samba.org Cc: laijs@cn.fujitsu.com Cc: jengelh@medozas.de Cc: r000n@r000n.net Cc: benh@kernel.crashing.org Cc: mathieu.desnoyers@polymtl.ca LKML-Reference: <12459460982947-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 25 ++++----- include/linux/rcupreempt.h | 10 ++++ include/linux/rcutree.h | 12 ++++- kernel/rcupdate.c | 25 +++++++++ kernel/sched.c | 129 ++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 186 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0cdfdb622faa..3c89d6a2591f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -51,7 +51,19 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; -/* Internal to kernel, but needed by rcupreempt.h. 
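Stepping back to the two power-supply commits above: both new properties lean on the same sysfs convention, an enum whose values index a static string table, so the two must be kept in lockstep. A standalone model of that contract:

#include <stdio.h>

/* Mirrors power_supply_sysfs.c: the enum value is the table index, so
 * the table order is the whole ABI. */
enum {
	CHARGE_TYPE_UNKNOWN = 0,
	CHARGE_TYPE_NONE,
	CHARGE_TYPE_TRICKLE,
	CHARGE_TYPE_FAST,
};

static const char *charge_type_text[] = {
	"Unknown", "N/A", "Trickle", "Fast"
};

int main(void)
{
	printf("%s\n", charge_type_text[CHARGE_TYPE_TRICKLE]);	/* Trickle */
	return 0;
}

Userspace then reads the formatted string from sysfs; the path depends on the driver's supply name, e.g. something like /sys/class/power_supply/olpc-battery/charge_type yielding "Trickle".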
*/ +/* Exported common interfaces */ +extern void synchronize_rcu(void); +extern void synchronize_rcu_bh(void); +extern void rcu_barrier(void); +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); +extern void synchronize_sched_expedited(void); +extern int sched_expedited_torture_stats(char *page); + +/* Internal to kernel */ +extern void rcu_init(void); +extern void rcu_scheduler_starting(void); +extern int rcu_needs_cpu(int cpu); extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) @@ -257,15 +269,4 @@ extern void call_rcu(struct rcu_head *head, extern void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *head)); -/* Exported common interfaces */ -extern void synchronize_rcu(void); -extern void rcu_barrier(void); -extern void rcu_barrier_bh(void); -extern void rcu_barrier_sched(void); - -/* Internal to kernel */ -extern void rcu_init(void); -extern void rcu_scheduler_starting(void); -extern int rcu_needs_cpu(int cpu); - #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index fce522782ffa..f164ac9b7807 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -74,6 +74,16 @@ extern int rcu_needs_cpu(int cpu); extern void __synchronize_sched(void); +static inline void synchronize_rcu_expedited(void) +{ + synchronize_rcu(); /* Placeholder for new rcupreempt implementation. */ +} + +static inline void synchronize_rcu_bh_expedited(void) +{ + synchronize_rcu_bh(); /* Placeholder for new rcupreempt impl. */ +} + extern void __rcu_init(void); extern void rcu_init_sched(void); extern void rcu_check_callbacks(int cpu, int user); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5a5153806c42..d4dfd2489633 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -286,8 +286,14 @@ static inline void __rcu_read_unlock_bh(void) #define call_rcu_sched(head, func) call_rcu(head, func) -static inline void rcu_init_sched(void) +static inline void synchronize_rcu_expedited(void) +{ + synchronize_sched_expedited(); +} + +static inline void synchronize_rcu_bh_expedited(void) { + synchronize_sched_expedited(); } extern void __rcu_init(void); @@ -297,6 +303,10 @@ extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); +static inline void rcu_init_sched(void) +{ +} + #ifdef CONFIG_NO_HZ void rcu_enter_nohz(void); void rcu_exit_nohz(void); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index a967c9feb90a..eae29c25fb14 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -98,6 +98,30 @@ void synchronize_rcu(void) } EXPORT_SYMBOL_GPL(synchronize_rcu); +/** + * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. + * + * Control will return to the caller some time after a full rcu_bh grace + * period has elapsed, in other words after all currently executing rcu_bh + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), + * and may be nested. + */ +void synchronize_rcu_bh(void) +{ + struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_bh(&rcu.head, wakeme_after_rcu); + /* Wait for it. 
*/ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_bh); + static void rcu_barrier_callback(struct rcu_head *notused) { if (atomic_dec_and_test(&rcu_barrier_cpu_count)) @@ -129,6 +153,7 @@ static void rcu_barrier_func(void *type) static inline void wait_migrated_callbacks(void) { wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count)); + smp_mb(); /* In case we didn't sleep. */ } /* diff --git a/kernel/sched.c b/kernel/sched.c index 7c9098d186e6..9ae80bec1c1e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7024,6 +7024,11 @@ fail: return ret; } +#define RCU_MIGRATION_IDLE 0 +#define RCU_MIGRATION_NEED_QS 1 +#define RCU_MIGRATION_GOT_QS 2 +#define RCU_MIGRATION_MUST_SYNC 3 + /* * migration_thread - this is a highprio system thread that performs * thread migration by bumping thread off CPU then 'pushing' onto @@ -7031,6 +7036,7 @@ fail: */ static int migration_thread(void *data) { + int badcpu; int cpu = (long)data; struct rq *rq; @@ -7065,8 +7071,17 @@ static int migration_thread(void *data) req = list_entry(head->next, struct migration_req, list); list_del_init(head->next); - spin_unlock(&rq->lock); - __migrate_task(req->task, cpu, req->dest_cpu); + if (req->task != NULL) { + spin_unlock(&rq->lock); + __migrate_task(req->task, cpu, req->dest_cpu); + } else if (likely(cpu == (badcpu = smp_processor_id()))) { + req->dest_cpu = RCU_MIGRATION_GOT_QS; + spin_unlock(&rq->lock); + } else { + req->dest_cpu = RCU_MIGRATION_MUST_SYNC; + spin_unlock(&rq->lock); + WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); + } local_irq_enable(); complete(&req->done); @@ -10554,3 +10569,113 @@ struct cgroup_subsys cpuacct_subsys = { .subsys_id = cpuacct_subsys_id, }; #endif /* CONFIG_CGROUP_CPUACCT */ + +#ifndef CONFIG_SMP + +int rcu_expedited_torture_stats(char *page) +{ + return 0; +} +EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats); + +void synchronize_sched_expedited(void) +{ +} +EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + +#else /* #ifndef CONFIG_SMP */ + +static DEFINE_PER_CPU(struct migration_req, rcu_migration_req); +static DEFINE_MUTEX(rcu_sched_expedited_mutex); + +#define RCU_EXPEDITED_STATE_POST -2 +#define RCU_EXPEDITED_STATE_IDLE -1 + +static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; + +int rcu_expedited_torture_stats(char *page) +{ + int cnt = 0; + int cpu; + + cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state); + for_each_online_cpu(cpu) { + cnt += sprintf(&page[cnt], " %d:%d", + cpu, per_cpu(rcu_migration_req, cpu).dest_cpu); + } + cnt += sprintf(&page[cnt], "\n"); + return cnt; +} +EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats); + +static long synchronize_sched_expedited_count; + +/* + * Wait for an rcu-sched grace period to elapse, but use "big hammer" + * approach to force grace period to end quickly. This consumes + * significant time on all CPUs, and is thus not recommended for + * any sort of common-case code. + * + * Note that it is illegal to call this function while holding any + * lock that is acquired by a CPU-hotplug notifier. Failing to + * observe this restriction will result in deadlock. + */ +void synchronize_sched_expedited(void) +{ + int cpu; + unsigned long flags; + bool need_full_sync = 0; + struct rq *rq; + struct migration_req *req; + long snap; + int trycount = 0; + + smp_mb(); /* ensure prior mod happens before capturing snap. 
*/ + snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1; + get_online_cpus(); + while (!mutex_trylock(&rcu_sched_expedited_mutex)) { + put_online_cpus(); + if (trycount++ < 10) + udelay(trycount * num_online_cpus()); + else { + synchronize_sched(); + return; + } + if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) { + smp_mb(); /* ensure test happens before caller kfree */ + return; + } + get_online_cpus(); + } + rcu_expedited_state = RCU_EXPEDITED_STATE_POST; + for_each_online_cpu(cpu) { + rq = cpu_rq(cpu); + req = &per_cpu(rcu_migration_req, cpu); + init_completion(&req->done); + req->task = NULL; + req->dest_cpu = RCU_MIGRATION_NEED_QS; + spin_lock_irqsave(&rq->lock, flags); + list_add(&req->list, &rq->migration_queue); + spin_unlock_irqrestore(&rq->lock, flags); + wake_up_process(rq->migration_thread); + } + for_each_online_cpu(cpu) { + rcu_expedited_state = cpu; + req = &per_cpu(rcu_migration_req, cpu); + rq = cpu_rq(cpu); + wait_for_completion(&req->done); + spin_lock_irqsave(&rq->lock, flags); + if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) + need_full_sync = 1; + req->dest_cpu = RCU_MIGRATION_IDLE; + spin_unlock_irqrestore(&rq->lock, flags); + } + rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; + mutex_unlock(&rcu_sched_expedited_mutex); + put_online_cpus(); + if (need_full_sync) + synchronize_sched(); +} +EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + +#endif /* #else #ifndef CONFIG_SMP */ -- cgit v1.2.3 From 788e5abc5441e9046dd91c995c6f1f75bbd144bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:58 +0900 Subject: percpu: drop @unit_size from embed first chunk allocator The only extra feature @unit_size provides is making dead space at the end of the first chunk which doesn't have any valid use case. Drop the parameter. This will increase consistency with the generalized 4k allocator. James Bottomley spotted missing conversion for the default setup_per_cpu_areas() which caused build breakage on all archs which use it. 
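A rough sketch of the resulting call (hypothetical caller, not code from this
patch; with @unit_size gone, the unit size is always max(size_sum,
PCPU_MIN_UNIT_SIZE)):

	/* illustrative only; an arch setup path after this change */
	ssize_t ret;

	ret = pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
				     -1 /* dyn_size: auto */);
	if (ret < 0)
		panic("percpu: embed first chunk setup failed\n");
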
[ Impact: drop unused code path ] Signed-off-by: Tejun Heo Cc: James Bottomley Cc: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 2 +- include/linux/percpu.h | 2 +- mm/percpu.c | 18 ++++++------------ 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 29a3eef7cf4a..14728206fb52 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -342,7 +342,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) return -EINVAL; return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); + reserve - PERCPU_FIRST_CHUNK_RESERVE); } /* diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e5000343dd61..83bff053bd1c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -69,7 +69,7 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size); + ssize_t dyn_size); /* * Use this to get to a cpu's version of the per-cpu object diff --git a/mm/percpu.c b/mm/percpu.c index 19dd83b5cbdc..fc6babe6e554 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1207,7 +1207,6 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto * * This is a helper to ease setting up embedded first percpu chunk and * can be called where pcpu_setup_first_chunk() is expected. @@ -1219,9 +1218,9 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * page size. * * When @dyn_size is positive, dynamic area might be larger than - * specified to fill page alignment. Also, when @dyn_size is auto, - * @dyn_size does not fill the whole first chunk but only what's - * necessary for page alignment after static and reserved areas. + * specified to fill page alignment. When @dyn_size is auto, + * @dyn_size is just big enough to fill page alignment after static + * and reserved areas. * * If the needed size is smaller than the minimum or specified unit * size, the leftover is returned to the bootmem allocator. @@ -1231,7 +1230,7 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * percpu access on success, -errno on failure. */ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size) + ssize_t dyn_size) { size_t chunk_size; unsigned int cpu; @@ -1242,12 +1241,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, if (dyn_size != 0) dyn_size = pcpue_size - static_size - reserved_size; - if (unit_size >= 0) { - BUG_ON(unit_size < pcpue_size); - pcpue_unit_size = unit_size; - } else - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - + pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); chunk_size = pcpue_unit_size * num_possible_cpus(); pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, @@ -1304,7 +1298,7 @@ void __init setup_per_cpu_areas(void) * what the legacy allocator did. 
*/ unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE, -1); + PERCPU_DYNAMIC_RESERVE); if (unit_size < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From d4b95f80399471e4bce5e992700ff7f06ef91f6a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: x86,percpu: generalize 4k first chunk allocator Generalize and move x86 setup_pcpu_4k() into pcpu_4k_first_chunk(). setup_pcpu_4k() now is a simple wrapper around the generalized version. Other than taking size parameters and using arch supplied callbacks to allocate/free memory, pcpu_4k_first_chunk() is identical to the original implementation. This simplifies arch code and will help converting more archs to dynamic percpu allocator. While at it, s/pcpu_populate_pte_fn_t/pcpu_fc_populate_pte_fn_t/ for consistency. [ Impact: code reorganization and generalization ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 78 ++++++++++---------------------------- include/linux/percpu.h | 12 +++++- mm/percpu.c | 85 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 113 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 14728206fb52..ab896b31e80b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -123,6 +123,19 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, #endif } +/* + * Helpers for first chunk memory allocation + */ +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size) +{ + return pcpu_alloc_bootmem(cpu, size, size); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} + /* * Large page remap allocator * @@ -346,22 +359,11 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) } /* - * 4k page allocator + * 4k allocator * - * This is the basic allocator. Static percpu area is allocated - * page-by-page and most of initialization is done by the generic - * setup function. + * Boring fallback 4k allocator. This allocator puts more pressure on + * PTE TLBs but other than that behaves nicely on both UMA and NUMA. 
*/ -static struct page **pcpu4k_pages __initdata; -static int pcpu4k_nr_static_pages __initdata; - -static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) -{ - if (pageno < pcpu4k_nr_static_pages) - return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; - return NULL; -} - static void __init pcpu4k_populate_pte(unsigned long addr) { populate_extra_pte(addr); @@ -369,51 +371,9 @@ static void __init pcpu4k_populate_pte(unsigned long addr) static ssize_t __init setup_pcpu_4k(size_t static_size) { - size_t pages_size; - unsigned int cpu; - int i, j; - ssize_t ret; - - pcpu4k_nr_static_pages = PFN_UP(static_size); - - /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() - * sizeof(pcpu4k_pages[0])); - pcpu4k_pages = alloc_bootmem(pages_size); - - /* allocate and copy */ - j = 0; - for_each_possible_cpu(cpu) - for (i = 0; i < pcpu4k_nr_static_pages; i++) { - void *ptr; - - ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); - if (!ptr) { - pr_warning("PERCPU: failed to allocate " - "4k page for cpu%u\n", cpu); - goto enomem; - } - - memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); - pcpu4k_pages[j++] = virt_to_page(ptr); - } - - /* we're ready, commit */ - pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", - pcpu4k_nr_static_pages, static_size); - - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, -1, - -1, NULL, pcpu4k_populate_pte); - goto out_free_ar; - -enomem: - while (--j >= 0) - free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); - ret = -ENOMEM; -out_free_ar: - free_bootmem(__pa(pcpu4k_pages), pages_size); - return ret; + return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpu4k_populate_pte); } /* for explicit first chunk allocator selection */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 83bff053bd1c..41b5bfab4195 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,18 +59,26 @@ extern void *pcpu_base_addr; typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); -typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); +typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); +typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, ssize_t dyn_size, ssize_t unit_size, void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); + pcpu_fc_populate_pte_fn_t populate_pte_fn); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); +extern ssize_t __init pcpu_4k_first_chunk( + size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. 
Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index fc6babe6e554..27b0f40a3ea8 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1037,7 +1037,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, ssize_t dyn_size, ssize_t unit_size, void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn) + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct first_vm; static int smap[2], dmap[2]; @@ -1270,6 +1270,89 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, pcpue_unit_size, pcpue_ptr, NULL); } +/* + * 4k page first chunk setup helper. + */ +static struct page **pcpu4k_pages __initdata; +static int pcpu4k_nr_static_pages __initdata; + +static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) +{ + if (pageno < pcpu4k_nr_static_pages) + return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; + return NULL; +} + +/** + * pcpu_4k_first_chunk - map the first chunk using PAGE_SIZE pages + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE + * @free_fn: funtion to free percpu page, always called with PAGE_SIZE + * @populate_pte_fn: function to populate pte + * + * This is a helper to ease setting up embedded first percpu chunk and + * can be called where pcpu_setup_first_chunk() is expected. + * + * This is the basic allocator. Static percpu area is allocated + * page-by-page into vmalloc area. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access on success, -errno on failure. + */ +ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) +{ + size_t pages_size; + unsigned int cpu; + int i, j; + ssize_t ret; + + pcpu4k_nr_static_pages = PFN_UP(static_size); + + /* unaligned allocations can't be freed, round up to page size */ + pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() * + sizeof(pcpu4k_pages[0])); + pcpu4k_pages = alloc_bootmem(pages_size); + + /* allocate and copy */ + j = 0; + for_each_possible_cpu(cpu) + for (i = 0; i < pcpu4k_nr_static_pages; i++) { + void *ptr; + + ptr = alloc_fn(cpu, PAGE_SIZE); + if (!ptr) { + pr_warning("PERCPU: failed to allocate " + "4k page for cpu%u\n", cpu); + goto enomem; + } + + memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); + pcpu4k_pages[j++] = virt_to_page(ptr); + } + + /* we're ready, commit */ + pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", + pcpu4k_nr_static_pages, static_size); + + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, + reserved_size, -1, + -1, NULL, populate_pte_fn); + goto out_free_ar; + +enomem: + while (--j >= 0) + free_fn(page_address(pcpu4k_pages[j]), PAGE_SIZE); + ret = -ENOMEM; +out_free_ar: + free_bootmem(__pa(pcpu4k_pages), pages_size); + return ret; +} + /* * Generic percpu area setup. * -- cgit v1.2.3 From 8c4bfc6e8801616ab2e01c38140b2159b388d2ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: x86,percpu: generalize lpage first chunk allocator Generalize and move x86 setup_pcpu_lpage() into pcpu_lpage_first_chunk(). setup_pcpu_lpage() now is a simple wrapper around the generalized version. 
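(A sketch of the split, with hypothetical names: the arch keeps a thin
wrapper that supplies its hooks while the generic code owns the setup loop.
my_alloc/my_free/my_map are assumed callbacks matching the pcpu_fc_*_fn_t
typedefs introduced below; illustrative only.)

	static ssize_t __init my_setup_lpage(size_t static_size)
	{
		return pcpu_lpage_first_chunk(static_size,
					      PERCPU_FIRST_CHUNK_RESERVE,
					      -1, PMD_SIZE,
					      my_alloc, my_free, my_map);
	}
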
Other than taking size parameters and using arch supplied callbacks to allocate/free/map memory, pcpu_lpage_first_chunk() is identical to the original implementation. This simplifies arch code and will help converting more archs to dynamic percpu allocator. While at it, factor out pcpu_calc_fc_sizes() which is common to pcpu_embed_first_chunk() and pcpu_lpage_first_chunk(). [ Impact: code reorganization and generalization ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/x86/include/asm/percpu.h | 9 -- arch/x86/kernel/setup_percpu.c | 169 +++------------------------------ arch/x86/mm/pageattr.c | 1 + include/linux/percpu.h | 27 ++++++ mm/percpu.c | 209 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 244 insertions(+), 171 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 103f1ddb0d85..a18c038a3079 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -156,15 +156,6 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); -#ifdef CONFIG_NEED_MULTIPLE_NODES -void *pcpu_lpage_remapped(void *kaddr); -#else -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ab896b31e80b..4f2e0ac9130b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -137,44 +137,21 @@ static void __init pcpu_fc_free(void *ptr, size_t size) } /* - * Large page remap allocator - * - * This allocator uses PMD page as unit. A PMD page is allocated for - * each cpu and each is remapped into vmalloc area using PMD mapping. - * As PMD page is quite large, only part of it is used for the first - * chunk. Unused part is returned to the bootmem allocator. - * - * So, the PMD pages are mapped twice - once to the physical mapping - * and to the vmalloc area for the first percpu chunk. The double - * mapping does add one more PMD TLB entry pressure but still is much - * better than only using 4k mappings while still being NUMA friendly. + * Large page remapping allocator */ #ifdef CONFIG_NEED_MULTIPLE_NODES -struct pcpul_ent { - unsigned int cpu; - void *ptr; -}; - -static size_t pcpul_size; -static struct pcpul_ent *pcpul_map; -static struct vm_struct pcpul_vm; - -static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) +static void __init pcpul_map(void *ptr, size_t size, void *addr) { - size_t off = (size_t)pageno << PAGE_SHIFT; + pmd_t *pmd, pmd_v; - if (off >= pcpul_size) - return NULL; - - return virt_to_page(pcpul_map[cpu].ptr + off); + pmd = populate_extra_pmd((unsigned long)addr); + pmd_v = pfn_pmd(page_to_pfn(virt_to_page(ptr)), PAGE_KERNEL_LARGE); + set_pmd(pmd, pmd_v); } static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { - size_t map_size, dyn_size; - unsigned int cpu; - int i, j; - ssize_t ret; + size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; @@ -198,134 +175,10 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) return -EINVAL; } - /* - * Currently supports only single page. Supporting multiple - * pages won't be too difficult if it ever becomes necessary. 
- */ - pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - if (pcpul_size > PMD_SIZE) { - pr_warning("PERCPU: static data is larger than large page, " - "can't use large page\n"); - return -EINVAL; - } - dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; - - /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); - pcpul_map = alloc_bootmem(map_size); - - for_each_possible_cpu(cpu) { - pcpul_map[cpu].cpu = cpu; - pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, - PMD_SIZE); - if (!pcpul_map[cpu].ptr) { - pr_warning("PERCPU: failed to allocate large page " - "for cpu%u\n", cpu); - goto enomem; - } - - /* - * Only use pcpul_size bytes and give back the rest. - * - * Ingo: The 2MB up-rounding bootmem is needed to make - * sure the partial 2MB page is still fully RAM - it's - * not well-specified to have a PAT-incompatible area - * (unmapped RAM, device memory, etc.) in that hole. - */ - free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), - PMD_SIZE - pcpul_size); - - memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); - } - - /* allocate address and map */ - pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = num_possible_cpus() * PMD_SIZE; - vm_area_register_early(&pcpul_vm, PMD_SIZE); - - for_each_possible_cpu(cpu) { - pmd_t *pmd, pmd_v; - - pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + - cpu * PMD_SIZE); - pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), - PAGE_KERNEL_LARGE); - set_pmd(pmd, pmd_v); - } - - /* we're ready, commit */ - pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", pcpul_vm.addr, static_size); - - ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - PMD_SIZE, pcpul_vm.addr, NULL); - - /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < num_possible_cpus() - 1; i++) - for (j = i + 1; j < num_possible_cpus(); j++) - if (pcpul_map[i].ptr > pcpul_map[j].ptr) { - struct pcpul_ent tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } - - return ret; - -enomem: - for_each_possible_cpu(cpu) - if (pcpul_map[cpu].ptr) - free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); - free_bootmem(__pa(pcpul_map), map_size); - return -ENOMEM; -} - -/** - * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area - * @kaddr: the kernel address in question - * - * Determine whether @kaddr falls in the pcpul recycled area. This is - * used by pageattr to detect VM aliases and break up the pcpu PMD - * mapping such that the same physical page is not mapped under - * different attributes. - * - * The recycled area is always at the tail of a partially used PMD - * page. - * - * RETURNS: - * Address of corresponding remapped pcpu address if match is found; - * otherwise, NULL. - */ -void *pcpu_lpage_remapped(void *kaddr) -{ - void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); - unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; - int left = 0, right = num_possible_cpus() - 1; - int pos; - - /* pcpul in use at all? 
*/ - if (!pcpul_map) - return NULL; - - /* okay, perform binary search */ - while (left <= right) { - pos = (left + right) / 2; - - if (pcpul_map[pos].ptr < pmd_addr) - left = pos + 1; - else if (pcpul_map[pos].ptr > pmd_addr) - right = pos - 1; - else { - /* it shouldn't be in the area for the first chunk */ - WARN_ON(offset < pcpul_size); - - return pcpul_vm.addr + - pcpul_map[pos].cpu * PMD_SIZE + offset; - } - } - - return NULL; + return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + reserve - PERCPU_FIRST_CHUNK_RESERVE, + PMD_SIZE, + pcpu_fc_alloc, pcpu_fc_free, pcpul_map); } #else static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1b734d7a8966..c106f7852424 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 41b5bfab4195..9f6bfd7d4b92 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -62,6 +62,7 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); +typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, @@ -79,6 +80,32 @@ extern ssize_t __init pcpu_4k_first_chunk( pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); +#ifdef CONFIG_NEED_MULTIPLE_NODES +extern ssize_t __init pcpu_lpage_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn); + +extern void *pcpu_lpage_remapped(void *kaddr); +#else +static inline ssize_t __init pcpu_lpage_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) +{ + return -EINVAL; +} + +static inline void *pcpu_lpage_remapped(void *kaddr) +{ + return NULL; +} +#endif + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index f3fe7bc7378f..17db527ee2e2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1190,6 +1190,19 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, return pcpu_unit_size; } +static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep) +{ + size_t size_sum; + + size_sum = PFN_ALIGN(static_size + reserved_size + + (*dyn_sizep >= 0 ? *dyn_sizep : 0)); + if (*dyn_sizep != 0) + *dyn_sizep = size_sum - static_size - reserved_size; + + return size_sum; +} + /* * Embedding first chunk setup helper. */ @@ -1241,10 +1254,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, unsigned int cpu; /* determine parameters and allocate */ - pcpue_size = PFN_ALIGN(static_size + reserved_size + - (dyn_size >= 0 ? 
dyn_size : 0)); - if (dyn_size != 0) - dyn_size = pcpue_size - static_size - reserved_size; + pcpue_size = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); chunk_size = pcpue_unit_size * num_possible_cpus(); @@ -1390,6 +1400,197 @@ out_free_ar: return ret; } +/* + * Large page remapping first chunk setup helper + */ +#ifdef CONFIG_NEED_MULTIPLE_NODES +struct pcpul_ent { + unsigned int cpu; + void *ptr; +}; + +static size_t pcpul_size; +static size_t pcpul_unit_size; +static struct pcpul_ent *pcpul_map; +static struct vm_struct pcpul_vm; + +static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) +{ + size_t off = (size_t)pageno << PAGE_SHIFT; + + if (off >= pcpul_size) + return NULL; + + return virt_to_page(pcpul_map[cpu].ptr + off); +} + +/** + * pcpu_lpage_first_chunk - remap the first percpu chunk using large page + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @lpage_size: the size of a large page + * @alloc_fn: function to allocate percpu lpage, always called with lpage_size + * @free_fn: function to free percpu memory, @size <= lpage_size + * @map_fn: function to map percpu lpage, always called with lpage_size + * + * This allocator uses large page as unit. A large page is allocated + * for each cpu and each is remapped into vmalloc area using large + * page mapping. As large page can be quite large, only part of it is + * used for the first chunk. Unused part is returned to the bootmem + * allocator. + * + * So, the large pages are mapped twice - once to the physical mapping + * and to the vmalloc area for the first percpu chunk. The double + * mapping does add one more large TLB entry pressure but still is + * much better than only using 4k mappings while still being NUMA + * friendly. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access on success, -errno on failure. + */ +ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) +{ + size_t size_sum; + size_t map_size; + unsigned int cpu; + int i, j; + ssize_t ret; + + /* + * Currently supports only single page. Supporting multiple + * pages won't be too difficult if it ever becomes necessary. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + + pcpul_unit_size = lpage_size; + pcpul_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + if (pcpul_size > pcpul_unit_size) { + pr_warning("PERCPU: static data is larger than large page, " + "can't use large page\n"); + return -EINVAL; + } + + /* allocate pointer array and alloc large pages */ + map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + pcpul_map = alloc_bootmem(map_size); + + for_each_possible_cpu(cpu) { + void *ptr; + + ptr = alloc_fn(cpu, lpage_size); + if (!ptr) { + pr_warning("PERCPU: failed to allocate large page " + "for cpu%u\n", cpu); + goto enomem; + } + + /* + * Only use pcpul_size bytes and give back the rest. + * + * Ingo: The lpage_size up-rounding bootmem is needed + * to make sure the partial lpage is still fully RAM - + * it's not well-specified to have a incompatible area + * (unmapped RAM, device memory, etc.) in that hole. 
+ */ + free_fn(ptr + pcpul_size, lpage_size - pcpul_size); + + pcpul_map[cpu].cpu = cpu; + pcpul_map[cpu].ptr = ptr; + + memcpy(ptr, __per_cpu_load, static_size); + } + + /* allocate address and map */ + pcpul_vm.flags = VM_ALLOC; + pcpul_vm.size = num_possible_cpus() * pcpul_unit_size; + vm_area_register_early(&pcpul_vm, pcpul_unit_size); + + for_each_possible_cpu(cpu) + map_fn(pcpul_map[cpu].ptr, pcpul_unit_size, + pcpul_vm.addr + cpu * pcpul_unit_size); + + /* we're ready, commit */ + pr_info("PERCPU: Remapped at %p with large pages, static data " + "%zu bytes\n", pcpul_vm.addr, static_size); + + ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, + reserved_size, dyn_size, pcpul_unit_size, + pcpul_vm.addr, NULL); + + /* sort pcpul_map array for pcpu_lpage_remapped() */ + for (i = 0; i < num_possible_cpus() - 1; i++) + for (j = i + 1; j < num_possible_cpus(); j++) + if (pcpul_map[i].ptr > pcpul_map[j].ptr) { + struct pcpul_ent tmp = pcpul_map[i]; + pcpul_map[i] = pcpul_map[j]; + pcpul_map[j] = tmp; + } + + return ret; + +enomem: + for_each_possible_cpu(cpu) + if (pcpul_map[cpu].ptr) + free_fn(pcpul_map[cpu].ptr, pcpul_size); + free_bootmem(__pa(pcpul_map), map_size); + return -ENOMEM; +} + +/** + * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area + * @kaddr: the kernel address in question + * + * Determine whether @kaddr falls in the pcpul recycled area. This is + * used by pageattr to detect VM aliases and break up the pcpu large + * page mapping such that the same physical page is not mapped under + * different attributes. + * + * The recycled area is always at the tail of a partially used large + * page. + * + * RETURNS: + * Address of corresponding remapped pcpu address if match is found; + * otherwise, NULL. + */ +void *pcpu_lpage_remapped(void *kaddr) +{ + unsigned long unit_mask = pcpul_unit_size - 1; + void *lpage_addr = (void *)((unsigned long)kaddr & ~unit_mask); + unsigned long offset = (unsigned long)kaddr & unit_mask; + int left = 0, right = num_possible_cpus() - 1; + int pos; + + /* pcpul in use at all? */ + if (!pcpul_map) + return NULL; + + /* okay, perform binary search */ + while (left <= right) { + pos = (left + right) / 2; + + if (pcpul_map[pos].ptr < lpage_addr) + left = pos + 1; + else if (pcpul_map[pos].ptr > lpage_addr) + right = pos - 1; + else { + /* it shouldn't be in the area for the first chunk */ + WARN_ON(offset < pcpul_size); + + return pcpul_vm.addr + + pcpul_map[pos].cpu * pcpul_unit_size + offset; + } + } + + return NULL; +} +#endif + /* * Generic percpu area setup. * -- cgit v1.2.3 From 38a6be525460f52ac6f2de1c3f73c5615a8853cd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: percpu: simplify pcpu_setup_first_chunk() Now that all first chunk allocator helpers allocate and map the first chunk themselves, there's no need to have optional default alloc/map in pcpu_setup_first_chunk(). Drop @populate_pte_fn and only leave @dyn_size optional and make all other params mandatory. This makes it much easier to follow what pcpu_setup_first_chunk() is doing and what actual differences tweaking each parameter results in. 
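As a sketch of the tightened contract (hypothetical caller, illustrative
only): the arch allocates and maps the first chunk itself, copies the static
data into each unit, and only then calls in with every parameter explicit:

	/* 'base' is assumed already mapped with static data copied into
	 * each unit of 'unit_size' bytes; my_get_page is an assumed
	 * pcpu_get_page_fn_t callback */
	size_t unit;

	unit = pcpu_setup_first_chunk(my_get_page, static_size,
				      PERCPU_MODULE_RESERVE,
				      -1 /* dyn_size: auto */,
				      unit_size, base);
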
[ Impact: drop unused code path ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/sparc/kernel/smp_64.c | 2 +- include/linux/percpu.h | 5 +-- mm/percpu.c | 104 +++++++++++++-------------------------------- 3 files changed, 33 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index fa44eaf8d897..ccad7b20ae75 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1528,7 +1528,7 @@ void __init setup_per_cpu_areas(void) pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size, PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr, NULL); + PCPU_CHUNK_SIZE, vm.addr); free_bootmem(__pa(pcpur_ptrs), ptrs_size); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9f6bfd7d4b92..ec64357e1762 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -66,9 +66,8 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size, - void *base_addr, - pcpu_fc_populate_pte_fn_t populate_pte_fn); + ssize_t dyn_size, size_t unit_size, + void *base_addr); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, diff --git a/mm/percpu.c b/mm/percpu.c index 17db527ee2e2..21d938a10662 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -983,24 +983,22 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes - * @reserved_size: the size of reserved percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes, 0 for none * @dyn_size: free size for dynamic allocation in bytes, -1 for auto - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto - * @base_addr: mapped address, NULL for auto - * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE + * @base_addr: mapped address * * Initialize the first percpu chunk which contains the kernel static * perpcu area. This function is to be called from arch percpu area - * setup path. The first two parameters are mandatory. The rest are - * optional. + * setup path. * * @get_page_fn() should return pointer to percpu page given cpu * number and page number. It should at least return enough pages to * cover the static area. The returned pages for static area should - * have been initialized with valid data. If @unit_size is specified, - * it can also return pages after the static area. NULL return - * indicates end of pages for the cpu. Note that @get_page_fn() must - * return the same number of pages for all cpus. + * have been initialized with valid data. It can also return pages + * after the static area. NULL return indicates end of pages for the + * cpu. Note that @get_page_fn() must return the same number of pages + * for all cpus. * * @reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves @@ -1015,17 +1013,12 @@ EXPORT_SYMBOL_GPL(free_percpu); * non-negative value makes percpu leave alone the area beyond * @static_size + @reserved_size + @dyn_size. 
* - * @unit_size, if non-negative, specifies unit size and must be - * aligned to PAGE_SIZE and equal to or larger than @static_size + - * @reserved_size + if non-negative, @dyn_size. - * - * Non-null @base_addr means that the caller already allocated virtual - * region for the first chunk and mapped it. percpu must not mess - * with the chunk. Note that @base_addr with 0 @unit_size or non-NULL - * @populate_pte_fn doesn't make any sense. + * @unit_size specifies unit size and must be aligned to PAGE_SIZE and + * equal to or larger than @static_size + @reserved_size + if + * non-negative, @dyn_size. * - * @populate_pte_fn is used to populate the pagetable. NULL means the - * caller already populated the pagetable. + * The caller should have mapped the first chunk at @base_addr and + * copied static data to each unit. * * If the first chunk ends up with both reserved and dynamic areas, it * is served by two chunks - one to serve the core static and reserved @@ -1040,9 +1033,8 @@ EXPORT_SYMBOL_GPL(free_percpu); */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size, - void *base_addr, - pcpu_fc_populate_pte_fn_t populate_pte_fn) + ssize_t dyn_size, size_t unit_size, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; @@ -1050,27 +1042,18 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, (dyn_size >= 0 ? dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu; - int nr_pages; - int err, i; + int i, nr_pages; /* santiy checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); - if (unit_size >= 0) { - BUG_ON(unit_size < size_sum); - BUG_ON(unit_size & ~PAGE_MASK); - BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); - } else - BUG_ON(base_addr); - BUG_ON(base_addr && populate_pte_fn); - - if (unit_size >= 0) - pcpu_unit_pages = unit_size >> PAGE_SHIFT; - else - pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, - PFN_UP(size_sum)); + BUG_ON(!base_addr); + BUG_ON(unit_size < size_sum); + BUG_ON(unit_size & ~PAGE_MASK); + BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) @@ -1079,6 +1062,10 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (dyn_size < 0) dyn_size = pcpu_unit_size - static_size - reserved_size; + first_vm.flags = VM_ALLOC; + first_vm.size = pcpu_chunk_size; + first_vm.addr = base_addr; + /* * Allocate chunk slots. The additional last slot is for * empty chunks. 
@@ -1101,6 +1088,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); schunk->page = schunk->page_ar; + schunk->immutable = true; if (reserved_size) { schunk->free_size = reserved_size; @@ -1124,31 +1112,13 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, dchunk->map = dmap; dchunk->map_alloc = ARRAY_SIZE(dmap); dchunk->page = schunk->page_ar; /* share page map with schunk */ + dchunk->immutable = true; dchunk->contig_hint = dchunk->free_size = dyn_size; dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; dchunk->map[dchunk->map_used++] = dchunk->free_size; } - /* allocate vm address */ - first_vm.flags = VM_ALLOC; - first_vm.size = pcpu_chunk_size; - - if (!base_addr) - vm_area_register_early(&first_vm, PAGE_SIZE); - else { - /* - * Pages already mapped. No need to remap into - * vmalloc area. In this case the first chunks can't - * be mapped or unmapped by percpu and are marked - * immutable. - */ - first_vm.addr = base_addr; - schunk->immutable = true; - if (dchunk) - dchunk->immutable = true; - } - /* assign pages */ nr_pages = -1; for_each_possible_cpu(cpu) { @@ -1168,19 +1138,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, BUG_ON(nr_pages != i); } - /* map them */ - if (populate_pte_fn) { - for_each_possible_cpu(cpu) - for (i = 0; i < nr_pages; i++) - populate_pte_fn(pcpu_chunk_addr(schunk, - cpu, i)); - - err = pcpu_map(schunk, 0, nr_pages); - if (err) - panic("failed to setup static percpu area, err=%d\n", - err); - } - /* link the first chunk in */ pcpu_first_chunk = dchunk ?: schunk; pcpu_chunk_relocate(pcpu_first_chunk, -1); @@ -1282,7 +1239,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, return pcpu_setup_first_chunk(pcpue_get_page, static_size, reserved_size, dyn_size, - pcpue_unit_size, pcpue_ptr, NULL); + pcpue_unit_size, pcpue_ptr); } /* @@ -1387,8 +1344,7 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, reserved_size, -1, - pcpu4k_unit_pages << PAGE_SHIFT, vm.addr, - NULL); + pcpu4k_unit_pages << PAGE_SHIFT, vm.addr); goto out_free_ar; enomem: @@ -1521,7 +1477,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, reserved_size, dyn_size, pcpul_unit_size, - pcpul_vm.addr, NULL); + pcpul_vm.addr); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From ce3141a277ff6cc37e51008b8888dc2cb7456ef1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: drop pcpu_chunk->page[] percpu core doesn't need to track all the allocated pages. It needs to know whether certain pages are populated and a way to reverse map address to page when freeing. This patch drops pcpu_chunk->page[] and uses a populated bitmap and vmalloc_to_page() lookup instead. Using vmalloc_to_page() exclusively is also possible but complicates first chunk handling, inflates cache footprint and prevents non-standard memory allocation for percpu memory. pcpu_chunk->page[] was used to track each page's allocation and allowed asymmetric population which happens during the failure path; however, with a single bitmap for all units, this is no longer possible. 
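A sketch of the new lookup scheme (standalone illustration, not the exact
patched helper): one populated bit per unit page, with the struct page
recovered on demand via vmalloc_to_page():

	/* my_chunk_page is a hypothetical name, for illustration only */
	static struct page *my_chunk_page(struct pcpu_chunk *chunk,
					  unsigned int cpu, int page_idx)
	{
		if (!test_bit(page_idx, chunk->populated))
			return NULL;	/* never populated */
		return vmalloc_to_page(
			(void *)pcpu_chunk_addr(chunk, cpu, page_idx));
	}
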
Bite the bullet and rewrite (de)populate functions so that things are done in clearly separated steps such that asymmetric population doesn't happen. This makes the (de)population process much more modular and will also ease implementing non-standard memory usage in the future (e.g. large pages). This makes @get_page_fn parameter to pcpu_setup_first_chunk() unnecessary. The parameter is dropped and all first chunk helpers are updated accordingly. Please note that despite the volume most changes to first chunk helpers are symbol renames for variables which don't need to be referenced outside of the helper anymore. This change reduces memory usage and cache footprint of pcpu_chunk. Now only #unit_pages bits are necessary per chunk. [ Impact: reduced memory usage and cache footprint for bookkeeping ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 42 ++-- include/linux/percpu.h | 3 +- mm/percpu.c | 604 ++++++++++++++++++++++++++++----------------- 3 files changed, 400 insertions(+), 249 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index ccad7b20ae75..f2f22ee97a7a 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1415,19 +1415,6 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, #endif } -static size_t pcpur_size __initdata; -static void **pcpur_ptrs __initdata; - -static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpur_size) - return NULL; - - return virt_to_page(pcpur_ptrs[cpu] + off); -} - #define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL) static void __init pcpu_map_range(unsigned long start, unsigned long end, @@ -1491,25 +1478,26 @@ void __init setup_per_cpu_areas(void) size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; static struct vm_struct vm; unsigned long delta, cpu; - size_t pcpu_unit_size; + size_t size_sum, pcpu_unit_size; size_t ptrs_size; + void **ptrs; - pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE; + size_sum = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); + dyn_size = size_sum - static_size - PERCPU_MODULE_RESERVE; - ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); - pcpur_ptrs = alloc_bootmem(ptrs_size); + ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(ptrs[0])); + ptrs = alloc_bootmem(ptrs_size); for_each_possible_cpu(cpu) { - pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, - PCPU_CHUNK_SIZE); + ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, + PCPU_CHUNK_SIZE); - free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), - PCPU_CHUNK_SIZE - pcpur_size); + free_bootmem(__pa(ptrs[cpu] + size_sum), + PCPU_CHUNK_SIZE - size_sum); - memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); + memcpy(ptrs[cpu], __per_cpu_load, static_size); } /* allocate address and map */ @@ -1523,14 +1511,14 @@ void __init setup_per_cpu_areas(void) start += cpu * PCPU_CHUNK_SIZE; end = start + PCPU_CHUNK_SIZE; - pcpu_map_range(start, end, virt_to_page(pcpur_ptrs[cpu])); + pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size, + pcpu_unit_size = pcpu_setup_first_chunk(static_size, PERCPU_MODULE_RESERVE, dyn_size, PCPU_CHUNK_SIZE, vm.addr); - free_bootmem(__pa(pcpur_ptrs), ptrs_size); + free_bootmem(__pa(ptrs), 
ptrs_size); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { diff --git a/include/linux/percpu.h b/include/linux/percpu.h index ec64357e1762..63c8b7a23e66 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -58,13 +58,12 @@ extern void *pcpu_base_addr; -typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); -extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, +extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, void *base_addr); diff --git a/mm/percpu.c b/mm/percpu.c index 639fce4d2caf..21756814d99f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -94,8 +94,7 @@ struct pcpu_chunk { int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ bool immutable; /* no [de]population allowed */ - struct page **page; /* points to page array */ - struct page *page_ar[]; /* #cpus * UNIT_PAGES */ + unsigned long populated[]; /* populated bitmap */ }; static int pcpu_unit_pages __read_mostly; @@ -129,9 +128,9 @@ static int pcpu_reserved_chunk_limit; * Synchronization rules. * * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former - * protects allocation/reclaim paths, chunks and chunk->page arrays. - * The latter is a spinlock and protects the index data structures - - * chunk slots, chunks and area maps in chunks. + * protects allocation/reclaim paths, chunks, populated bitmap and + * vmalloc mapping. The latter is a spinlock and protects the index + * data structures - chunk slots, chunks and area maps in chunks. * * During allocation, pcpu_alloc_mutex is kept locked all the time and * pcpu_lock is grabbed and released as necessary. All actual memory @@ -188,16 +187,13 @@ static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); } -static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, - unsigned int cpu, int page_idx) +static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, + unsigned int cpu, int page_idx) { - return &chunk->page[pcpu_page_idx(cpu, page_idx)]; -} + /* must not be used on pre-mapped chunk */ + WARN_ON(chunk->immutable); -static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, - int page_idx) -{ - return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; + return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); } /* set the pointer to a chunk in a page struct */ @@ -212,6 +208,34 @@ static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page) return (struct pcpu_chunk *)page->index; } +static void pcpu_next_unpop(struct pcpu_chunk *chunk, int *rs, int *re, int end) +{ + *rs = find_next_zero_bit(chunk->populated, end, *rs); + *re = find_next_bit(chunk->populated, end, *rs + 1); +} + +static void pcpu_next_pop(struct pcpu_chunk *chunk, int *rs, int *re, int end) +{ + *rs = find_next_bit(chunk->populated, end, *rs); + *re = find_next_zero_bit(chunk->populated, end, *rs + 1); +} + +/* + * (Un)populated page region iterators. Iterate over (un)populated + * page regions betwen @start and @end in @chunk. @rs and @re should + * be integer variables and will be set to start and end page index of + * the current region. 
+ */ +#define pcpu_for_each_unpop_region(chunk, rs, re, start, end) \ + for ((rs) = (start), pcpu_next_unpop((chunk), &(rs), &(re), (end)); \ + (rs) < (re); \ + (rs) = (re) + 1, pcpu_next_unpop((chunk), &(rs), &(re), (end))) + +#define pcpu_for_each_pop_region(chunk, rs, re, start, end) \ + for ((rs) = (start), pcpu_next_pop((chunk), &(rs), &(re), (end)); \ + (rs) < (re); \ + (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) + /** * pcpu_mem_alloc - allocate memory * @size: bytes to allocate @@ -545,42 +569,197 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) } /** - * pcpu_unmap - unmap pages out of a pcpu_chunk + * pcpu_get_pages_and_bitmap - get temp pages array and bitmap + * @chunk: chunk of interest + * @bitmapp: output parameter for bitmap + * @may_alloc: may allocate the array + * + * Returns pointer to array of pointers to struct page and bitmap, + * both of which can be indexed with pcpu_page_idx(). The returned + * array is cleared to zero and *@bitmapp is copied from + * @chunk->populated. Note that there is only one array and bitmap + * and access exclusion is the caller's responsibility. + * + * CONTEXT: + * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. + * Otherwise, don't care. + * + * RETURNS: + * Pointer to temp pages array on success, NULL on failure. + */ +static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, + unsigned long **bitmapp, + bool may_alloc) +{ + static struct page **pages; + static unsigned long *bitmap; + size_t pages_size = num_possible_cpus() * pcpu_unit_pages * + sizeof(pages[0]); + size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * + sizeof(unsigned long); + + if (!pages || !bitmap) { + if (may_alloc && !pages) + pages = pcpu_mem_alloc(pages_size); + if (may_alloc && !bitmap) + bitmap = pcpu_mem_alloc(bitmap_size); + if (!pages || !bitmap) + return NULL; + } + + memset(pages, 0, pages_size); + bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); + + *bitmapp = bitmap; + return pages; +} + +/** + * pcpu_free_pages - free pages which were allocated for @chunk + * @chunk: chunk pages were allocated for + * @pages: array of pages to be freed, indexed by pcpu_page_idx() + * @populated: populated bitmap + * @page_start: page index of the first page to be freed + * @page_end: page index of the last page to be freed + 1 + * + * Free pages [@page_start and @page_end) in @pages for all units. + * The pages were allocated for @chunk. + */ +static void pcpu_free_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + unsigned int cpu; + int i; + + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page *page = pages[pcpu_page_idx(cpu, i)]; + + if (page) + __free_page(page); + } + } +} + +/** + * pcpu_alloc_pages - allocates pages for @chunk + * @chunk: target chunk + * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() + * @populated: populated bitmap + * @page_start: page index of the first page to be allocated + * @page_end: page index of the last page to be allocated + 1 + * + * Allocate pages [@page_start,@page_end) into @pages for all units. + * The allocation is for @chunk. Percpu core doesn't care about the + * content of @pages and will pass it verbatim to pcpu_map_pages(). 
+ */ +static int pcpu_alloc_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; + unsigned int cpu; + int i; + + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; + + *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); + if (!*pagep) { + pcpu_free_pages(chunk, pages, populated, + page_start, page_end); + return -ENOMEM; + } + } + } + return 0; +} + +/** + * pcpu_pre_unmap_flush - flush cache prior to unmapping + * @chunk: chunk the regions to be flushed belongs to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages in [@page_start,@page_end) of @chunk are about to be + * unmapped. Flush cache. As each flushing trial can be very + * expensive, issue flush on the whole region at once rather than + * doing it for each cpu. This could be an overkill but is more + * scalable. + */ +static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + + flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); +} + +static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) +{ + unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); +} + +/** + * pcpu_unmap_pages - unmap pages out of a pcpu_chunk * @chunk: chunk of interest + * @pages: pages array which can be used to pass information to free + * @populated: populated bitmap * @page_start: page index of the first page to unmap * @page_end: page index of the last page to unmap + 1 - * @flush_tlb: whether to flush tlb or not * * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. - * If @flush is true, vcache is flushed before unmapping and tlb - * after. + * Corresponding elements in @pages were cleared by the caller and can + * be used to carry information to pcpu_free_pages() which will be + * called after all unmaps are finished. The caller should call + * proper pre/post flush functions. */ -static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, - bool flush_tlb) +static void pcpu_unmap_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; unsigned int cpu; + int i; - /* unmap must not be done on immutable chunk */ - WARN_ON(chunk->immutable); + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page *page; - /* - * Each flushing trial can be very expensive, issue flush on - * the whole region at once rather than doing it for each cpu. - * This could be an overkill but is more scalable. 
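The trade-off described in that comment is easy to quantify. A back-of-envelope sketch, assuming a contiguous chunk of 4 units with 8 pages each, 4k pages and a made-up base address, shows the single flush range covering every unit's copy of the pages in one call:

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4k pages, illustrative only */

/* unit-major layout: unit u's page p lives at base + (u*unit_pages+p)*4k */
static unsigned long chunk_addr(unsigned long base, int unit_pages,
				int unit, int page_idx)
{
	return base + ((unsigned long)(unit * unit_pages + page_idx)
			<< PAGE_SHIFT);
}

int main(void)
{
	unsigned long base = 0xffffc90000000000UL;	/* made-up vmalloc base */
	int unit_pages = 8, nr_units = 4;
	int page_start = 2, page_end = 5;

	/*
	 * One flush from unit 0's page_start to the last unit's
	 * page_end; pages in between are flushed even when they don't
	 * strictly need it, trading precision for a single call.
	 */
	printf("flush [%#lx, %#lx)\n",
	       chunk_addr(base, unit_pages, 0, page_start),
	       chunk_addr(base, unit_pages, nr_units - 1, page_end));
	return 0;
}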
- */ - flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + page = pcpu_chunk_page(chunk, cpu, i); + WARN_ON(!page); + pages[pcpu_page_idx(cpu, i)] = page; + } + __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), + page_end - page_start); + } - for_each_possible_cpu(cpu) - unmap_kernel_range_noflush( - pcpu_chunk_addr(chunk, cpu, page_start), - (page_end - page_start) << PAGE_SHIFT); - - /* ditto as flush_cache_vunmap() */ - if (flush_tlb) - flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + for (i = page_start; i < page_end; i++) + __clear_bit(i, populated); +} + +/** + * pcpu_post_unmap_tlb_flush - flush TLB after unmapping + * @chunk: pcpu_chunk the regions to be flushed belong to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush + * TLB for the regions. This can be skipped if the area is to be + * returned to vmalloc as vmalloc will handle TLB flushing lazily. + * + * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once + * for the whole region. + */ +static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + + flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); } static int __pcpu_map_pages(unsigned long addr, struct page **pages, @@ -591,35 +770,76 @@ static int __pcpu_map_pages(unsigned long addr, struct page **pages, } /** - * pcpu_map - map pages into a pcpu_chunk + * pcpu_map_pages - map pages into a pcpu_chunk * @chunk: chunk of interest + * @pages: pages array containing pages to be mapped + * @populated: populated bitmap * @page_start: page index of the first page to map * @page_end: page index of the last page to map + 1 * - * For each cpu, map pages [@page_start,@page_end) into @chunk. - * vcache is flushed afterwards. + * For each cpu, map pages [@page_start,@page_end) into @chunk. The + * caller is responsible for calling pcpu_post_map_flush() after all + * mappings are complete. + * + * This function is responsible for setting corresponding bits in + * @chunk->populated bitmap and whatever is necessary for reverse + * lookup (addr -> chunk). 
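The error path of pcpu_map_pages() further down follows the standard partial-failure rollback shape: map each unit in turn and, on failure, undo exactly the units that succeeded. A stripped-down sketch with stub map/unmap functions (unit 2 is made to fail for the demo) makes the pattern explicit:

#include <stdio.h>

/* stubs so the sketch compiles and runs */
static int map_one(int i)    { return i == 2 ? -1 : 0; }
static void unmap_one(int i) { printf("rolled back unit %d\n", i); }

/* map units [0,n); on failure, undo only the ones that succeeded */
static int map_all(int n)
{
	int i, j, err = 0;

	for (i = 0; i < n; i++) {
		err = map_one(i);
		if (err)
			goto rollback;
	}
	return 0;

rollback:
	for (j = 0; j < i; j++)
		unmap_one(j);
	return err;
}

int main(void)
{
	printf("map_all returned %d\n", map_all(4));
	return 0;
}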
*/ -static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) +static int pcpu_map_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - unsigned int cpu; - int err; - - /* map must not be done on immutable chunk */ - WARN_ON(chunk->immutable); + unsigned int cpu, tcpu; + int i, err; for_each_possible_cpu(cpu) { err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), - pcpu_chunk_pagep(chunk, cpu, page_start), + &pages[pcpu_page_idx(cpu, page_start)], page_end - page_start); if (err < 0) - return err; + goto err; } + /* mapping successful, link chunk and mark populated */ + for (i = page_start; i < page_end; i++) { + for_each_possible_cpu(cpu) + pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], + chunk); + __set_bit(i, populated); + } + + return 0; + +err: + for_each_possible_cpu(tcpu) { + if (tcpu == cpu) + break; + __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), + page_end - page_start); + } + return err; +} + +/** + * pcpu_post_map_flush - flush cache after mapping + * @chunk: pcpu_chunk the regions to be flushed belong to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages [@page_start,@page_end) of @chunk have been mapped. Flush + * cache. + * + * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once + * for the whole region. + */ +static void pcpu_post_map_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + /* flush at once, please read comments in pcpu_unmap() */ flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), pcpu_chunk_addr(chunk, last, page_end)); - return 0; } /** @@ -636,39 +856,45 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) * CONTEXT: * pcpu_alloc_mutex. */ -static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, - bool flush) +static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) { int page_start = PFN_DOWN(off); int page_end = PFN_UP(off + size); - int unmap_start = -1; - int uninitialized_var(unmap_end); - unsigned int cpu; - int i; + struct page **pages; + unsigned long *populated; + int rs, re; + + /* quick path, check whether it's empty already */ + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + if (rs == page_start && re == page_end) + return; + break; + } - for (i = page_start; i < page_end; i++) { - for_each_possible_cpu(cpu) { - struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + /* immutable chunks can't be depopulated */ + WARN_ON(chunk->immutable); - if (!*pagep) - continue; + /* + * If control reaches here, there must have been at least one + * successful population attempt so the temp pages array must + * be available now. + */ + pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); + BUG_ON(!pages); - __free_page(*pagep); + /* unmap and free */ + pcpu_pre_unmap_flush(chunk, page_start, page_end); - /* - * If it's partial depopulation, it might get - * populated or depopulated again. Mark the - * page gone. - */ - *pagep = NULL; + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) + pcpu_unmap_pages(chunk, pages, populated, rs, re); - unmap_start = unmap_start < 0 ? 
i : unmap_start; - unmap_end = i + 1; - } - } + /* no need to flush tlb, vmalloc will handle it lazily */ + + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) + pcpu_free_pages(chunk, pages, populated, rs, re); - if (unmap_start >= 0) - pcpu_unmap(chunk, unmap_start, unmap_end, flush); + /* commit new bitmap */ + bitmap_copy(chunk->populated, populated, pcpu_unit_pages); } /** @@ -685,50 +911,61 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, */ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) { - const gfp_t alloc_mask = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; int page_start = PFN_DOWN(off); int page_end = PFN_UP(off + size); - int map_start = -1; - int uninitialized_var(map_end); + int free_end = page_start, unmap_end = page_start; + struct page **pages; + unsigned long *populated; unsigned int cpu; - int i; + int rs, re, rc; - for (i = page_start; i < page_end; i++) { - if (pcpu_chunk_page_occupied(chunk, i)) { - if (map_start >= 0) { - if (pcpu_map(chunk, map_start, map_end)) - goto err; - map_start = -1; - } - continue; - } + /* quick path, check whether all pages are already there */ + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) { + if (rs == page_start && re == page_end) + goto clear; + break; + } - map_start = map_start < 0 ? i : map_start; - map_end = i + 1; + /* need to allocate and map pages, this chunk can't be immutable */ + WARN_ON(chunk->immutable); - for_each_possible_cpu(cpu) { - struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); + if (!pages) + return -ENOMEM; - *pagep = alloc_pages_node(cpu_to_node(cpu), - alloc_mask, 0); - if (!*pagep) - goto err; - pcpu_set_page_chunk(*pagep, chunk); - } + /* alloc and map */ + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); + if (rc) + goto err_free; + free_end = re; } - if (map_start >= 0 && pcpu_map(chunk, map_start, map_end)) - goto err; + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + rc = pcpu_map_pages(chunk, pages, populated, rs, re); + if (rc) + goto err_unmap; + unmap_end = re; + } + pcpu_post_map_flush(chunk, page_start, page_end); + /* commit new bitmap */ + bitmap_copy(chunk->populated, populated, pcpu_unit_pages); +clear: for_each_possible_cpu(cpu) memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, size); - return 0; -err: - /* likely under heavy memory pressure, give memory back */ - pcpu_depopulate_chunk(chunk, off, size, true); - return -ENOMEM; + +err_unmap: + pcpu_pre_unmap_flush(chunk, page_start, unmap_end); + pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) + pcpu_unmap_pages(chunk, pages, populated, rs, re); + pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); +err_free: + pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) + pcpu_free_pages(chunk, pages, populated, rs, re); + return rc; } static void free_pcpu_chunk(struct pcpu_chunk *chunk) @@ -752,7 +989,6 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); chunk->map_alloc = PCPU_DFL_MAP_ALLOC; chunk->map[chunk->map_used++] = pcpu_unit_size; - chunk->page = chunk->page_ar; chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); if (!chunk->vm) { @@ -933,7 +1169,7 @@ static void pcpu_reclaim(struct work_struct *work) mutex_unlock(&pcpu_alloc_mutex); list_for_each_entry_safe(chunk, next, &todo, list) { 
- pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); + pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size); free_pcpu_chunk(chunk); } } @@ -981,7 +1217,6 @@ EXPORT_SYMBOL_GPL(free_percpu); /** * pcpu_setup_first_chunk - initialize the first percpu chunk - * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes, 0 for none * @dyn_size: free size for dynamic allocation in bytes, -1 for auto @@ -992,14 +1227,6 @@ EXPORT_SYMBOL_GPL(free_percpu); * perpcu area. This function is to be called from arch percpu area * setup path. * - * @get_page_fn() should return pointer to percpu page given cpu - * number and page number. It should at least return enough pages to - * cover the static area. The returned pages for static area should - * have been initialized with valid data. It can also return pages - * after the static area. NULL return indicates end of pages for the - * cpu. Note that @get_page_fn() must return the same number of pages - * for all cpus. - * * @reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves * the first chunk such that it's available only through reserved @@ -1031,8 +1258,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * The determined pcpu_unit_size which can be used to initialize * percpu access. */ -size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t reserved_size, +size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, void *base_addr) { @@ -1041,8 +1267,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t size_sum = static_size + reserved_size + (dyn_size >= 0 ? 
dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; - unsigned int cpu; - int i, nr_pages; + int i; /* santiy checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || @@ -1056,8 +1281,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; - pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) - + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); + pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); if (dyn_size < 0) dyn_size = pcpu_unit_size - static_size - reserved_size; @@ -1087,8 +1312,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, schunk->vm = &first_vm; schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); - schunk->page = schunk->page_ar; schunk->immutable = true; + bitmap_fill(schunk->populated, pcpu_unit_pages); if (reserved_size) { schunk->free_size = reserved_size; @@ -1106,38 +1331,19 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, /* init dynamic chunk if necessary */ if (dyn_size) { - dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); + dchunk = alloc_bootmem(pcpu_chunk_struct_size); INIT_LIST_HEAD(&dchunk->list); dchunk->vm = &first_vm; dchunk->map = dmap; dchunk->map_alloc = ARRAY_SIZE(dmap); - dchunk->page = schunk->page_ar; /* share page map with schunk */ dchunk->immutable = true; + bitmap_fill(dchunk->populated, pcpu_unit_pages); dchunk->contig_hint = dchunk->free_size = dyn_size; dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; dchunk->map[dchunk->map_used++] = dchunk->free_size; } - /* assign pages */ - nr_pages = -1; - for_each_possible_cpu(cpu) { - for (i = 0; i < pcpu_unit_pages; i++) { - struct page *page = get_page_fn(cpu, i); - - if (!page) - break; - *pcpu_chunk_pagep(schunk, cpu, i) = page; - } - - BUG_ON(i < PFN_UP(static_size)); - - if (nr_pages < 0) - nr_pages = i; - else - BUG_ON(nr_pages != i); - } - /* link the first chunk in */ pcpu_first_chunk = dchunk ?: schunk; pcpu_chunk_relocate(pcpu_first_chunk, -1); @@ -1160,23 +1366,6 @@ static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, return size_sum; } -/* - * Embedding first chunk setup helper. 
- */ -static void *pcpue_ptr __initdata; -static size_t pcpue_size __initdata; -static size_t pcpue_unit_size __initdata; - -static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpue_size) - return NULL; - - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); -} - /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @static_size: the size of static percpu area in bytes @@ -1207,18 +1396,19 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size) { - size_t chunk_size; + size_t size_sum, unit_size, chunk_size; + void *base; unsigned int cpu; /* determine parameters and allocate */ - pcpue_size = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - chunk_size = pcpue_unit_size * num_possible_cpus(); + unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + chunk_size = unit_size * num_possible_cpus(); - pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, - __pa(MAX_DMA_ADDRESS)); - if (!pcpue_ptr) { + base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, + __pa(MAX_DMA_ADDRESS)); + if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); return -ENOMEM; @@ -1226,33 +1416,18 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, /* return the leftover and copy */ for_each_possible_cpu(cpu) { - void *ptr = pcpue_ptr + cpu * pcpue_unit_size; + void *ptr = base + cpu * unit_size; - free_bootmem(__pa(ptr + pcpue_size), - pcpue_unit_size - pcpue_size); + free_bootmem(__pa(ptr + size_sum), unit_size - size_sum); memcpy(ptr, __per_cpu_load, static_size); } /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); + size_sum >> PAGE_SHIFT, base, static_size); - return pcpu_setup_first_chunk(pcpue_get_page, static_size, - reserved_size, dyn_size, - pcpue_unit_size, pcpue_ptr); -} - -/* - * 4k page first chunk setup helper. 
- */ -static struct page **pcpu4k_pages __initdata; -static int pcpu4k_unit_pages __initdata; - -static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) -{ - if (pageno < pcpu4k_unit_pages) - return pcpu4k_pages[cpu * pcpu4k_unit_pages + pageno]; - return NULL; + return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + unit_size, base); } /** @@ -1279,23 +1454,25 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + int unit_pages; size_t pages_size; + struct page **pages; unsigned int cpu; int i, j; ssize_t ret; - pcpu4k_unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, - PCPU_MIN_UNIT_SIZE)); + unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, + PCPU_MIN_UNIT_SIZE)); /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_unit_pages * num_possible_cpus() * - sizeof(pcpu4k_pages[0])); - pcpu4k_pages = alloc_bootmem(pages_size); + pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * + sizeof(pages[0])); + pages = alloc_bootmem(pages_size); /* allocate pages */ j = 0; for_each_possible_cpu(cpu) - for (i = 0; i < pcpu4k_unit_pages; i++) { + for (i = 0; i < unit_pages; i++) { void *ptr; ptr = alloc_fn(cpu, PAGE_SIZE); @@ -1304,25 +1481,24 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, "4k page for cpu%u\n", cpu); goto enomem; } - pcpu4k_pages[j++] = virt_to_page(ptr); + pages[j++] = virt_to_page(ptr); } /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = num_possible_cpus() * pcpu4k_unit_pages << PAGE_SHIFT; + vm.size = num_possible_cpus() * unit_pages << PAGE_SHIFT; vm_area_register_early(&vm, PAGE_SIZE); for_each_possible_cpu(cpu) { unsigned long unit_addr = (unsigned long)vm.addr + - (cpu * pcpu4k_unit_pages << PAGE_SHIFT); + (cpu * unit_pages << PAGE_SHIFT); - for (i = 0; i < pcpu4k_unit_pages; i++) + for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, - &pcpu4k_pages[cpu * pcpu4k_unit_pages], - pcpu4k_unit_pages); + ret = __pcpu_map_pages(unit_addr, &pages[cpu * unit_pages], + unit_pages); if (ret < 0) panic("failed to map percpu area, err=%zd\n", ret); @@ -1340,19 +1516,18 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, /* we're ready, commit */ pr_info("PERCPU: %d 4k pages per cpu, static data %zu bytes\n", - pcpu4k_unit_pages, static_size); + unit_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - reserved_size, -1, - pcpu4k_unit_pages << PAGE_SHIFT, vm.addr); + ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, + unit_pages << PAGE_SHIFT, vm.addr); goto out_free_ar; enomem: while (--j >= 0) - free_fn(page_address(pcpu4k_pages[j]), PAGE_SIZE); + free_fn(page_address(pages[j]), PAGE_SIZE); ret = -ENOMEM; out_free_ar: - free_bootmem(__pa(pcpu4k_pages), pages_size); + free_bootmem(__pa(pages), pages_size); return ret; } @@ -1370,16 +1545,6 @@ static size_t pcpul_unit_size; static struct pcpul_ent *pcpul_map; static struct vm_struct pcpul_vm; -static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpul_size) - return NULL; - - return virt_to_page(pcpul_map[cpu].ptr + off); -} - /** * pcpu_lpage_first_chunk - remap the first percpu chunk 
using large page * @static_size: the size of static percpu area in bytes @@ -1475,9 +1640,8 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", pcpul_vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, - reserved_size, dyn_size, pcpul_unit_size, - pcpul_vm.addr); + ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + pcpul_unit_size, pcpul_vm.addr); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From 2f39e637ea240efb74cf807d31c93a71a0b89174 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: allow non-linear / sparse cpu -> unit mapping Currently cpu and unit are always identity mapped. To allow more efficient large page support on NUMA and lazy allocation for possible but offline cpus, cpu -> unit mapping needs to be non-linear and/or sparse. This can be easily implemented by adding a cpu -> unit mapping array and using it whenever looking up the matching unit for a cpu. The only unusal conversion is in pcpu_chunk_addr_search(). The passed in address is unit0 based and unit0 might not be in use so it needs to be converted to address of an in-use unit. This is easily done by adding the unit offset for the current processor. [ Impact: allows non-linear/sparse cpu -> unit mapping, no visible change yet ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 2 +- include/linux/percpu.h | 3 +- mm/percpu.c | 129 +++++++++++++++++++++++++++++++++------------ 3 files changed, 97 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index f2f22ee97a7a..6970333b48b8 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1516,7 +1516,7 @@ void __init setup_per_cpu_areas(void) pcpu_unit_size = pcpu_setup_first_chunk(static_size, PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr); + PCPU_CHUNK_SIZE, vm.addr, NULL); free_bootmem(__pa(ptrs), ptrs_size); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 63c8b7a23e66..1e0e8878dc2a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -57,6 +57,7 @@ #endif extern void *pcpu_base_addr; +extern const int *pcpu_unit_map; typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); @@ -66,7 +67,7 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, - void *base_addr); + void *base_addr, const int *unit_map); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, diff --git a/mm/percpu.c b/mm/percpu.c index 21756814d99f..2196fae24f00 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -8,12 +8,13 @@ * * This is percpu allocator which can handle both static and dynamic * areas. Percpu areas are allocated in chunks in vmalloc area. Each - * chunk is consisted of num_possible_cpus() units and the first chunk - * is used for static percpu variables in the kernel image (special - * boot time alloc/init handling necessary as these areas need to be - * brought up before allocation services are running). Unit grows as - * necessary and all units grow or shrink in unison. 
When a chunk is - * filled up, another chunk is allocated. ie. in vmalloc area + * chunk is consisted of boot-time determined number of units and the + * first chunk is used for static percpu variables in the kernel image + * (special boot time alloc/init handling necessary as these areas + * need to be brought up before allocation services are running). + * Unit grows as necessary and all units grow or shrink in unison. + * When a chunk is filled up, another chunk is allocated. ie. in + * vmalloc area * * c0 c1 c2 * ------------------- ------------------- ------------ @@ -22,11 +23,13 @@ * * Allocation is done in offset-size areas of single unit space. Ie, * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, - * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring - * percpu base registers pcpu_unit_size apart. + * c1:u1, c1:u2 and c1:u3. On UMA, units corresponds directly to + * cpus. On NUMA, the mapping can be non-linear and even sparse. + * Percpu access can be done by configuring percpu base registers + * according to cpu to unit mapping and pcpu_unit_size. * - * There are usually many small percpu allocations many of them as - * small as 4 bytes. The allocator organizes chunks into lists + * There are usually many small percpu allocations many of them being + * as small as 4 bytes. The allocator organizes chunks into lists * according to free size and tries to allocate from the fullest one. * Each chunk keeps the maximum contiguous area size hint which is * guaranteed to be eqaul to or larger than the maximum contiguous @@ -99,14 +102,22 @@ struct pcpu_chunk { static int pcpu_unit_pages __read_mostly; static int pcpu_unit_size __read_mostly; +static int pcpu_nr_units __read_mostly; static int pcpu_chunk_size __read_mostly; static int pcpu_nr_slots __read_mostly; static size_t pcpu_chunk_struct_size __read_mostly; +/* cpus with the lowest and highest unit numbers */ +static unsigned int pcpu_first_unit_cpu __read_mostly; +static unsigned int pcpu_last_unit_cpu __read_mostly; + /* the address of the first chunk which starts with the kernel static area */ void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); +/* cpu -> unit map */ +const int *pcpu_unit_map __read_mostly; + /* * The first chunk which always exists. Note that unlike other * chunks, this one can be allocated and mapped in several different @@ -177,7 +188,7 @@ static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) static int pcpu_page_idx(unsigned int cpu, int page_idx) { - return cpu * pcpu_unit_pages + page_idx; + return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx; } static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, @@ -321,6 +332,14 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) return pcpu_first_chunk; } + /* + * The address is relative to unit0 which might be unused and + * thus unmapped. Offset the address to the unit space of the + * current processor before looking it up in the vmalloc + * space. Note that any possible cpu id can be used here, so + * there's no need to worry about preemption or cpu hotplug. 
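Concretely, the translation from a unit0-relative percpu address to an address inside an in-use unit is a single offset; the standalone sketch below mirrors the expression in the diff, with a made-up non-linear unit map and unit size:

#include <stdio.h>

int main(void)
{
	const int unit_map[] = { 0, 2, 1, 3 };	/* non-linear cpu -> unit map */
	const long unit_size = 64 << 10;	/* assumed 64k units */
	char *addr = (char *)0x1000;		/* unit0-relative address */
	int this_cpu = 1;			/* pretend we run on cpu 1 */

	/* offset into the running cpu's own unit before the page lookup */
	char *lookup = addr + unit_map[this_cpu] * unit_size;

	printf("unit0 addr %p -> lookup addr %p\n",
	       (void *)addr, (void *)lookup);
	return 0;
}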
+ */ + addr += pcpu_unit_map[smp_processor_id()] * pcpu_unit_size; return pcpu_get_page_chunk(vmalloc_to_page(addr)); } @@ -593,8 +612,7 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, { static struct page **pages; static unsigned long *bitmap; - size_t pages_size = num_possible_cpus() * pcpu_unit_pages * - sizeof(pages[0]); + size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); @@ -692,10 +710,9 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_cache_vunmap( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) @@ -756,10 +773,9 @@ static void pcpu_unmap_pages(struct pcpu_chunk *chunk, static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_tlb_kernel_range( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } static int __pcpu_map_pages(unsigned long addr, struct page **pages, @@ -835,11 +851,9 @@ err: static void pcpu_post_map_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - /* flush at once, please read comments in pcpu_unmap() */ - flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_cache_vmap( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } /** @@ -953,8 +967,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) bitmap_copy(chunk->populated, populated, pcpu_unit_pages); clear: for_each_possible_cpu(cpu) - memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, - size); + memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); return 0; err_unmap: @@ -1088,6 +1101,7 @@ area_found: mutex_unlock(&pcpu_alloc_mutex); + /* return address relative to unit0 */ return __addr_to_pcpu_ptr(chunk->vm->addr + off); fail_unlock: @@ -1222,6 +1236,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE * @base_addr: mapped address + * @unit_map: cpu -> unit map, NULL for sequential mapping * * Initialize the first percpu chunk which contains the kernel static * perpcu area. This function is to be called from arch percpu area @@ -1260,16 +1275,17 @@ EXPORT_SYMBOL_GPL(free_percpu); */ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, - void *base_addr) + void *base_addr, const int *unit_map) { static struct vm_struct first_vm; static int smap[2], dmap[2]; size_t size_sum = static_size + reserved_size + (dyn_size >= 0 ? 
dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; + unsigned int cpu, tcpu; int i; - /* santiy checks */ + /* sanity checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); @@ -1278,9 +1294,52 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, BUG_ON(unit_size & ~PAGE_MASK); BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + /* determine number of units and verify and initialize pcpu_unit_map */ + if (unit_map) { + int first_unit = INT_MAX, last_unit = INT_MIN; + + for_each_possible_cpu(cpu) { + int unit = unit_map[cpu]; + + BUG_ON(unit < 0); + for_each_possible_cpu(tcpu) { + if (tcpu == cpu) + break; + /* the mapping should be one-to-one */ + BUG_ON(unit_map[tcpu] == unit); + } + + if (unit < first_unit) { + pcpu_first_unit_cpu = cpu; + first_unit = unit; + } + if (unit > last_unit) { + pcpu_last_unit_cpu = cpu; + last_unit = unit; + } + } + pcpu_nr_units = last_unit + 1; + pcpu_unit_map = unit_map; + } else { + int *identity_map; + + /* #units == #cpus, identity mapped */ + identity_map = alloc_bootmem(num_possible_cpus() * + sizeof(identity_map[0])); + + for_each_possible_cpu(cpu) + identity_map[cpu] = cpu; + + pcpu_first_unit_cpu = 0; + pcpu_last_unit_cpu = pcpu_nr_units - 1; + pcpu_nr_units = num_possible_cpus(); + pcpu_unit_map = identity_map; + } + + /* determine basic parameters */ pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; - pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; + pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); @@ -1349,7 +1408,7 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, pcpu_chunk_relocate(pcpu_first_chunk, -1); /* we're done */ - pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); + pcpu_base_addr = schunk->vm->addr; return pcpu_unit_size; } @@ -1427,7 +1486,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, size_sum >> PAGE_SHIFT, base, static_size); return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, base); + unit_size, base, NULL); } /** @@ -1519,7 +1578,7 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, unit_pages, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, - unit_pages << PAGE_SHIFT, vm.addr); + unit_pages << PAGE_SHIFT, vm.addr, NULL); goto out_free_ar; enomem: @@ -1641,7 +1700,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, "%zu bytes\n", pcpul_vm.addr, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - pcpul_unit_size, pcpul_vm.addr); + pcpul_unit_size, pcpul_vm.addr, NULL); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From a530b7958612bafe2027e21359083dba84f0b3b4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: teach large page allocator about NUMA Large page first chunk allocator is primarily used for NUMA machines; however, its NUMA handling is extremely simplistic. Regardless of their proximity, each cpu is put into separate large page just to return most of the allocated space back wasting large amount of vmalloc space and increasing cache footprint. This patch teachs NUMA details to large page allocator. 
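The proximity grouping at the heart of pcpu_lpage_build_unit_map() (the full version appears in the diff below) reduces to a few lines. Here it is as a standalone userspace program with an assumed two-node topology of eight cpus; it prints cpus 0-3 in group 0 and cpus 4-7 in group 1:

#include <stdio.h>

#define NR_CPUS 8
#define LOCAL_DISTANCE 10
#define REMOTE_DISTANCE 20

/* toy topology: cpus 0-3 on node 0, cpus 4-7 on node 1 (assumption
 * for illustration; the kernel gets this from the arch callback) */
static int cpu_distance(int from, int to)
{
	return (from / 4 == to / 4) ? LOCAL_DISTANCE : REMOTE_DISTANCE;
}

int main(void)
{
	int group_map[NR_CPUS] = { 0 };
	int cpu, tcpu, group;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		group = 0;
next_group:
		/* a cpu joins a group only if it is LOCAL_DISTANCE,
		 * both ways, from every cpu already in it */
		for (tcpu = 0; tcpu < cpu; tcpu++) {
			if (group_map[tcpu] == group &&
			    (cpu_distance(cpu, tcpu) > LOCAL_DISTANCE ||
			     cpu_distance(tcpu, cpu) > LOCAL_DISTANCE)) {
				group++;
				goto next_group;
			}
		}
		group_map[cpu] = group;
		printf("cpu%d -> group %d\n", cpu, group);
	}
	return 0;
}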
Given processor proximity information, pcpu_lpage_build_unit_map() will find fitting cpu -> unit mapping in which cpus in LOCAL_DISTANCE share the same large page and not too much virtual address space is wasted. This greatly reduces the unit and thus chunk size and wastes much less address space for the first chunk. For example, on 4/4 NUMA machine, the original code occupied 16MB of virtual space for the first chunk while the new code only uses 4MB - one 2MB page for each node. [ Impact: much better space efficiency on NUMA machines ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Jan Beulich Cc: Andi Kleen Cc: David Miller --- arch/x86/kernel/setup_percpu.c | 72 +++++++-- include/linux/percpu.h | 24 ++- mm/percpu.c | 358 ++++++++++++++++++++++++++++++++--------- 3 files changed, 359 insertions(+), 95 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4f2e0ac9130b..7501bb14bd51 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -149,36 +149,73 @@ static void __init pcpul_map(void *ptr, size_t size, void *addr) set_pmd(pmd, pmd_v); } +static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) +{ + if (early_cpu_to_node(from) == early_cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; +} + static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; + size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; + size_t unit_map_size, unit_size; + int *unit_map; + int nr_units; + ssize_t ret; + + /* on non-NUMA, embedding is better */ + if (!chosen && !pcpu_need_numa()) + return -EINVAL; + + /* need PSE */ + if (!cpu_has_pse) { + pr_warning("PERCPU: lpage allocator requires PSE\n"); + return -EINVAL; + } + /* allocate and build unit_map */ + unit_map_size = num_possible_cpus() * sizeof(int); + unit_map = alloc_bootmem_nopanic(unit_map_size); + if (!unit_map) { + pr_warning("PERCPU: failed to allocate unit_map\n"); + return -ENOMEM; + } + + ret = pcpu_lpage_build_unit_map(static_size, + PERCPU_FIRST_CHUNK_RESERVE, + &dyn_size, &unit_size, PMD_SIZE, + unit_map, pcpu_lpage_cpu_distance); + if (ret < 0) { + pr_warning("PERCPU: failed to build unit_map\n"); + goto out_free; + } + nr_units = ret; + + /* do the parameters look okay? 
*/ if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = num_possible_cpus() * PMD_SIZE; - - /* on non-NUMA, embedding is better */ - if (!pcpu_need_numa()) - return -EINVAL; + size_t tot_size = nr_units * unit_size; /* don't consume more than 20% of vmalloc area */ if (tot_size > vm_size / 5) { pr_info("PERCPU: too large chunk size %zuMB for " "large page remap\n", tot_size >> 20); - return -EINVAL; + ret = -EINVAL; + goto out_free; } } - /* need PSE */ - if (!cpu_has_pse) { - pr_warning("PERCPU: lpage allocator requires PSE\n"); - return -EINVAL; - } - - return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, - PMD_SIZE, - pcpu_fc_alloc, pcpu_fc_free, pcpul_map); + ret = pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + dyn_size, unit_size, PMD_SIZE, + unit_map, nr_units, + pcpu_fc_alloc, pcpu_fc_free, pcpul_map); +out_free: + if (ret < 0) + free_bootmem(__pa(unit_map), unit_map_size); + return ret; } #else static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) @@ -299,7 +336,8 @@ void __init setup_per_cpu_areas(void) /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; + per_cpu_offset(cpu) = + delta + pcpu_unit_map[cpu] * pcpu_unit_size; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1e0e8878dc2a..8ce91af4aa19 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -62,6 +62,7 @@ extern const int *pcpu_unit_map; typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); +typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( @@ -80,18 +81,37 @@ extern ssize_t __init pcpu_4k_first_chunk( pcpu_fc_populate_pte_fn_t populate_pte_fn); #ifdef CONFIG_NEED_MULTIPLE_NODES +extern int __init pcpu_lpage_build_unit_map( + size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn); + extern ssize_t __init pcpu_lpage_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); extern void *pcpu_lpage_remapped(void *kaddr); #else +static inline int pcpu_lpage_build_unit_map( + size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + return -EINVAL; +} + static inline ssize_t __init pcpu_lpage_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) diff --git a/mm/percpu.c b/mm/percpu.c index 2196fae24f00..b3d0bcff8c7c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -59,6 +59,7 @@ #include #include #include 
+#include #include #include #include @@ -1594,75 +1595,259 @@ out_free_ar: * Large page remapping first chunk setup helper */ #ifdef CONFIG_NEED_MULTIPLE_NODES + +/** + * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_sizep: in/out parameter for dynamic size, -1 for auto + * @unit_sizep: out parameter for unit size + * @unit_map: unit_map to be filled + * @cpu_distance_fn: callback to determine distance between cpus + * + * This function builds cpu -> unit map and determine other parameters + * considering needed percpu size, large page size and distances + * between CPUs in NUMA. + * + * CPUs which are of LOCAL_DISTANCE both ways are grouped together and + * may share units in the same large page. The returned configuration + * is guaranteed to have CPUs on different nodes on different large + * pages and >=75% usage of allocated virtual address space. + * + * RETURNS: + * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and + * returns the number of units to be allocated. -errno on failure. + */ +int __init pcpu_lpage_build_unit_map(size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + static int group_map[NR_CPUS] __initdata; + static int group_cnt[NR_CPUS] __initdata; + int group_cnt_max = 0; + size_t size_sum, min_unit_size, alloc_size; + int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ + int last_allocs; + unsigned int cpu, tcpu; + int group, unit; + + /* + * Determine min_unit_size, alloc_size and max_upa such that + * alloc_size is multiple of lpage_size and is the smallest + * which can accomodate 4k aligned segments which are equal to + * or larger than min_unit_size. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + + alloc_size = roundup(min_unit_size, lpage_size); + upa = alloc_size / min_unit_size; + while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + upa--; + max_upa = upa; + + /* group cpus according to their proximity */ + for_each_possible_cpu(cpu) { + group = 0; + next_group: + for_each_possible_cpu(tcpu) { + if (cpu == tcpu) + break; + if (group_map[tcpu] == group && + (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || + cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { + group++; + goto next_group; + } + } + group_map[cpu] = group; + group_cnt[group]++; + group_cnt_max = max(group_cnt_max, group_cnt[group]); + } + + /* + * Expand unit size until address space usage goes over 75% + * and then as much as possible without using more address + * space. + */ + last_allocs = INT_MAX; + for (upa = max_upa; upa; upa--) { + int allocs = 0, wasted = 0; + + if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + continue; + + for (group = 0; group_cnt[group]; group++) { + int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); + allocs += this_allocs; + wasted += this_allocs * upa - group_cnt[group]; + } + + /* + * Don't accept if wastage is over 25%. The + * greater-than comparison ensures upa==1 always + * passes the following check. 
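Running the sizing loop on a toy configuration shows the 75% rule in action. The sketch below assumes two groups of 4 and 2 cpus and a max_upa of 4, and omits the alloc_size/page-alignment checks of the real code; it keeps upa=4 (2 lpages, 2 wasted units), rejects upa=3 for wasting too much, and stops at upa=2 because it would consume more lpages:

#include <stdio.h>
#include <limits.h>

int main(void)
{
	/* assumed toy input: two NUMA groups holding 4 and 2 cpus,
	 * and a large page that can hold at most 4 units */
	int group_cnt[] = { 4, 2 };
	int nr_groups = 2, nr_cpus = 6, max_upa = 4;
	int upa, best_upa = 1, last_allocs = INT_MAX;

	for (upa = max_upa; upa; upa--) {
		int g, allocs = 0, wasted = 0;

		for (g = 0; g < nr_groups; g++) {
			int this = (group_cnt[g] + upa - 1) / upa;
			allocs += this;
			wasted += this * upa - group_cnt[g];
		}
		if (wasted > nr_cpus / 3)	/* over 25% waste: reject */
			continue;
		if (allocs > last_allocs)	/* needs more lpages: stop */
			break;
		last_allocs = allocs;
		best_upa = upa;
		printf("upa=%d allocs=%d wasted=%d (kept)\n",
		       upa, allocs, wasted);
	}
	printf("best_upa=%d\n", best_upa);
	return 0;
}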
+ */ + if (wasted > num_possible_cpus() / 3) + continue; + + /* and then don't consume more memory */ + if (allocs > last_allocs) + break; + last_allocs = allocs; + best_upa = upa; + } + *unit_sizep = alloc_size / best_upa; + + /* assign units to cpus accordingly */ + unit = 0; + for (group = 0; group_cnt[group]; group++) { + for_each_possible_cpu(cpu) + if (group_map[cpu] == group) + unit_map[cpu] = unit++; + unit = roundup(unit, best_upa); + } + + return unit; /* unit contains aligned number of units */ +} + struct pcpul_ent { - unsigned int cpu; void *ptr; + void *map_addr; }; static size_t pcpul_size; -static size_t pcpul_unit_size; +static size_t pcpul_lpage_size; +static int pcpul_nr_lpages; static struct pcpul_ent *pcpul_map; -static struct vm_struct pcpul_vm; + +static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, + unsigned int *cpup) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + if (unit_map[cpu] == unit) { + if (cpup) + *cpup = cpu; + return true; + } + + return false; +} + +static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units) +{ + int width = 1, v = nr_units; + char empty_str[] = "--------"; + int upl, lpl; /* units per lpage, lpage per line */ + unsigned int cpu; + int lpage, unit; + + while (v /= 10) + width++; + empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + + upl = max_t(int, lpage_size / unit_size, 1); + lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + + printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, + static_size, reserved_size, dyn_size, unit_size, lpage_size); + + for (lpage = 0, unit = 0; unit < nr_units; unit++) { + if (!(unit % upl)) { + if (!(lpage++ % lpl)) { + printk("\n"); + printk("%spcpu-lpage: ", lvl); + } else + printk("| "); + } + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + printk("%0*d ", width, cpu); + else + printk("%s ", empty_str); + } + printk("\n"); +} /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes - * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @dyn_size: free size for dynamic allocation in bytes + * @unit_size: unit size in bytes * @lpage_size: the size of a large page + * @unit_map: cpu -> unit mapping + * @nr_units: the number of units * @alloc_fn: function to allocate percpu lpage, always called with lpage_size * @free_fn: function to free percpu memory, @size <= lpage_size * @map_fn: function to map percpu lpage, always called with lpage_size * - * This allocator uses large page as unit. A large page is allocated - * for each cpu and each is remapped into vmalloc area using large - * page mapping. As large page can be quite large, only part of it is - * used for the first chunk. Unused part is returned to the bootmem - * allocator. - * - * So, the large pages are mapped twice - once to the physical mapping - * and to the vmalloc area for the first percpu chunk. The double - * mapping does add one more large TLB entry pressure but still is - * much better than only using 4k mappings while still being NUMA - * friendly. + * This allocator uses large page to build and map the first chunk. + * Unlike other helpers, the caller should always specify @dyn_size + * and @unit_size. 
These parameters along with @unit_map and + * @nr_units can be determined using pcpu_lpage_build_unit_map(). + * This two stage initialization is to allow arch code to evaluate the + * parameters before committing to it. + * + * Large pages are allocated as directed by @unit_map and other + * parameters and mapped to vmalloc space. Unused holes are returned + * to the page allocator. Note that these holes end up being actively + * mapped twice - once to the physical mapping and to the vmalloc area + * for the first percpu chunk. Depending on architecture, this might + * cause problem when changing page attributes of the returned area. + * These double mapped areas can be detected using + * pcpu_lpage_remapped(). * * RETURNS: * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { - size_t size_sum; + static struct vm_struct vm; + size_t chunk_size = unit_size * nr_units; size_t map_size; unsigned int cpu; - int i, j; ssize_t ret; + int i, j, unit; - /* - * Currently supports only single page. Supporting multiple - * pages won't be too difficult if it ever becomes necessary. - */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + pcpul_lpage_dump_cfg(KERN_DEBUG, static_size, reserved_size, dyn_size, + unit_size, lpage_size, unit_map, nr_units); - pcpul_unit_size = lpage_size; - pcpul_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - if (pcpul_size > pcpul_unit_size) { - pr_warning("PERCPU: static data is larger than large page, " - "can't use large page\n"); - return -EINVAL; - } + BUG_ON(chunk_size % lpage_size); + + pcpul_size = static_size + reserved_size + dyn_size; + pcpul_lpage_size = lpage_size; + pcpul_nr_lpages = chunk_size / lpage_size; /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]); pcpul_map = alloc_bootmem(map_size); - for_each_possible_cpu(cpu) { + /* allocate all pages */ + for (i = 0; i < pcpul_nr_lpages; i++) { + size_t offset = i * lpage_size; + int first_unit = offset / unit_size; + int last_unit = (offset + lpage_size - 1) / unit_size; void *ptr; + /* find out which cpu is mapped to this unit */ + for (unit = first_unit; unit <= last_unit; unit++) + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + goto found; + continue; + found: ptr = alloc_fn(cpu, lpage_size); if (!ptr) { pr_warning("PERCPU: failed to allocate large page " @@ -1670,53 +1855,79 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, goto enomem; } - /* - * Only use pcpul_size bytes and give back the rest. - * - * Ingo: The lpage_size up-rounding bootmem is needed - * to make sure the partial lpage is still fully RAM - - * it's not well-specified to have a incompatible area - * (unmapped RAM, device memory, etc.) in that hole. 
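The hole-punching loop in the new code walks each unit's unused tail in lpage-bounded spans so that every free_fn() call stays within one large page. A standalone sketch, assuming 2MB large pages and a 3MB..6MB range to return, prints the spans it would free ([3MB,4MB) in lpage 1, then [4MB,6MB) in lpage 2):

#include <stdio.h>

static unsigned long roundup_to(unsigned long x, unsigned long a)
{
	return ((x + a - 1) / a) * a;
}

int main(void)
{
	unsigned long lpage = 2UL << 20;	/* assumed 2MB large page */
	unsigned long start = 3UL << 20, end = 6UL << 20;
	unsigned long off, next;

	/* same (off, next) stepping as the loop in the diff above */
	for (off = start; off < end; off = next) {
		next = roundup_to(off + 1, lpage);
		if (next > end)
			next = end;
		printf("free span [%luMB, %luMB) in lpage %lu\n",
		       off >> 20, next >> 20, off / lpage);
	}
	return 0;
}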
- */ - free_fn(ptr + pcpul_size, lpage_size - pcpul_size); - - pcpul_map[cpu].cpu = cpu; - pcpul_map[cpu].ptr = ptr; + pcpul_map[i].ptr = ptr; + } - memcpy(ptr, __per_cpu_load, static_size); + /* return unused holes */ + for (unit = 0; unit < nr_units; unit++) { + size_t start = unit * unit_size; + size_t end = start + unit_size; + size_t off, next; + + /* don't free used part of occupied unit */ + if (pcpul_unit_to_cpu(unit, unit_map, NULL)) + start += pcpul_size; + + /* unit can span more than one page, punch the holes */ + for (off = start; off < end; off = next) { + void *ptr = pcpul_map[off / lpage_size].ptr; + next = min(roundup(off + 1, lpage_size), end); + if (ptr) + free_fn(ptr + off % lpage_size, next - off); + } } - /* allocate address and map */ - pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = num_possible_cpus() * pcpul_unit_size; - vm_area_register_early(&pcpul_vm, pcpul_unit_size); + /* allocate address, map and copy */ + vm.flags = VM_ALLOC; + vm.size = chunk_size; + vm_area_register_early(&vm, unit_size); + + for (i = 0; i < pcpul_nr_lpages; i++) { + if (!pcpul_map[i].ptr) + continue; + pcpul_map[i].map_addr = vm.addr + i * lpage_size; + map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr); + } for_each_possible_cpu(cpu) - map_fn(pcpul_map[cpu].ptr, pcpul_unit_size, - pcpul_vm.addr + cpu * pcpul_unit_size); + memcpy(vm.addr + unit_map[cpu] * unit_size, __per_cpu_load, + static_size); /* we're ready, commit */ pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", pcpul_vm.addr, static_size); + "%zu bytes\n", vm.addr, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - pcpul_unit_size, pcpul_vm.addr, NULL); - - /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < num_possible_cpus() - 1; i++) - for (j = i + 1; j < num_possible_cpus(); j++) - if (pcpul_map[i].ptr > pcpul_map[j].ptr) { - struct pcpul_ent tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } + unit_size, vm.addr, unit_map); + + /* + * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped + * lpages are pushed to the end and trimmed. + */ + for (i = 0; i < pcpul_nr_lpages - 1; i++) + for (j = i + 1; j < pcpul_nr_lpages; j++) { + struct pcpul_ent tmp; + + if (!pcpul_map[j].ptr) + continue; + if (pcpul_map[i].ptr && + pcpul_map[i].ptr < pcpul_map[j].ptr) + continue; + + tmp = pcpul_map[i]; + pcpul_map[i] = pcpul_map[j]; + pcpul_map[j] = tmp; + } + + while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) + pcpul_nr_lpages--; return ret; enomem: - for_each_possible_cpu(cpu) - if (pcpul_map[cpu].ptr) - free_fn(pcpul_map[cpu].ptr, pcpul_size); + for (i = 0; i < pcpul_nr_lpages; i++) + if (pcpul_map[i].ptr) + free_fn(pcpul_map[i].ptr, lpage_size); free_bootmem(__pa(pcpul_map), map_size); return -ENOMEM; } @@ -1739,10 +1950,10 @@ enomem: */ void *pcpu_lpage_remapped(void *kaddr) { - unsigned long unit_mask = pcpul_unit_size - 1; - void *lpage_addr = (void *)((unsigned long)kaddr & ~unit_mask); - unsigned long offset = (unsigned long)kaddr & unit_mask; - int left = 0, right = num_possible_cpus() - 1; + unsigned long lpage_mask = pcpul_lpage_size - 1; + void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask); + unsigned long offset = (unsigned long)kaddr & lpage_mask; + int left = 0, right = pcpul_nr_lpages - 1; int pos; /* pcpul in use at all? 
*/ @@ -1757,13 +1968,8 @@ void *pcpu_lpage_remapped(void *kaddr) left = pos + 1; else if (pcpul_map[pos].ptr > lpage_addr) right = pos - 1; - else { - /* it shouldn't be in the area for the first chunk */ - WARN_ON(offset < pcpul_size); - - return pcpul_vm.addr + - pcpul_map[pos].cpu * pcpul_unit_size + offset; - } + else + return pcpul_map[pos].map_addr + offset; } return NULL; -- cgit v1.2.3 From 96ccd4a43a4d80c80be636cd025a69959cf47424 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2009 12:47:52 +0200 Subject: genirq: Remove obsolete defines and typedefs The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t) have been kept around for migration reasons. The last users are gone, remove them. Signed-off-by: Thomas Gleixner --- Documentation/feature-removal-schedule.txt | 9 --------- include/linux/irq.h | 7 ------- 2 files changed, 16 deletions(-) (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index f8cd450be9aa..2af78126b053 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -394,15 +394,6 @@ Who: Thomas Gleixner ----------------------------- -What: obsolete generic irq defines and typedefs -When: 2.6.30 -Why: The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t) - have been kept around for migration reasons. After more than two years - it's time to remove them finally -Who: Thomas Gleixner - ---------------------------- - What: fakephp and associated sysfs files in /sys/bus/pci/slots/ When: 2011 Why: In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to diff --git a/include/linux/irq.h b/include/linux/irq.h index cb2e77a3f7f7..6956df9961ab 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -219,13 +219,6 @@ static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); -/* - * Migration helpers for obsolete names, they will go away: - */ -#define hw_interrupt_type irq_chip -#define no_irq_type no_irq_chip -typedef struct irq_desc irq_desc_t; - /* * Pick up the arch-dependent methods: */ -- cgit v1.2.3 From 0e8635a8e1f2d4a9e1bfc6c3b21419a5921e674f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 20 Jun 2009 00:53:25 +0000 Subject: net: remove NET_RX_BAD and NET_RX_CN* defines almost no users in the tree; and the few that use them treat them like NET_RX_DROP. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- drivers/net/rionet.c | 5 ----- drivers/net/wan/farsync.c | 19 ------------------- drivers/staging/otus/wrap_pkt.c | 3 --- include/linux/netdevice.h | 4 ---- net/decnet/dn_route.c | 2 +- net/lapb/lapb_iface.c | 2 +- 6 files changed, 2 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 8702e7acdee6..74cdb6f8f84f 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -114,11 +114,6 @@ static int rionet_rx_clean(struct net_device *ndev) if (error == NET_RX_DROP) { ndev->stats.rx_dropped++; - } else if (error == NET_RX_BAD) { - if (netif_msg_rx_err(rnet)) - printk(KERN_WARNING "%s: bad rx packet\n", - DRV_NAME); - ndev->stats.rx_errors++; } else { ndev->stats.rx_packets++; ndev->stats.rx_bytes += RIO_MAX_MSG_SIZE; diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 25c9ef6a1815..90c0a317d9d3 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -792,25 +792,6 @@ fst_process_rx_status(int rx_status, char *name) */ break; } - - case NET_RX_CN_LOW: - { - dbg(DBG_ASS, "%s: Receive Low Congestion\n", name); - break; - } - - case NET_RX_CN_MOD: - { - dbg(DBG_ASS, "%s: Receive Moderate Congestion\n", name); - break; - } - - case NET_RX_CN_HIGH: - { - dbg(DBG_ASS, "%s: Receive High Congestion\n", name); - break; - } - case NET_RX_DROP: { dbg(DBG_ASS, "%s: Received packet dropped\n", name); diff --git a/drivers/staging/otus/wrap_pkt.c b/drivers/staging/otus/wrap_pkt.c index 5db0004c8739..89a6b92f5972 100644 --- a/drivers/staging/otus/wrap_pkt.c +++ b/drivers/staging/otus/wrap_pkt.c @@ -156,10 +156,7 @@ void zfLnxRecvEth(zdev_t* dev, zbuf_t* buf, u16_t port) switch(netif_rx(buf)) #endif { - case NET_RX_BAD: case NET_RX_DROP: - case NET_RX_CN_MOD: - case NET_RX_CN_HIGH: break; default: macp->drv_stats.net_stats.rx_packets++; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d4a4d9867794..9f25ab2899de 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -72,10 +72,6 @@ struct wireless_dev; /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ -#define NET_RX_CN_LOW 2 /* storm alert, just in case */ -#define NET_RX_CN_MOD 3 /* Storm on its way! */ -#define NET_RX_CN_HIGH 4 /* The storm is here */ -#define NET_RX_BAD 5 /* packet dropped due to kernel error */ /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. 
It * indicates that the device will soon be dropping packets, or already drops diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 1d6ca8a98dc6..9383d3e5a1ab 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -774,7 +774,7 @@ static int dn_rt_bug(struct sk_buff *skb) kfree_skb(skb); - return NET_RX_BAD; + return NET_RX_DROP; } static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index 2ba1bc4f3c3a..bda96d18fd98 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -407,7 +407,7 @@ int lapb_data_indication(struct lapb_cb *lapb, struct sk_buff *skb) return lapb->callbacks.data_indication(lapb->dev, skb); kfree_skb(skb); - return NET_RX_CN_HIGH; /* For now; must be != NET_RX_DROP */ + return NET_RX_SUCCESS; /* For now; must be != NET_RX_DROP */ } int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb) -- cgit v1.2.3 From 6650613d3387dcc30685e2781818ea7d0f840027 Mon Sep 17 00:00:00 2001 From: "oscar.medina@motorola.com" Date: Tue, 30 Jun 2009 03:25:39 +0000 Subject: tipc: Add socket options to get number of queued messages This patch allows a TIPC application to determine the number of messages currently waiting in a socket's receive queue (TIPC_SOCK_RECVQ_DEPTH) or in all TIPC socket receive queues (TIPC_NODE_RECVQ_DEPTH). Signed-off-by: Oscar Medina Signed-off-by: Allan Stephens Signed-off-by: David S. Miller --- include/linux/tipc.h | 2 ++ net/tipc/socket.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tipc.h b/include/linux/tipc.h index bea469455a0c..3d92396639de 100644 --- a/include/linux/tipc.h +++ b/include/linux/tipc.h @@ -209,5 +209,7 @@ struct sockaddr_tipc { #define TIPC_SRC_DROPPABLE 128 /* Default: 0 (resend congested msg) */ #define TIPC_DEST_DROPPABLE 129 /* Default: based on socket type */ #define TIPC_CONN_TIMEOUT 130 /* Default: 8000 (ms) */ +#define TIPC_NODE_RECVQ_DEPTH 131 /* Default: none (read only) */ +#define TIPC_SOCK_RECVQ_DEPTH 132 /* Default: none (read only) */ #endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1848693ebb82..e8254e809b79 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1748,6 +1748,12 @@ static int getsockopt(struct socket *sock, value = jiffies_to_msecs(sk->sk_rcvtimeo); /* no need to set "res", since already 0 at this point */ break; + case TIPC_NODE_RECVQ_DEPTH: + value = (u32)atomic_read(&tipc_queue_size); + break; + case TIPC_SOCK_RECVQ_DEPTH: + value = skb_queue_len(&sk->sk_receive_queue); + break; default: res = -EINVAL; } -- cgit v1.2.3 From 7a6d3c8b3049d07123628f2bf57127bba2cc878f Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Wed, 1 Jul 2009 17:28:41 -0700 Subject: fuse: make the number of max background requests and congestion threshold tunable The practical values for these limits depend on the design of the filesystem server so let userspace set them at initialization time. 
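As a sketch of the userspace side (illustrative only, not part of this patch; the helper name and the chosen limits are assumptions), a filesystem server speaking protocol 7.13 or later can override the kernel defaults in its INIT reply, and a zero in either field keeps the kernel default:

/* Sketch: fill the two new fuse_init_out fields in the INIT reply. */
#include <linux/fuse.h>

static void my_fill_init_reply(struct fuse_init_out *out) /* hypothetical */
{
	out->major = FUSE_KERNEL_VERSION;
	out->minor = 13;		/* new fields are honored from minor 13 on */
	out->max_background = 64;	/* allow 64 queued background requests */
	out->congestion_threshold = 48;	/* congest the bdi at 75% of that */
	/* leaving either field 0 keeps the kernel defaults (12 and 9) */
}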
Signed-off-by: Csaba Henk Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 10 +++++----- fs/fuse/fuse_i.h | 12 ++++++------ fs/fuse/inode.c | 14 ++++++++++++++ include/linux/fuse.h | 9 +++++++-- 4 files changed, 32 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f58ecbc416c8..b152761c1bf6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -250,7 +250,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) static void flush_bg_queue(struct fuse_conn *fc) { - while (fc->active_background < FUSE_MAX_BACKGROUND && + while (fc->active_background < fc->max_background && !list_empty(&fc->bg_queue)) { struct fuse_req *req; @@ -280,11 +280,11 @@ __releases(&fc->lock) list_del(&req->intr_entry); req->state = FUSE_REQ_FINISHED; if (req->background) { - if (fc->num_background == FUSE_MAX_BACKGROUND) { + if (fc->num_background == fc->max_background) { fc->blocked = 0; wake_up_all(&fc->blocked_waitq); } - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->connected && fc->bdi_initialized) { clear_bdi_congested(&fc->bdi, READ); clear_bdi_congested(&fc->bdi, WRITE); @@ -410,9 +410,9 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, { req->background = 1; fc->num_background++; - if (fc->num_background == FUSE_MAX_BACKGROUND) + if (fc->num_background == fc->max_background) fc->blocked = 1; - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->bdi_initialized) { set_bdi_congested(&fc->bdi, READ); set_bdi_congested(&fc->bdi, WRITE); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 52b641fc0faf..6bcfab04396f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -25,12 +25,6 @@ /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 -/** Maximum number of outstanding background requests */ -#define FUSE_MAX_BACKGROUND 12 - -/** Congestion starts at 75% of maximum */ -#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) - /** Bias for fi->writectr, meaning new writepages must not be sent */ #define FUSE_NOWRITE INT_MIN @@ -349,6 +343,12 @@ struct fuse_conn { /** rbtree of fuse_files waiting for poll events indexed by ph */ struct rb_root polled_files; + /** Maximum number of outstanding background requests */ + unsigned max_background; + + /** Number of background requests at which congestion starts */ + unsigned congestion_threshold; + /** Number of requests currently in the background */ unsigned num_background; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f91ccc4a189d..9aa6f46d0c32 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -32,6 +32,12 @@ DEFINE_MUTEX(fuse_mutex); #define FUSE_DEFAULT_BLKSIZE 512 +/** Maximum number of outstanding background requests */ +#define FUSE_DEFAULT_MAX_BACKGROUND 12 + +/** Congestion starts at 75% of maximum */ +#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) + struct fuse_mount_data { int fd; unsigned rootmode; @@ -517,6 +523,8 @@ void fuse_conn_init(struct fuse_conn *fc) INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); atomic_set(&fc->num_waiting, 0); + fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; + fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; fc->khctr = 0; fc->polled_files = RB_ROOT; fc->reqctr = 0; @@ -736,6 +744,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) else { 
unsigned long ra_pages; + if (arg->minor >= 13) { + if (arg->max_background) + fc->max_background = arg->max_background; + if (arg->congestion_threshold) + fc->congestion_threshold = arg->congestion_threshold; + } if (arg->minor >= 6) { ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; if (arg->flags & FUSE_ASYNC_READ) diff --git a/include/linux/fuse.h b/include/linux/fuse.h index cf593bf9fd32..b3700f0ac268 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -30,6 +30,10 @@ * - add umask flag to input argument of open, mknod and mkdir * - add notification messages for invalidation of inodes and * directory entries + * + * 7.13 + * - make max number of background requests and congestion threshold + * tunables */ #ifndef _LINUX_FUSE_H @@ -41,7 +45,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 12 +#define FUSE_KERNEL_MINOR_VERSION 13 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -427,7 +431,8 @@ struct fuse_init_out { __u32 minor; __u32 max_readahead; __u32 flags; - __u32 unused; + __u16 max_background; + __u16 congestion_threshold; __u32 max_write; }; -- cgit v1.2.3 From 37d217f029a56a6d385f99773fb27dfcb51f9a46 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 8 Jul 2009 18:17:58 +0200 Subject: fuse: document protocol version negotiation Clarify how the protocol version should be negotiated between kernel and userspace. Notably libfuse didn't correctly handle the case when the supported major versions didn't match. Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index b3700f0ac268..3e2925a34bf0 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -41,6 +41,26 @@ #include +/* + * Version negotiation: + * + * Both the kernel and userspace send the version they support in the + * INIT request and reply respectively. + * + * If the major versions match then both shall use the smallest + * of the two minor versions for communication. + * + * If the kernel supports a larger major version, then userspace shall + * reply with the major version it supports, ignore the rest of the + * INIT message and expect a new INIT message from the kernel with a + * matching major version. + * + * If the library supports a larger major version, then it shall fall + * back to the major protocol version sent by the kernel for + * communication and reply with that major version (and an arbitrary + * supported minor version). + */ + /** Version number of this interface */ #define FUSE_KERNEL_VERSION 7 -- cgit v1.2.3 From ed5215a21460f63d6bdc118cb55a9e6d1b433f35 Mon Sep 17 00:00:00 2001 From: Thomas Liu Date: Thu, 9 Jul 2009 10:00:29 -0400 Subject: Move variable function in lsm_audit.h into SMACK private space Moved variable function in include/linux/lsm_audit.h into the smack_audit_data struct since it is never used outside of it. Also removed setting of function in the COMMON_AUDIT_DATA_INIT macro because that variable is now private to SMACK. 
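For illustration (a minimal sketch, not taken from the patch; it assumes CONFIG_SECURITY_SMACK and the existing LSM_AUDIT_DATA_TASK type), a Smack hook now records its name through the SMACK-private field rather than the shared one:

/* Sketch: how Smack audit data is initialized after this change. */
struct smk_audit_info ad;

smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
/*
 * which now expands to roughly:
 *   memset(&ad, 0, sizeof(ad));
 *   ad.a.type = LSM_AUDIT_DATA_TASK;
 *   ad.a.lsm_priv.smack_audit_data.function = __func__;
 * COMMON_AUDIT_DATA_INIT() itself no longer touches "function".
 */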
Signed-off-by: Thomas Liu Acked-by: Eric Paris I-dont-see-any-problems-with-it: Casey Schaufler Signed-off-by: James Morris --- include/linux/lsm_audit.h | 4 ++-- security/smack/smack.h | 2 +- security/smack/smack_access.c | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index e461b2c3d711..68f7bce572b0 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -66,11 +66,11 @@ struct common_audit_data { } key_struct; #endif } u; - const char *function; /* this union contains LSM specific data */ union { /* SMACK data */ struct smack_audit_data { + const char *function; char *subject; char *object; char *request; @@ -104,7 +104,7 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb, /* Initialize an LSM audit data structure. */ #define COMMON_AUDIT_DATA_INIT(_d, _t) \ { memset((_d), 0, sizeof(struct common_audit_data)); \ - (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; } + (_d)->type = LSM_AUDIT_DATA_##_t; } void common_lsm_audit(struct common_audit_data *a); diff --git a/security/smack/smack.h b/security/smack/smack.h index 243bec175be0..ff180ede3e47 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -275,7 +275,7 @@ static inline void smk_ad_init(struct smk_audit_info *a, const char *func, { memset(a, 0, sizeof(*a)); a->a.type = type; - a->a.function = func; + a->a.lsm_priv.smack_audit_data.function = func; } static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index 513dc1aa16dd..dd84877dff30 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -241,7 +241,8 @@ static void smack_log_callback(struct audit_buffer *ab, void *a) { struct common_audit_data *ad = a; struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data; - audit_log_format(ab, "lsm=SMACK fn=%s action=%s", ad->function, + audit_log_format(ab, "lsm=SMACK fn=%s action=%s", + ad->lsm_priv.smack_audit_data.function, sad->result ? "denied" : "granted"); audit_log_format(ab, " subject="); audit_log_untrustedstring(ab, sad->subject); @@ -274,8 +275,8 @@ void smack_log(char *subject_label, char *object_label, int request, if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0) return; - if (a->function == NULL) - a->function = "unknown"; + if (a->lsm_priv.smack_audit_data.function == NULL) + a->lsm_priv.smack_audit_data.function = "unknown"; /* end preparing the audit data */ sad = &a->lsm_priv.smack_audit_data; -- cgit v1.2.3 From d4131ded4d4c1a5c1363ddd93ca104ed97dd0458 Mon Sep 17 00:00:00 2001 From: Thomas Liu Date: Thu, 9 Jul 2009 10:00:30 -0400 Subject: security: Make lsm_priv union in lsm_audit.h anonymous Made the lsm_priv union in include/linux/lsm_audit.h anonymous. 
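The effect is purely syntactic, as a standalone sketch shows (illustrative names; anonymous unions are a C11/GNU C feature the kernel already relies on):

/* Sketch: member access before and after anonymizing the union. */
struct audit_data_sketch {
	union {				/* unnamed: members are reached directly */
		int smack_audit_data;
		int selinux_audit_data;
	};				/* was: } lsm_priv; */
};

static void demo(struct audit_data_sketch *ad)
{
	ad->smack_audit_data = 1;	/* was: ad->lsm_priv.smack_audit_data = 1; */
}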
Signed-off-by: Thomas Liu Acked-by: Eric Paris Signed-off-by: James Morris --- include/linux/lsm_audit.h | 2 +- security/smack/smack.h | 2 +- security/smack/smack_access.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 68f7bce572b0..40d1b84f2a3c 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -86,7 +86,7 @@ struct common_audit_data { struct av_decision *avd; int result; } selinux_audit_data; - } lsm_priv; + }; /* these callback will be implemented by a specific LSM */ void (*lsm_pre_audit)(struct audit_buffer *, void *); void (*lsm_post_audit)(struct audit_buffer *, void *); diff --git a/security/smack/smack.h b/security/smack/smack.h index ff180ede3e47..c6e9acae72e4 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -275,7 +275,7 @@ static inline void smk_ad_init(struct smk_audit_info *a, const char *func, { memset(a, 0, sizeof(*a)); a->a.type = type; - a->a.lsm_priv.smack_audit_data.function = func; + a->a.smack_audit_data.function = func; } static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index dd84877dff30..0f9ac8146900 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -240,9 +240,9 @@ static inline void smack_str_from_perm(char *string, int access) static void smack_log_callback(struct audit_buffer *ab, void *a) { struct common_audit_data *ad = a; - struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data; + struct smack_audit_data *sad = &ad->smack_audit_data; audit_log_format(ab, "lsm=SMACK fn=%s action=%s", - ad->lsm_priv.smack_audit_data.function, + ad->smack_audit_data.function, sad->result ? "denied" : "granted"); audit_log_format(ab, " subject="); audit_log_untrustedstring(ab, sad->subject); @@ -275,11 +275,11 @@ void smack_log(char *subject_label, char *object_label, int request, if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0) return; - if (a->lsm_priv.smack_audit_data.function == NULL) - a->lsm_priv.smack_audit_data.function = "unknown"; + if (a->smack_audit_data.function == NULL) + a->smack_audit_data.function = "unknown"; /* end preparing the audit data */ - sad = &a->lsm_priv.smack_audit_data; + sad = &a->smack_audit_data; smack_str_from_perm(request_buffer, request); sad->subject = subject_label; sad->object = object_label; -- cgit v1.2.3 From 65c3f0a2d0f72d210c879e4974c2d222b7951321 Mon Sep 17 00:00:00 2001 From: Thomas Liu Date: Thu, 9 Jul 2009 10:00:31 -0400 Subject: security: Wrap SMACK and SELINUX audit data structs in ifdefs Wrapped the smack_audit_data and selinux_audit_data structs in include/linux/lsm_audit.h in ifdefs so that the union will always be the correct size. 
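A rough sketch of the sizing argument (illustrative member sizes, not the real structs; it assumes at least one LSM is selected so the union is never empty): the union is only as large as its largest compiled-in member, so a configuration that builds a single LSM no longer carries the other LSM's audit struct in every common_audit_data:

/* Sketch: conditional members keep the union minimal. */
union lsm_data_sketch {
#ifdef CONFIG_SECURITY_SMACK
	char smack[32];		/* stand-in for struct smack_audit_data */
#endif
#ifdef CONFIG_SECURITY_SELINUX
	char selinux[64];	/* stand-in for the SELinux struct */
#endif
};
/* SMACK-only build: sizeof(union lsm_data_sketch) == 32;
 * both enabled: 64; previously every build paid for both members. */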
Signed-off-by: Thomas Liu Acked-by: Eric Paris Signed-off-by: James Morris --- include/linux/lsm_audit.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 40d1b84f2a3c..a5514a3a4f17 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -68,6 +68,7 @@ struct common_audit_data { } u; /* this union contains LSM specific data */ union { +#ifdef CONFIG_SECURITY_SMACK /* SMACK data */ struct smack_audit_data { const char *function; @@ -76,6 +77,8 @@ struct common_audit_data { char *request; int result; } smack_audit_data; +#endif +#ifdef CONFIG_SECURITY_SELINUX /* SELinux data */ struct { u32 ssid; @@ -86,6 +89,7 @@ struct common_audit_data { struct av_decision *avd; int result; } selinux_audit_data; +#endif }; /* these callback will be implemented by a specific LSM */ void (*lsm_pre_audit)(struct audit_buffer *, void *); -- cgit v1.2.3 From a33e9e7f35ef6dcab528e0327f29188475f60691 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 16 Jun 2009 17:17:27 +0300 Subject: usbnet: Add stop function pointer to 'struct driver_info'. Allow minidriver to know that netdev has stopped. This is to let wireless turn off radio when usbnet dev is stopped. Signed-off-by: Jussi Kivilinna Acked-by: David Brownell Acked-by: David S. Miller Signed-off-by: John W. Linville --- drivers/net/usb/usbnet.c | 14 ++++++++++++++ include/linux/usb/usbnet.h | 3 +++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index edfd9e10ceba..25e435c49040 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -575,7 +575,9 @@ EXPORT_SYMBOL_GPL(usbnet_unlink_rx_urbs); int usbnet_stop (struct net_device *net) { struct usbnet *dev = netdev_priv(net); + struct driver_info *info = dev->driver_info; int temp; + int retval; DECLARE_WAIT_QUEUE_HEAD_ONSTACK (unlink_wakeup); DECLARE_WAITQUEUE (wait, current); @@ -587,6 +589,18 @@ int usbnet_stop (struct net_device *net) net->stats.rx_errors, net->stats.tx_errors ); + /* allow minidriver to stop correctly (wireless devices to turn off + * radio etc) */ + if (info->stop) { + retval = info->stop(dev); + if (retval < 0 && netif_msg_ifdown(dev)) + devinfo(dev, + "stop fail (%d) usbnet usb-%s-%s, %s", + retval, + dev->udev->bus->bus_name, dev->udev->devpath, + info->description); + } + // ensure there are no more active urbs add_wait_queue (&unlink_wakeup, &wait); dev->wait = &unlink_wakeup; diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 310e18a880ff..7c17b2efba86 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -97,6 +97,9 @@ struct driver_info { /* reset device ... can sleep */ int (*reset)(struct usbnet *); + /* stop device ... can sleep */ + int (*stop)(struct usbnet *); + /* see if peer is connected ... can sleep */ int (*check_connect)(struct usbnet *); -- cgit v1.2.3 From aff89a9b9084931e51b89d8f3ee3c547bea6c422 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:51 +0200 Subject: cfg80211: introduce nl80211 testmode command This introduces a new NL80211_CMD_TESTMODE for testing and calibration use with nl80211. There's no multiplexing like iwpriv had, and the command is not available by default; it needs to be explicitly enabled in Kconfig and shouldn't be enabled in most kernels. The command requires a wiphy index or interface index to identify the device to operate on, and the new TESTDATA attribute.
There also is API for sending replies to the command, and testmode multicast messages (on a testmode multicast group). I've also updated mac80211 to be able to pass through the command to the driver, since it itself doesn't implement the testmode command. Additionally, to give people an idea of how to use the command, I've added a little code to hwsim that makes use of the new command to set the powersave mode, this is currently done via debugfs and should remain there, and the testmode command only serves as an example of how to use this best -- with nested netlink attributes in the TESTDATA attribute. A hwsim testmode tool can be found at http://git.sipsolutions.net/hwsim.git/. This tool is BSD licensed so people can easily use it as a basis for their own internal fabrication and validation tools. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 68 +++++++++++++++++ include/linux/nl80211.h | 11 +++ include/net/cfg80211.h | 83 +++++++++++++++++++++ include/net/mac80211.h | 5 ++ net/mac80211/cfg.c | 13 ++++ net/wireless/Kconfig | 15 ++++ net/wireless/core.h | 4 + net/wireless/nl80211.c | 136 ++++++++++++++++++++++++++++++++++ 8 files changed, 335 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 93c1c4a73e6c..6ac8565072e9 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -700,6 +700,73 @@ static int mac80211_hwsim_conf_tx( return 0; } +#ifdef CONFIG_NL80211_TESTMODE +/* + * This section contains example code for using netlink + * attributes with the testmode command in nl80211. + */ + +/* These enums need to be kept in sync with userspace */ +enum hwsim_testmode_attr { + __HWSIM_TM_ATTR_INVALID = 0, + HWSIM_TM_ATTR_CMD = 1, + HWSIM_TM_ATTR_PS = 2, + + /* keep last */ + __HWSIM_TM_ATTR_AFTER_LAST, + HWSIM_TM_ATTR_MAX = __HWSIM_TM_ATTR_AFTER_LAST - 1 +}; + +enum hwsim_testmode_cmd { + HWSIM_TM_CMD_SET_PS = 0, + HWSIM_TM_CMD_GET_PS = 1, +}; + +static const struct nla_policy hwsim_testmode_policy[HWSIM_TM_ATTR_MAX + 1] = { + [HWSIM_TM_ATTR_CMD] = { .type = NLA_U32 }, + [HWSIM_TM_ATTR_PS] = { .type = NLA_U32 }, +}; + +static int hwsim_fops_ps_write(void *dat, u64 val); + +int mac80211_hwsim_testmode_cmd(struct ieee80211_hw *hw, void *data, int len) +{ + struct mac80211_hwsim_data *hwsim = hw->priv; + struct nlattr *tb[HWSIM_TM_ATTR_MAX + 1]; + struct sk_buff *skb; + int err, ps; + + err = nla_parse(tb, HWSIM_TM_ATTR_MAX, data, len, + hwsim_testmode_policy); + if (err) + return err; + + if (!tb[HWSIM_TM_ATTR_CMD]) + return -EINVAL; + + switch (nla_get_u32(tb[HWSIM_TM_ATTR_CMD])) { + case HWSIM_TM_CMD_SET_PS: + if (!tb[HWSIM_TM_ATTR_PS]) + return -EINVAL; + ps = nla_get_u32(tb[HWSIM_TM_ATTR_PS]); + return hwsim_fops_ps_write(hwsim, ps); + case HWSIM_TM_CMD_GET_PS: + skb = cfg80211_testmode_alloc_reply_skb(hw->wiphy, + nla_total_size(sizeof(u32))); + if (!skb) + return -ENOMEM; + NLA_PUT_U32(skb, HWSIM_TM_ATTR_PS, hwsim->ps); + return cfg80211_testmode_reply(skb); + default: + return -EOPNOTSUPP; + } + + nla_put_failure: + kfree_skb(skb); + return -ENOBUFS; +} +#endif + static const struct ieee80211_ops mac80211_hwsim_ops = { .tx = mac80211_hwsim_tx, @@ -713,6 +780,7 @@ static const struct ieee80211_ops mac80211_hwsim_ops = .sta_notify = mac80211_hwsim_sta_notify, .set_tim = mac80211_hwsim_set_tim, .conf_tx = mac80211_hwsim_conf_tx, + CFG80211_TESTMODE_CMD(mac80211_hwsim_testmode_cmd) }; diff --git 
a/include/linux/nl80211.h b/include/linux/nl80211.h index dbea93b694e5..651b18839088 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -242,6 +242,10 @@ * @NL80211_CMD_LEAVE_IBSS: Leave the IBSS -- no special arguments, the IBSS is * determined by the network interface. * + * @NL80211_CMD_TESTMODE: testmode command, takes a wiphy (or ifindex) attribute + * to identify the device, and the TESTDATA blob attribute to pass through + * to the driver. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -310,6 +314,8 @@ enum nl80211_commands { NL80211_CMD_JOIN_IBSS, NL80211_CMD_LEAVE_IBSS, + NL80211_CMD_TESTMODE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -511,6 +517,9 @@ enum nl80211_commands { * authorized by user space. Otherwise, port is marked authorized by * default in station mode. * + * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. + * We recommend using nested, driver-specific attributes within this. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -619,6 +628,8 @@ enum nl80211_attrs { NL80211_ATTR_CONTROL_PORT, + NL80211_ATTR_TESTDATA, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 10eb53e2bc9f..885d4e5bc4b5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -857,6 +857,8 @@ enum tx_power_setting { * * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting * functions to adjust rfkill hw state + * + * @testmode_cmd: run a test mode command */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); @@ -955,6 +957,10 @@ struct cfg80211_ops { int (*get_tx_power)(struct wiphy *wiphy, int *dbm); void (*rfkill_poll)(struct wiphy *wiphy); + +#ifdef CONFIG_NL80211_TESTMODE + int (*testmode_cmd)(struct wiphy *wiphy, void *data, int len); +#endif }; /* @@ -1705,4 +1711,81 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy); */ void wiphy_rfkill_stop_polling(struct wiphy *wiphy); +#ifdef CONFIG_NL80211_TESTMODE +/** + * cfg80211_testmode_alloc_reply_skb - allocate testmode reply + * @wiphy: the wiphy + * @approxlen: an upper bound of the length of the data that will + * be put into the skb + * + * This function allocates and pre-fills an skb for a reply to + * the testmode command. Since it is intended for a reply, calling + * it outside of the @testmode_cmd operation is invalid. + * + * The returned skb (or %NULL if any errors happen) is pre-filled + * with the wiphy index and set up in a way that any data that is + * put into the skb (with skb_put(), nla_put() or similar) will end + * up being within the %NL80211_ATTR_TESTDATA attribute, so all that + * needs to be done with the skb is adding data for the corresponding + * userspace tool which can then read that data out of the testdata + * attribute. You must not modify the skb in any other way. + * + * When done, call cfg80211_testmode_reply() with the skb and return + * its error code as the result of the @testmode_cmd operation. 
+ */ +struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, + int approxlen); + +/** + * cfg80211_testmode_reply - send the reply skb + * @skb: The skb, must have been allocated with + * cfg80211_testmode_alloc_reply_skb() + * + * Returns an error code or 0 on success, since calling this + * function will usually be the last thing before returning + * from the @testmode_cmd you should return the error code. + * Note that this function consumes the skb regardless of the + * return value. + */ +int cfg80211_testmode_reply(struct sk_buff *skb); + +/** + * cfg80211_testmode_alloc_event_skb - allocate testmode event + * @wiphy: the wiphy + * @approxlen: an upper bound of the length of the data that will + * be put into the skb + * @gfp: allocation flags + * + * This function allocates and pre-fills an skb for an event on the + * testmode multicast group. + * + * The returned skb (or %NULL if any errors happen) is set up in the + * same way as with cfg80211_testmode_alloc_reply_skb() but prepared + * for an event. As there, you should simply add data to it that will + * then end up in the %NL80211_ATTR_TESTDATA attribute. Again, you must + * not modify the skb in any other way. + * + * When done filling the skb, call cfg80211_testmode_event() with the + * skb to send the event. + */ +struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, + int approxlen, gfp_t gfp); + +/** + * cfg80211_testmode_event - send the event + * @skb: The skb, must have been allocated with + * cfg80211_testmode_alloc_event_skb() + * @gfp: allocation flags + * + * This function sends the given @skb, which must have been allocated + * by cfg80211_testmode_alloc_event_skb(), as an event. It always + * consumes it. + */ +void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp); + +#define CFG80211_TESTMODE_CMD(cmd) .testmode_cmd = (cmd), +#else +#define CFG80211_TESTMODE_CMD(cmd) +#endif + #endif /* __NET_CFG80211_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fe80771d95f1..ce7cb1b5d453 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1416,6 +1416,8 @@ enum ieee80211_ampdu_mlme_action { * @rfkill_poll: Poll rfkill hardware state. If you need this, you also * need to set wiphy->rfkill_poll to %true before registration, * and need to call wiphy_rfkill_set_hw_state() in the callback. + * + * @testmode_cmd: Implement a cfg80211 test mode command. 
*/ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -1466,6 +1468,9 @@ struct ieee80211_ops { struct ieee80211_sta *sta, u16 tid, u16 *ssn); void (*rfkill_poll)(struct ieee80211_hw *hw); +#ifdef CONFIG_NL80211_TESTMODE + int (*testmode_cmd)(struct ieee80211_hw *hw, void *data, int len); +#endif }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index eb93eb6a9cc7..c34c1a41019a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1376,6 +1376,18 @@ static void ieee80211_rfkill_poll(struct wiphy *wiphy) drv_rfkill_poll(local); } +#ifdef CONFIG_NL80211_TESTMODE +int ieee80211_testmode_cmd(struct wiphy *wiphy, void *data, int len) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + if (!local->ops->testmode_cmd) + return -EOPNOTSUPP; + + return local->ops->testmode_cmd(&local->hw, data, len); +} +#endif + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1418,4 +1430,5 @@ struct cfg80211_ops mac80211_config_ops = { .set_tx_power = ieee80211_set_tx_power, .get_tx_power = ieee80211_get_tx_power, .rfkill_poll = ieee80211_rfkill_poll, + CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) }; diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index ec64571c4c23..040263118a20 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -2,6 +2,21 @@ config CFG80211 tristate "Improved wireless configuration API" depends on RFKILL || !RFKILL +config NL80211_TESTMODE + bool "nl80211 testmode command" + depends on CFG80211 + help + The nl80211 testmode command helps implementing things like + factory calibration or validation tools for wireless chips. + + Select this option ONLY for kernels that are specifically + built for such purposes. + + Debugging tools that are supposed to end up in the hands of + users should better be implemented with debugfs. + + Say N. 
+ config CFG80211_REG_DEBUG bool "cfg80211 regulatory debugging" depends on CFG80211 diff --git a/net/wireless/core.h b/net/wireless/core.h index bfa340c7abb5..bc084b68865c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -58,6 +58,10 @@ struct cfg80211_registered_device { struct cfg80211_scan_request *scan_req; /* protected by RTNL */ unsigned long suspend_at; +#ifdef CONFIG_NL80211_TESTMODE + struct genl_info *testmode_info; +#endif + #ifdef CONFIG_CFG80211_DEBUGFS /* Debugfs entries */ struct wiphy_debugfsdentries { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 01523ba81baf..bb8de268a6bf 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3416,6 +3416,128 @@ unlock_rtnl: return err; } +#ifdef CONFIG_NL80211_TESTMODE +static struct genl_multicast_group nl80211_testmode_mcgrp = { + .name = "testmode", +}; + +static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + int err; + + if (!info->attrs[NL80211_ATTR_TESTDATA]) + return -EINVAL; + + rtnl_lock(); + + rdev = cfg80211_get_dev_from_info(info); + if (IS_ERR(rdev)) { + err = PTR_ERR(rdev); + goto unlock_rtnl; + } + + err = -EOPNOTSUPP; + if (rdev->ops->testmode_cmd) { + rdev->testmode_info = info; + err = rdev->ops->testmode_cmd(&rdev->wiphy, + nla_data(info->attrs[NL80211_ATTR_TESTDATA]), + nla_len(info->attrs[NL80211_ATTR_TESTDATA])); + rdev->testmode_info = NULL; + } + + cfg80211_put_dev(rdev); + + unlock_rtnl: + rtnl_unlock(); + return err; +} + +static struct sk_buff * +__cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, + int approxlen, u32 pid, u32 seq, gfp_t gfp) +{ + struct sk_buff *skb; + void *hdr; + struct nlattr *data; + + skb = nlmsg_new(approxlen + 100, gfp); + if (!skb) + return NULL; + + hdr = nl80211hdr_put(skb, pid, seq, 0, NL80211_CMD_TESTMODE); + if (!hdr) { + kfree_skb(skb); + return NULL; + } + + NLA_PUT_U32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + data = nla_nest_start(skb, NL80211_ATTR_TESTDATA); + + ((void **)skb->cb)[0] = rdev; + ((void **)skb->cb)[1] = hdr; + ((void **)skb->cb)[2] = data; + + return skb; + + nla_put_failure: + kfree_skb(skb); + return NULL; +} + +struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, + int approxlen) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + if (WARN_ON(!rdev->testmode_info)) + return NULL; + + return __cfg80211_testmode_alloc_skb(rdev, approxlen, + rdev->testmode_info->snd_pid, + rdev->testmode_info->snd_seq, + GFP_KERNEL); +} +EXPORT_SYMBOL(cfg80211_testmode_alloc_reply_skb); + +int cfg80211_testmode_reply(struct sk_buff *skb) +{ + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; + void *hdr = ((void **)skb->cb)[1]; + struct nlattr *data = ((void **)skb->cb)[2]; + + if (WARN_ON(!rdev->testmode_info)) { + kfree_skb(skb); + return -EINVAL; + } + + nla_nest_end(skb, data); + genlmsg_end(skb, hdr); + return genlmsg_reply(skb, rdev->testmode_info); +} +EXPORT_SYMBOL(cfg80211_testmode_reply); + +struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, + int approxlen, gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + return __cfg80211_testmode_alloc_skb(rdev, approxlen, 0, 0, gfp); +} +EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb); + +void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) +{ + void *hdr = ((void **)skb->cb)[1]; + struct nlattr *data = ((void **)skb->cb)[2]; + + nla_nest_end(skb, data); + genlmsg_end(skb, hdr); 
+ genlmsg_multicast(skb, 0, nl80211_testmode_mcgrp.id, gfp); +} +EXPORT_SYMBOL(cfg80211_testmode_event); +#endif + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -3629,6 +3751,14 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, +#ifdef CONFIG_NL80211_TESTMODE + { + .cmd = NL80211_CMD_TESTMODE, + .doit = nl80211_testmode_do, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, +#endif }; static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", @@ -4102,6 +4232,12 @@ int nl80211_init(void) if (err) goto err_out; +#ifdef CONFIG_NL80211_TESTMODE + err = genl_register_mc_group(&nl80211_fam, &nl80211_testmode_mcgrp); + if (err) + goto err_out; +#endif + return 0; err_out: genl_unregister_family(&nl80211_fam); -- cgit v1.2.3 From 6a669e65c5ec393a650362874e13f7d3365a7827 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:53 +0200 Subject: wireless: define AKM suites We'll need these values for some drivers using connect API and for wext compat code, so let's define them. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a9173d5434d1..23343ab0bb03 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1196,6 +1196,10 @@ enum ieee80211_sa_query_action { #define WLAN_CIPHER_SUITE_WEP104 0x000FAC05 #define WLAN_CIPHER_SUITE_AES_CMAC 0x000FAC06 +/* AKM suite selectors */ +#define WLAN_AKM_SUITE_8021X 0x000FAC01 +#define WLAN_AKM_SUITE_PSK 0x000FAC02 + #define WLAN_MAX_KEY_LEN 32 /** -- cgit v1.2.3 From b23aa676ab9d54469cda9f7151f51a2851c6f36e Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 1 Jul 2009 21:26:54 +0200 Subject: cfg80211: connect/disconnect API This patch introduces the cfg80211 connect/disconnect API. The goal here is to run the AUTH and ASSOC steps in one call. This is needed for some fullmac cards that run both steps directly from the target, after the host driver sends a connect command. Additionally, all the new crypto parameters for connect() are now also valid for associate() -- although associate requires the IEs to be used, the information can be useful for drivers and should be given. Signed-off-by: Samuel Ortiz Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 80 +++++++++++ include/net/cfg80211.h | 135 +++++++++++++++++- net/mac80211/cfg.c | 2 +- net/wireless/Makefile | 2 +- net/wireless/core.c | 16 ++- net/wireless/core.h | 7 + net/wireless/nl80211.c | 368 +++++++++++++++++++++++++++++++++++++++++++++++- net/wireless/nl80211.h | 13 ++ net/wireless/sme.c | 224 +++++++++++++++++++++++++++++ 9 files changed, 829 insertions(+), 18 deletions(-) create mode 100644 net/wireless/sme.c (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 651b18839088..b34c17f52f3e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -246,6 +246,22 @@ * to identify the device, and the TESTDATA blob attribute to pass through * to the driver. * + * @NL80211_CMD_CONNECT: connection request and notification; this command + * requests to connect to a specified network but without separating + * auth and assoc steps. 
For this, you need to specify the SSID in a + * %NL80211_ATTR_SSID attribute, and can optionally specify the association + * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_MAC, + * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_CONTROL_PORT. + * It is also sent as an event, with the BSSID and response IEs when the + * connection is established or failed to be established. This can be + * determined by the STATUS_CODE attribute. + * @NL80211_CMD_ROAM: request that the card roam (currently not implemented), + * sent as an event when the card/driver roamed by itself. + * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify + * userspace that a connection was dropped by the AP or due to other + * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and + * %NL80211_ATTR_REASON_CODE attributes are used. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -316,6 +332,10 @@ enum nl80211_commands { NL80211_CMD_TESTMODE, + NL80211_CMD_CONNECT, + NL80211_CMD_ROAM, + NL80211_CMD_DISCONNECT, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -520,6 +540,30 @@ enum nl80211_commands { * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. * We recommend using nested, driver-specific attributes within this. * + * @NL80211_ATTR_DISCONNECTED_BY_AP: A flag indicating that the DISCONNECT + * event was due to the AP disconnecting the station, and not due to + * a local disconnect request. + * @NL80211_ATTR_STATUS_CODE: StatusCode for the %NL80211_CMD_CONNECT + * event (u16) + * @NL80211_ATTR_PRIVACY: Flag attribute, used with connect(), indicating + * that protected APs should be used. + * + * @NL80211_ATTR_CIPHERS_PAIRWISE: Used with CONNECT and ASSOCIATE to + * indicate which unicast key ciphers will be used with the connection + * (an array of u32). + * @NL80211_ATTR_CIPHER_GROUP: Used with CONNECT and ASSOCIATE to indicate + * which group key cipher will be used with the connection (a u32). + * @NL80211_ATTR_WPA_VERSIONS: Used with CONNECT and ASSOCIATE to indicate + * which WPA version(s) the AP we want to associate with is using + * (a u32 with flags from &enum nl80211_wpa_versions). + * @NL80211_ATTR_AKM_SUITES: Used with CONNECT and ASSOCIATE to indicate + * which key management algorithm(s) to use (an array of u32). + * + * @NL80211_ATTR_REQ_IE: (Re)association request information elements as + * sent out by the card, for ROAM and successful CONNECT events. + * @NL80211_ATTR_RESP_IE: (Re)association response information elements as + * sent by peer, for ROAM and successful CONNECT events. 
+ * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -630,6 +674,19 @@ enum nl80211_attrs { NL80211_ATTR_TESTDATA, + NL80211_ATTR_PRIVACY, + + NL80211_ATTR_DISCONNECTED_BY_AP, + NL80211_ATTR_STATUS_CODE, + + NL80211_ATTR_CIPHER_SUITES_PAIRWISE, + NL80211_ATTR_CIPHER_SUITE_GROUP, + NL80211_ATTR_WPA_VERSIONS, + NL80211_ATTR_AKM_SUITES, + + NL80211_ATTR_REQ_IE, + NL80211_ATTR_RESP_IE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -640,6 +697,7 @@ enum nl80211_attrs { * Allow user space programs to use #ifdef on new attributes by defining them * here */ +#define NL80211_CMD_CONNECT NL80211_CMD_CONNECT #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES #define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS @@ -653,6 +711,10 @@ enum nl80211_attrs { #define NL80211_ATTR_SSID NL80211_ATTR_SSID #define NL80211_ATTR_AUTH_TYPE NL80211_ATTR_AUTH_TYPE #define NL80211_ATTR_REASON_CODE NL80211_ATTR_REASON_CODE +#define NL80211_ATTR_CIPHER_SUITES_PAIRWISE NL80211_ATTR_CIPHER_SUITES_PAIRWISE +#define NL80211_ATTR_CIPHER_SUITE_GROUP NL80211_ATTR_CIPHER_SUITE_GROUP +#define NL80211_ATTR_WPA_VERSIONS NL80211_ATTR_WPA_VERSIONS +#define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 @@ -661,6 +723,9 @@ enum nl80211_attrs { #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 #define NL80211_HT_CAPABILITY_LEN 26 +#define NL80211_MAX_NR_CIPHER_SUITES 5 +#define NL80211_MAX_NR_AKM_SUITES 2 + /** * enum nl80211_iftype - (virtual) interface types * @@ -1205,12 +1270,22 @@ enum nl80211_bss { * @NL80211_AUTHTYPE_SHARED_KEY: Shared Key authentication (WEP only) * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r) * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP) + * @__NL80211_AUTHTYPE_NUM: internal + * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm + * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by + * trying multiple times); this is invalid in netlink -- leave out + * the attribute for this on CONNECT commands. */ enum nl80211_auth_type { NL80211_AUTHTYPE_OPEN_SYSTEM, NL80211_AUTHTYPE_SHARED_KEY, NL80211_AUTHTYPE_FT, NL80211_AUTHTYPE_NETWORK_EAP, + + /* keep last */ + __NL80211_AUTHTYPE_NUM, + NL80211_AUTHTYPE_MAX = __NL80211_AUTHTYPE_NUM - 1, + NL80211_AUTHTYPE_AUTOMATIC }; /** @@ -1235,4 +1310,9 @@ enum nl80211_mfp { NL80211_MFP_REQUIRED, }; +enum nl80211_wpa_versions { + NL80211_WPA_VERSION_1 = 1 << 0, + NL80211_WPA_VERSION_2 = 1 << 1, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 885d4e5bc4b5..68e11321ed74 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -604,6 +604,30 @@ struct cfg80211_bss { u8 priv[0] __attribute__((__aligned__(sizeof(void *)))); }; +/** + * struct cfg80211_crypto_settings - Crypto settings + * @wpa_versions: indicates which, if any, WPA versions are enabled + * (from enum nl80211_wpa_versions) + * @cipher_group: group key cipher suite (or 0 if unset) + * @n_ciphers_pairwise: number of AP supported unicast ciphers + * @ciphers_pairwise: unicast key cipher suites + * @n_akm_suites: number of AKM suites + * @akm_suites: AKM suites + * @control_port: Whether user space controls IEEE 802.1X port, i.e., + * sets/clears %NL80211_STA_FLAG_AUTHORIZED. 
If true, the driver is + * required to assume that the port is unauthorized until authorized by + * user space. Otherwise, port is marked authorized by default. + */ +struct cfg80211_crypto_settings { + u32 wpa_versions; + u32 cipher_group; + int n_ciphers_pairwise; + u32 ciphers_pairwise[NL80211_MAX_NR_CIPHER_SUITES]; + int n_akm_suites; + u32 akm_suites[NL80211_MAX_NR_AKM_SUITES]; + bool control_port; +}; + /** * struct cfg80211_auth_request - Authentication request data * @@ -658,10 +682,7 @@ struct cfg80211_auth_request { * @ie: Extra IEs to add to (Re)Association Request frame or %NULL * @ie_len: Length of ie buffer in octets * @use_mfp: Use management frame protection (IEEE 802.11w) in this association - * @control_port: Whether user space controls IEEE 802.1X port, i.e., - * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is - * required to assume that the port is unauthorized until authorized by - * user space. Otherwise, port is marked authorized by default. + * @crypto: crypto settings */ struct cfg80211_assoc_request { struct ieee80211_channel *chan; @@ -671,7 +692,7 @@ struct cfg80211_assoc_request { const u8 *ie; size_t ie_len; bool use_mfp; - bool control_port; + struct cfg80211_crypto_settings crypto; }; /** @@ -737,6 +758,36 @@ struct cfg80211_ibss_params { bool channel_fixed; }; +/** + * struct cfg80211_connect_params - Connection parameters + * + * This structure provides information needed to complete IEEE 802.11 + * authentication and association. + * + * @channel: The channel to use or %NULL if not specified (auto-select based + * on scan results) + * @bssid: The AP BSSID or %NULL if not specified (auto-select based on scan + * results) + * @ssid: SSID + * @ssid_len: Length of ssid in octets + * @auth_type: Authentication type (algorithm) + * @assoc_ie: IEs for association request + * @assoc_ie_len: Length of assoc_ie in octets + * @privacy: indicates whether privacy-enabled APs should be used + * @crypto: crypto settings + */ +struct cfg80211_connect_params { + struct ieee80211_channel *channel; + u8 *bssid; + u8 *ssid; + size_t ssid_len; + enum nl80211_auth_type auth_type; + u8 *ie; + size_t ie_len; + bool privacy; + struct cfg80211_crypto_settings crypto; +}; + /** * enum wiphy_params_flags - set_wiphy_params bitfield values * WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed @@ -841,6 +892,12 @@ enum tx_power_setting { * @deauth: Request to deauthenticate from the specified peer * @disassoc: Request to disassociate from the specified peer * + * @connect: Connect to the ESS with the specified parameters. When connected, + * call cfg80211_connect_result() with status code %WLAN_STATUS_SUCCESS. + * If the connection fails for some reason, call cfg80211_connect_result() + * with the status from the AP. + * @disconnect: Disconnect from the BSS/ESS. + * * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call * cfg80211_ibss_joined(), also call that function when changing BSSID due * to a merge. 
@@ -946,6 +1003,11 @@ struct cfg80211_ops { int (*disassoc)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_disassoc_request *req); + int (*connect)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_connect_params *sme); + int (*disconnect)(struct wiphy *wiphy, struct net_device *dev, + u16 reason_code); + int (*join_ibss)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ibss_params *params); int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); @@ -1174,10 +1236,15 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; - /* currently used for IBSS - might be rearranged in the future */ + /* currently used for IBSS and SME - might be rearranged later */ struct cfg80211_bss *current_bss; u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; + enum { + CFG80211_SME_IDLE, + CFG80211_SME_CONNECTING, /* ->connect called */ + CFG80211_SME_CONNECTED, + } sme_state; #ifdef CONFIG_WIRELESS_EXT /* wext data */ @@ -1788,4 +1855,60 @@ void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp); #define CFG80211_TESTMODE_CMD(cmd) #endif +/** + * cfg80211_connect_result - notify cfg80211 of connection result + * + * @dev: network device + * @bssid: the BSSID of the AP + * @req_ie: association request IEs (maybe be %NULL) + * @req_ie_len: association request IEs length + * @resp_ie: association response IEs (may be %NULL) + * @resp_ie_len: assoc response IEs length + * @status: status code, 0 for successful connection, use + * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you + * the real status code for failures. + * @gfp: allocation flags + * + * It should be called by the underlying driver whenever connect() has + * succeeded. + */ +void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, gfp_t gfp); + +/** + * cfg80211_roamed - notify cfg80211 of roaming + * + * @dev: network device + * @bssid: the BSSID of the new AP + * @req_ie: association request IEs (maybe be %NULL) + * @req_ie_len: association request IEs length + * @resp_ie: association response IEs (may be %NULL) + * @resp_ie_len: assoc response IEs length + * @gfp: allocation flags + * + * It should be called by the underlying driver whenever it roamed + * from one AP to another while connected. + */ +void cfg80211_roamed(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp); + +/** + * cfg80211_disconnected - notify cfg80211 that connection was dropped + * + * @dev: network device + * @ie: information elements of the deauth/disassoc frame (may be %NULL) + * @ie_len: length of IEs + * @reason: reason code for the disconnection, set it to 0 if unknown + * @gfp: allocation flags + * + * After it calls this function, the driver should enter an idle state + * and not try to connect to any AP any more. 
+ */ +void cfg80211_disconnected(struct net_device *dev, u16 reason, + u8 *ie, size_t ie_len, gfp_t gfp); + + #endif /* __NET_CFG80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c34c1a41019a..03de4024597a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1262,7 +1262,7 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED; } - if (req->control_port) + if (req->crypto.control_port) sdata->u.mgd.flags |= IEEE80211_STA_CONTROL_PORT; else sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT; diff --git a/net/wireless/Makefile b/net/wireless/Makefile index f78c4832a9ca..750c08e31b10 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o -cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o mlme.o ibss.o +cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o mlme.o ibss.o sme.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o diff --git a/net/wireless/core.c b/net/wireless/core.c index d41b7412b212..314e00f70e3b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -546,6 +546,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, "symlink to netdev!\n"); } wdev->netdev = dev; + wdev->sme_state = CFG80211_SME_IDLE; #ifdef CONFIG_WIRELESS_EXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; @@ -553,11 +554,20 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, mutex_unlock(&rdev->devlist_mtx); break; case NETDEV_GOING_DOWN: - if (wdev->iftype != NL80211_IFTYPE_ADHOC) - break; if (!wdev->ssid_len) break; - cfg80211_leave_ibss(rdev, dev, true); + + switch (wdev->iftype) { + case NL80211_IFTYPE_ADHOC: + cfg80211_leave_ibss(rdev, dev, true); + break; + case NL80211_IFTYPE_STATION: + cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING); + break; + default: + break; + } break; case NETDEV_UP: #ifdef CONFIG_WIRELESS_EXT diff --git a/net/wireless/core.h b/net/wireless/core.h index bc084b68865c..f93f96f85d2b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -174,6 +174,13 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); +/* SME */ +int cfg80211_connect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *connect); +int cfg80211_disconnect(struct cfg80211_registered_device *rdev, + struct net_device *dev, u16 reason); + /* internal helpers */ int cfg80211_validate_key_settings(struct key_params *params, int key_idx, const u8 *mac_addr); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bb8de268a6bf..89dd3793e03c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -128,6 +128,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { .len = sizeof(struct nl80211_sta_flag_update), }, [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, + [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, + [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, + [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, }; /* IE validation */ @@ -347,6 +350,17 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(join_ibss, 
JOIN_IBSS); #undef CMD + + if (dev->ops->connect) { + i++; + NLA_PUT_U32(msg, i, NL80211_CMD_CONNECT); + } + + if (dev->ops->disconnect) { + i++; + NLA_PUT_U32(msg, i, NL80211_CMD_DISCONNECT); + } + nla_nest_end(msg, nl_cmds); return genlmsg_end(msg, hdr); @@ -3001,12 +3015,31 @@ static int nl80211_dump_scan(struct sk_buff *skb, static bool nl80211_valid_auth_type(enum nl80211_auth_type auth_type) { - return auth_type == NL80211_AUTHTYPE_OPEN_SYSTEM || - auth_type == NL80211_AUTHTYPE_SHARED_KEY || - auth_type == NL80211_AUTHTYPE_FT || - auth_type == NL80211_AUTHTYPE_NETWORK_EAP; + return auth_type <= NL80211_AUTHTYPE_MAX; +} + +static bool nl80211_valid_wpa_versions(u32 wpa_versions) +{ + return !(wpa_versions & ~(NL80211_WPA_VERSION_1 | + NL80211_WPA_VERSION_2)); +} + +static bool nl80211_valid_akm_suite(u32 akm) +{ + return akm == WLAN_AKM_SUITE_8021X || + akm == WLAN_AKM_SUITE_PSK; +} + +static bool nl80211_valid_cipher_suite(u32 cipher) +{ + return cipher == WLAN_CIPHER_SUITE_WEP40 || + cipher == WLAN_CIPHER_SUITE_WEP104 || + cipher == WLAN_CIPHER_SUITE_TKIP || + cipher == WLAN_CIPHER_SUITE_CCMP || + cipher == WLAN_CIPHER_SUITE_AES_CMAC; } + static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; @@ -3086,6 +3119,68 @@ unlock_rtnl: return err; } +static int nl80211_crypto_settings(struct genl_info *info, + struct cfg80211_crypto_settings *settings) +{ + settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; + + if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { + void *data; + int len, i; + + data = nla_data(info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]); + len = nla_len(info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]); + settings->n_ciphers_pairwise = len / sizeof(u32); + + if (len % sizeof(u32)) + return -EINVAL; + + if (settings->n_ciphers_pairwise > NL80211_MAX_NR_CIPHER_SUITES) + return -EINVAL; + + memcpy(settings->ciphers_pairwise, data, len); + + for (i = 0; i < settings->n_ciphers_pairwise; i++) + if (!nl80211_valid_cipher_suite( + settings->ciphers_pairwise[i])) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_CIPHER_SUITE_GROUP]) { + settings->cipher_group = + nla_get_u32(info->attrs[NL80211_ATTR_CIPHER_SUITE_GROUP]); + if (!nl80211_valid_cipher_suite(settings->cipher_group)) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_WPA_VERSIONS]) { + settings->wpa_versions = + nla_get_u32(info->attrs[NL80211_ATTR_WPA_VERSIONS]); + if (!nl80211_valid_wpa_versions(settings->wpa_versions)) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_AKM_SUITES]) { + void *data; + int len, i; + + data = nla_data(info->attrs[NL80211_ATTR_AKM_SUITES]); + len = nla_len(info->attrs[NL80211_ATTR_AKM_SUITES]); + settings->n_akm_suites = len / sizeof(u32); + + if (len % sizeof(u32)) + return -EINVAL; + + memcpy(settings->akm_suites, data, len); + + for (i = 0; i < settings->n_ciphers_pairwise; i++) + if (!nl80211_valid_akm_suite(settings->akm_suites[i])) + return -EINVAL; + } + + return 0; +} + static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; @@ -3156,9 +3251,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) } } - req.control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; - - err = drv->ops->assoc(&drv->wiphy, dev, &req); + err = nl80211_crypto_settings(info, &req.crypto); + if (!err) + err = drv->ops->assoc(&drv->wiphy, dev, &req); out: cfg80211_put_dev(drv); @@ -3538,6 +3633,130 @@ void 
cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) EXPORT_SYMBOL(cfg80211_testmode_event); #endif +static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct net_device *dev; + struct cfg80211_connect_params connect; + struct wiphy *wiphy; + int err; + + memset(&connect, 0, sizeof(connect)); + + if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_SSID] || + !nla_len(info->attrs[NL80211_ATTR_SSID])) + return -EINVAL; + + if (info->attrs[NL80211_ATTR_AUTH_TYPE]) { + connect.auth_type = + nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]); + if (!nl80211_valid_auth_type(connect.auth_type)) + return -EINVAL; + } else + connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; + + connect.privacy = info->attrs[NL80211_ATTR_PRIVACY]; + + err = nl80211_crypto_settings(info, &connect.crypto); + if (err) + return err; + rtnl_lock(); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + goto unlock_rtnl; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + wiphy = &drv->wiphy; + + connect.bssid = NULL; + connect.channel = NULL; + + if (info->attrs[NL80211_ATTR_MAC]) + connect.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + connect.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + connect.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + + if (info->attrs[NL80211_ATTR_IE]) { + connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + } + + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + connect.channel = + ieee80211_get_channel(wiphy, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (!connect.channel || + connect.channel->flags & IEEE80211_CHAN_DISABLED) { + err = -EINVAL; + goto out; + } + } + + err = cfg80211_connect(drv, dev, &connect); + +out: + cfg80211_put_dev(drv); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + +static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct net_device *dev; + int err; + u16 reason; + + if (!info->attrs[NL80211_ATTR_REASON_CODE]) + reason = WLAN_REASON_DEAUTH_LEAVING; + else + reason = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); + + if (reason == 0) + return -EINVAL; + + rtnl_lock(); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + goto unlock_rtnl; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + err = cfg80211_disconnect(drv, dev, reason); + +out: + cfg80211_put_dev(drv); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -3759,6 +3978,18 @@ static struct genl_ops nl80211_ops[] = { .flags = GENL_ADMIN_PERM, }, #endif + { + .cmd = NL80211_CMD_CONNECT, + .doit = nl80211_connect, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_DISCONNECT, + .doit = nl80211_disconnect, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", @@ -4077,6 +4308,129 @@ void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, addr, gfp); } +void
nl80211_send_connect_result(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONNECT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + if (bssid) + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid); + NLA_PUT_U16(msg, NL80211_ATTR_STATUS_CODE, status); + if (req_ie) + NLA_PUT(msg, NL80211_ATTR_REQ_IE, req_ie_len, req_ie); + if (resp_ie) + NLA_PUT(msg, NL80211_ATTR_RESP_IE, resp_ie_len, resp_ie); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + +} + +void nl80211_send_roamed(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ROAM); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid); + if (req_ie) + NLA_PUT(msg, NL80211_ATTR_REQ_IE, req_ie_len, req_ie); + if (resp_ie) + NLA_PUT(msg, NL80211_ATTR_RESP_IE, resp_ie_len, resp_ie); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + +} + +void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u16 reason, + u8 *ie, size_t ie_len, bool from_ap, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DISCONNECT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + if (from_ap && reason) + NLA_PUT_U16(msg, NL80211_ATTR_REASON_CODE, reason); + if (from_ap) + NLA_PUT_FLAG(msg, NL80211_ATTR_DISCONNECTED_BY_AP); + if (ie) + NLA_PUT(msg, NL80211_ATTR_IE, ie_len, ie); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + +} + void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, gfp_t gfp) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 662c216e8d4f..cf3708b48c29 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -31,6 +31,19 @@ void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp); +void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 
*resp_ie, size_t resp_ie_len, + u16 status, gfp_t gfp); +void nl80211_send_roamed(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp); +void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u16 reason, + u8 *ie, size_t ie_len, bool from_ap, gfp_t gfp); + void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, diff --git a/net/wireless/sme.c b/net/wireless/sme.c new file mode 100644 index 000000000000..fc117031d0bb --- /dev/null +++ b/net/wireless/sme.c @@ -0,0 +1,224 @@ +/* + * SME code for cfg80211's connect emulation. + * + * Copyright 2009 Johannes Berg + * Copyright (C) 2009 Intel Corporation. All rights reserved. + */ + +#include <linux/etherdevice.h> +#include <linux/if_arp.h> +#include <linux/workqueue.h> +#include <net/cfg80211.h> +#include <net/rtnetlink.h> +#include "nl80211.h" + + +void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_bss *bss; +#ifdef CONFIG_WIRELESS_EXT + union iwreq_data wrqu; +#endif + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING)) + return; + + if (wdev->current_bss) { + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->current_bss); + wdev->current_bss = NULL; + } + + if (status == WLAN_STATUS_SUCCESS) { + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, + wdev->ssid, wdev->ssid_len, + WLAN_CAPABILITY_ESS, + WLAN_CAPABILITY_ESS); + + if (WARN_ON(!bss)) + return; + + cfg80211_hold_bss(bss); + wdev->current_bss = bss; + + wdev->sme_state = CFG80211_SME_CONNECTED; + } else { + wdev->sme_state = CFG80211_SME_IDLE; + } + + nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, bssid, + req_ie, req_ie_len, resp_ie, resp_ie_len, + status, gfp); + +#ifdef CONFIG_WIRELESS_EXT + if (req_ie && status == WLAN_STATUS_SUCCESS) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = req_ie_len; + wireless_send_event(dev, IWEVASSOCREQIE, &wrqu, req_ie); + } + + if (resp_ie && status == WLAN_STATUS_SUCCESS) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = resp_ie_len; + wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, resp_ie); + } + + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; + if (bssid) + memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); +#endif +} +EXPORT_SYMBOL(cfg80211_connect_result); + +void cfg80211_roamed(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_bss *bss; +#ifdef CONFIG_WIRELESS_EXT + union iwreq_data wrqu; +#endif + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED)) + return; + + /* internal error -- how did we get to CONNECTED w/o BSS?
*/ + if (WARN_ON(!wdev->current_bss)) { + return; + } + + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->current_bss); + wdev->current_bss = NULL; + + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, + wdev->ssid, wdev->ssid_len, + WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); + + if (WARN_ON(!bss)) + return; + + cfg80211_hold_bss(bss); + wdev->current_bss = bss; + + nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), dev, bssid, + req_ie, req_ie_len, resp_ie, resp_ie_len, gfp); + +#ifdef CONFIG_WIRELESS_EXT + if (req_ie) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = req_ie_len; + wireless_send_event(dev, IWEVASSOCREQIE, &wrqu, req_ie); + } + + if (resp_ie) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = resp_ie_len; + wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, resp_ie); + } + + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; + memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); +#endif +} +EXPORT_SYMBOL(cfg80211_roamed); + +static void __cfg80211_disconnected(struct net_device *dev, u16 reason, + u8 *ie, size_t ie_len, bool from_ap, + gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; +#ifdef CONFIG_WIRELESS_EXT + union iwreq_data wrqu; +#endif + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED)) + return; + + if (wdev->current_bss) { + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->current_bss); + } + + wdev->current_bss = NULL; + wdev->sme_state = CFG80211_SME_IDLE; + + nl80211_send_disconnected(wiphy_to_dev(wdev->wiphy), dev, + reason, ie, ie_len, from_ap, gfp); + +#ifdef CONFIG_WIRELESS_EXT + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); +#endif +} + +void cfg80211_disconnected(struct net_device *dev, u16 reason, + u8 *ie, size_t ie_len, gfp_t gfp) +{ + __cfg80211_disconnected(dev, reason, ie, ie_len, true, gfp); +} +EXPORT_SYMBOL(cfg80211_disconnected); + +int cfg80211_connect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *connect) +{ + int err; + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (wdev->sme_state != CFG80211_SME_IDLE) + return -EALREADY; + + if (!rdev->ops->connect) { + return -EOPNOTSUPP; + } else { + wdev->sme_state = CFG80211_SME_CONNECTING; + err = rdev->ops->connect(&rdev->wiphy, dev, connect); + if (err) { + wdev->sme_state = CFG80211_SME_IDLE; + return err; + } + } + + memcpy(wdev->ssid, connect->ssid, connect->ssid_len); + wdev->ssid_len = connect->ssid_len; + + return 0; +} + +int cfg80211_disconnect(struct cfg80211_registered_device *rdev, + struct net_device *dev, u16 reason) +{ + int err; + + if (!rdev->ops->disconnect) { + return -EOPNOTSUPP; + } else { + err = rdev->ops->disconnect(&rdev->wiphy, dev, reason); + if (err) + return err; + } + + __cfg80211_disconnected(dev, 0, NULL, 0, false, GFP_KERNEL); + + return 0; +} -- cgit v1.2.3 From 1be491fca12ff599c37ceaf7e9042ebee9f0068e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 5 Jul 2009 14:51:06 +0200 Subject: rfkill: prep for rfkill API changes We've designed the /dev/rfkill API in a way that we can increase the event struct by adding members at the end, should it become necessary.
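For reference, the full event layout as it stands today looks like this (reconstructed from include/linux/rfkill.h; note that the five fields add up to the 8 bytes that the RFKILL_EVENT_SIZE_V1 constant below encodes, and any future members would be appended after 'hard'):

    struct rfkill_event {
            __u32 idx;      /* rfkill device index */
            __u8  type;     /* radio type (wlan, bluetooth, ...) */
            __u8  op;       /* add/del/change operation */
            __u8  soft, hard; /* soft/hard block state */
    } __packed;
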
To validate the events, userspace and the kernel need to have the proper event size to check for -- when reading from the other end they need to verify that it's at least version 1 of the event API, with the current struct size, so define a constant for that and make the code a little more 'future proof'. Not that I expect that we'll have to change the event size any time soon, but it's better to write the code in a way that lends itself to extending. Due to the current size of the event struct, the code is currently equivalent, but should the event struct ever need to be increased the new code might not need changing. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/rfkill.h | 14 ++++++++++++++ net/rfkill/core.c | 10 ++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 2ce29831feb6..f3d5812693d6 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -82,6 +82,20 @@ struct rfkill_event { __u8 soft, hard; } __packed; +/* + * We are planning to be backward and forward compatible with changes + * to the event struct, by adding new, optional, members at the end. + * When reading an event (whether the kernel from userspace or vice + * versa) we need to accept anything that's at least as large as the + * version 1 event size, but might be able to accept other sizes in + * the future. + * + * One exception is the kernel -- we already have two event sizes in + * that we've made the 'hard' member optional since our only option + * is to ignore it anyway. + */ +#define RFKILL_EVENT_SIZE_V1 8 + /* ioctl for turning off rfkill-input (if present) */ #define RFKILL_IOC_MAGIC 'R' #define RFKILL_IOC_NOINPUT 1 diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 79693fe2001e..47497c97c8d9 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1076,10 +1076,16 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, struct rfkill_event ev; /* we don't need the 'hard' variable but accept it */ - if (count < sizeof(ev) - 1) + if (count < RFKILL_EVENT_SIZE_V1 - 1) return -EINVAL; - if (copy_from_user(&ev, buf, sizeof(ev) - 1)) + /* + * Copy as much data as we can accept into our 'ev' buffer, + * but tell userspace how much we've copied so it can determine + * our API version even in a write() call, if it cares. + */ + count = min(count, sizeof(ev)); + if (copy_from_user(&ev, buf, count)) return -EFAULT; if (ev.op != RFKILL_OP_CHANGE && ev.op != RFKILL_OP_CHANGE_ALL) -- cgit v1.2.3 From 3e5d7649a64e558e4146ddfad4dfcf13fc65dd47 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Jul 2009 14:37:26 +0200 Subject: cfg80211: let SME control reassociation vs. association Since we don't really know that well in the kernel, let's let the SME control whether it wants to use reassociation or not, by allowing it to give the previous BSSID in the associate() parameters. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 5 +++++ include/net/cfg80211.h | 3 ++- net/mac80211/cfg.c | 6 ++++++ net/mac80211/mlme.c | 7 ------- net/wireless/core.h | 3 ++- net/wireless/mlme.c | 4 +++- net/wireless/nl80211.c | 10 +++++++--- net/wireless/sme.c | 8 +++++++- 8 files changed, 32 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index b34c17f52f3e..e496a2daf7ef 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -564,6 +564,9 @@ enum nl80211_commands { * @NL80211_ATTR_RESP_IE: (Re)association response information elements as * sent by peer, for ROAM and successful CONNECT events. * + * @NL80211_ATTR_PREV_BSSID: previous BSSID, to be used in ASSOCIATE + * commands to specify using a reassociate frame + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -687,6 +690,8 @@ enum nl80211_attrs { NL80211_ATTR_REQ_IE, NL80211_ATTR_RESP_IE, + NL80211_ATTR_PREV_BSSID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ca986cc91098..71847d3c2640 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -664,10 +664,11 @@ struct cfg80211_auth_request { * @ie_len: Length of ie buffer in octets * @use_mfp: Use management frame protection (IEEE 802.11w) in this association * @crypto: crypto settings + * @prev_bssid: previous BSSID, if not %NULL use reassociate frame */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; - const u8 *ie; + const u8 *ie, *prev_bssid; size_t ie_len; struct cfg80211_crypto_settings crypto; bool use_mfp; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0f29cd0580c9..e6d8860f26f2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1256,6 +1256,12 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED; } + if (req->prev_bssid) { + sdata->u.mgd.flags |= IEEE80211_STA_PREV_BSSID_SET; + memcpy(sdata->u.mgd.prev_bssid, req->prev_bssid, ETH_ALEN); + } else + sdata->u.mgd.flags &= ~IEEE80211_STA_PREV_BSSID_SET; + if (req->crypto.control_port) sdata->u.mgd.flags |= IEEE80211_STA_CONTROL_PORT; else diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index aa1829ae431d..24486455e505 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -879,9 +879,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_put(local, bss); } - ifmgd->flags |= IEEE80211_STA_PREV_BSSID_SET; - memcpy(ifmgd->prev_bssid, sdata->u.mgd.bssid, ETH_ALEN); - ifmgd->last_probe = jiffies; ieee80211_led_assoc(local, 1); @@ -1470,10 +1467,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", sdata->dev->name, status_code); - /* if this was a reassociation, ensure we try a "full" - * association next time. This works around some broken APs - * which do not correctly reject reassociation requests.
*/ - ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET; cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, len, GFP_KERNEL); /* Wait for SME to decide what to do next */ diff --git a/net/wireless/core.h b/net/wireless/core.h index 82918f5896a5..4554453c116a 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -202,7 +202,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *ie, int ie_len); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, - const u8 *bssid, const u8 *ssid, int ssid_len, + const u8 *bssid, const u8 *prev_bssid, + const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 020f33b38467..087d3377958f 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -335,7 +335,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, - const u8 *bssid, const u8 *ssid, int ssid_len, + const u8 *bssid, const u8 *prev_bssid, + const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt) { @@ -353,6 +354,7 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, req.ie_len = ie_len; memcpy(&req.crypto, crypt, sizeof(req.crypto)); req.use_mfp = use_mfp; + req.prev_bssid = prev_bssid; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); if (!req.bss) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 723512b48f2e..44c520c264fc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -71,6 +71,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, [NL80211_ATTR_MAC] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [NL80211_ATTR_PREV_BSSID] = { .type = NLA_BINARY, .len = ETH_ALEN }, [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, @@ -3187,7 +3188,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) struct net_device *dev; struct cfg80211_crypto_settings crypto; struct ieee80211_channel *chan; - const u8 *bssid, *ssid, *ie = NULL; + const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; int err, ssid_len, ie_len = 0; bool use_mfp = false; @@ -3248,10 +3249,13 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_PREV_BSSID]) + prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); + err = nl80211_crypto_settings(info, &crypto, 1); if (!err) - err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, ssid, - ssid_len, ie, ie_len, use_mfp, + err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, + ssid, ssid_len, ie, ie_len, use_mfp, &crypto); out: diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 412161f7b08e..066a19ef9d73 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -125,8 +125,14 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!drv->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; + /* + * We could, later, implement roaming here and then actually + * set prev_bssid to non-NULL. 
But then we need to be aware + * that some APs don't like that -- so we'd need to retry + * the association. + */ err = cfg80211_mlme_assoc(drv, wdev->netdev, - params->channel, params->bssid, + params->channel, params->bssid, NULL, params->ssid, params->ssid_len, params->ie, params->ie_len, false, &params->crypto); -- cgit v1.2.3 From 5a421ce3c062a87db0a9e7f2a0a7ee0a5b869aab Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 10 Jul 2009 12:37:40 +0300 Subject: nfsd41: gather and report statistics also for v4.1 ops Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- include/linux/nfs4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index bd2eba530667..aff924a24abb 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -234,7 +234,7 @@ enum nfs_opnum4 { Needs to be updated if more operations are defined in future.*/ #define FIRST_NFS4_OP OP_ACCESS -#define LAST_NFS4_OP OP_RELEASE_LOCKOWNER +#define LAST_NFS4_OP OP_RECLAIM_COMPLETE enum nfsstat4 { NFS4_OK = 0, -- cgit v1.2.3 From aef73cfcb913eae3d0deeb60eb385f75039db40b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 11 Jul 2009 22:22:14 +0800 Subject: crypto: async - Use kzfree for requests This patch changes the kfree call to kzfree for async requests. As the request may contain sensitive data, it needs to be zeroed before it can be reallocated by others. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 2 +- include/linux/crypto.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 3c4cce6a425c..f74214a4b01b 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -186,7 +186,7 @@ static inline struct ahash_request *ahash_request_alloc( static inline void ahash_request_free(struct ahash_request *req) { - kfree(req); + kzfree(req); } static inline struct ahash_request *ahash_request_cast( diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ec29fa268b94..274f9c7da90c 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -770,7 +770,7 @@ static inline struct ablkcipher_request *ablkcipher_request_alloc( static inline void ablkcipher_request_free(struct ablkcipher_request *req) { - kfree(req); + kzfree(req); } static inline void ablkcipher_request_set_callback( @@ -901,7 +901,7 @@ static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm, static inline void aead_request_free(struct aead_request *req) { - kfree(req); + kzfree(req); } static inline void aead_request_set_callback(struct aead_request *req, -- cgit v1.2.3 From b3a633c8527ef155b1a4e22e8f5abc58f7af54c9 Mon Sep 17 00:00:00 2001 From: Julien Tinnes Date: Fri, 10 Jul 2009 10:46:30 -0700 Subject: personality handling: fix PER_CLEAR_ON_SETID for security reasons We have found that the current PER_CLEAR_ON_SETID mask on Linux includes neither ADDR_COMPAT_LAYOUT nor MMAP_PAGE_ZERO. The current mask is READ_IMPLIES_EXEC|ADDR_NO_RANDOMIZE. We believe it is important to add MMAP_PAGE_ZERO, because by using this personality it is possible to have the first page mapped inside a process running as setuid root.
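As an illustration, a minimal (hypothetical) userspace trigger could look like this; the binary path is a placeholder:

    /* Sketch: set MMAP_PAGE_ZERO, then exec a setuid-root binary.
     * Before this fix, the flag survived the setuid exec, so the
     * privileged image started with page zero mapped (read-only).
     */
    #include <sys/personality.h>
    #include <unistd.h>

    int main(void)
    {
            personality(PER_LINUX | MMAP_PAGE_ZERO);
            execl("/path/to/setuid-binary", "setuid-binary", (char *)NULL);
            return 1; /* only reached if exec fails */
    }
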
Such a mapping could be abused in these scenarios: - Exploiting a NULL pointer dereference issue in a setuid root binary - Bypassing the mmap_min_addr restrictions of the Linux kernel: by running a setuid binary that would drop privileges before giving us control back (for instance by loading a user-supplied library), we could get the first page mapped in a process we control. By further using mremap and mprotect on this mapping, we can then completely bypass the mmap_min_addr restrictions. Less importantly, we believe ADDR_COMPAT_LAYOUT should also be added since on 32-bit x86 it will in practice disable most of the address space layout randomization (only the stack will remain randomized). Signed-off-by: Julien Tinnes Signed-off-by: Tavis Ormandy Acked-by: Christoph Hellwig Acked-by: Kees Cook Signed-off-by: James Morris --- include/linux/personality.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/personality.h b/include/linux/personality.h index a84e9ff9b27e..b7f578dac544 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -40,7 +40,11 @@ enum { * Security-relevant compatibility flags that must be * cleared upon setuid or setgid exec: */ -#define PER_CLEAR_ON_SETID (READ_IMPLIES_EXEC|ADDR_NO_RANDOMIZE) +#define PER_CLEAR_ON_SETID \ + (READ_IMPLIES_EXEC | \ + ADDR_NO_RANDOMIZE | \ + ADDR_COMPAT_LAYOUT | \ + MMAP_PAGE_ZERO) /* * Personality types. -- cgit v1.2.3 From 440c1ce178d6a6743e02d136a55b2de3f6d62a20 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Fri, 10 Jul 2009 15:33:49 +0000 Subject: dropmon: remove duplicated #include Remove duplicated #include('s) in include/linux/net_dropmon.h Signed-off-by: Huang Weiyi Signed-off-by: David S. Miller --- include/linux/net_dropmon.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h index 3ceb0cc1bc78..2a739462caeb 100644 --- a/include/linux/net_dropmon.h +++ b/include/linux/net_dropmon.h @@ -3,7 +3,6 @@ #include #include -#include struct net_dm_drop_point { __u8 pc[8]; -- cgit v1.2.3 From 88056ec346ccf41f63dbc7080b24b5fd19d1358d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Jul 2009 12:28:26 +0800 Subject: crypto: ahash - Convert to new style algorithms This patch converts crypto_ahash to the new style. The old ahash algorithm type is retained until the existing ahash implementations are also converted. All ahash users will automatically get the new crypto_ahash type.
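To illustrate the user-visible API after the conversion, a minimal digest computation looks roughly like this (a sketch only; "sha1" is just an example algorithm name, and a real caller would install a completion callback with ahash_request_set_callback() instead of assuming a synchronous return):

    static int sha1_digest_example(const void *data, unsigned int len, u8 *out)
    {
            struct crypto_ahash *tfm;
            struct ahash_request *req;
            struct scatterlist sg;
            int err;

            tfm = crypto_alloc_ahash("sha1", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            req = ahash_request_alloc(tfm, GFP_KERNEL);
            if (!req) {
                    crypto_free_ahash(tfm);
                    return -ENOMEM;
            }

            sg_init_one(&sg, data, len);
            ahash_request_set_crypt(req, &sg, out, len);

            err = crypto_ahash_digest(req); /* may return -EINPROGRESS */

            ahash_request_free(req);        /* kzfree()s the request */
            crypto_free_ahash(tfm);
            return err;
    }
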
Signed-off-by: Herbert Xu --- crypto/ahash.c | 82 +++++++++++++++++++++---------- crypto/shash.c | 8 +-- include/crypto/hash.h | 109 +++++++++++++++++++++++++++++------------ include/crypto/internal/hash.h | 11 +++-- include/linux/crypto.h | 29 ++--------- 5 files changed, 148 insertions(+), 91 deletions(-) (limited to 'include/linux') diff --git a/crypto/ahash.c b/crypto/ahash.c index f3476374f764..838519386215 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -24,6 +24,12 @@ #include "internal.h" +static inline struct ahash_alg *crypto_ahash_alg(struct crypto_ahash *hash) +{ + return container_of(crypto_hash_alg_common(hash), struct ahash_alg, + halg); +} + static int hash_walk_next(struct crypto_hash_walk *walk) { unsigned int alignmask = walk->alignmask; @@ -169,30 +175,11 @@ static int ahash_nosetkey(struct crypto_ahash *tfm, const u8 *key, return -ENOSYS; } -int crypto_ahash_import(struct ahash_request *req, const u8 *in) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ahash_alg *alg = crypto_ahash_alg(tfm); - - memcpy(ahash_request_ctx(req), in, crypto_ahash_reqsize(tfm)); - - if (alg->reinit) - alg->reinit(req); - - return 0; -} -EXPORT_SYMBOL_GPL(crypto_ahash_import); - -static unsigned int crypto_ahash_ctxsize(struct crypto_alg *alg, u32 type, - u32 mask) -{ - return alg->cra_ctxsize; -} - static int crypto_init_ahash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) { - struct ahash_alg *alg = &tfm->__crt_alg->cra_ahash; - struct ahash_tfm *crt = &tfm->crt_ahash; + struct old_ahash_alg *alg = &tfm->__crt_alg->cra_ahash; + struct crypto_ahash *crt = __crypto_ahash_cast(tfm); + struct ahash_alg *nalg = crypto_ahash_alg(crt); if (alg->digestsize > PAGE_SIZE / 8) return -EINVAL; @@ -204,9 +191,42 @@ static int crypto_init_ahash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) crt->setkey = alg->setkey ? ahash_setkey : ahash_nosetkey; crt->digestsize = alg->digestsize; + nalg->setkey = alg->setkey; + nalg->halg.digestsize = alg->digestsize; + return 0; } +static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_ahash *hash = __crypto_ahash_cast(tfm); + struct ahash_alg *alg = crypto_ahash_alg(hash); + struct old_ahash_alg *oalg = crypto_old_ahash_alg(hash); + + if (tfm->__crt_alg->cra_type != &crypto_ahash_type) + return crypto_init_shash_ops_async(tfm); + + if (oalg->init) + return crypto_init_ahash_ops(tfm, 0, 0); + + hash->init = alg->init; + hash->update = alg->update; + hash->final = alg->final; + hash->digest = alg->digest; + hash->setkey = alg->setkey ? ahash_setkey : ahash_nosetkey; + hash->digestsize = alg->halg.digestsize; + + return 0; +} + +static unsigned int crypto_ahash_extsize(struct crypto_alg *alg) +{ + if (alg->cra_type == &crypto_ahash_type) + return alg->cra_ctxsize; + + return sizeof(struct crypto_shash *); +} + static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) __attribute__ ((unused)); static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) @@ -215,17 +235,29 @@ static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "async : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ? 
"yes" : "no"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); - seq_printf(m, "digestsize : %u\n", alg->cra_ahash.digestsize); + seq_printf(m, "digestsize : %u\n", + __crypto_hash_alg_common(alg)->digestsize); } const struct crypto_type crypto_ahash_type = { - .ctxsize = crypto_ahash_ctxsize, - .init = crypto_init_ahash_ops, + .extsize = crypto_ahash_extsize, + .init_tfm = crypto_ahash_init_tfm, #ifdef CONFIG_PROC_FS .show = crypto_ahash_show, #endif + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_AHASH_MASK, + .type = CRYPTO_ALG_TYPE_AHASH, + .tfmsize = offsetof(struct crypto_ahash, base), }; EXPORT_SYMBOL_GPL(crypto_ahash_type); +struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, u32 type, + u32 mask) +{ + return crypto_alloc_tfm(alg_name, &crypto_ahash_type, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_alloc_ahash); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type"); diff --git a/crypto/shash.c b/crypto/shash.c index 7063e1421504..615a5f4d9b7a 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -267,11 +267,11 @@ static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm) crypto_free_shash(*ctx); } -static int crypto_init_shash_ops_async(struct crypto_tfm *tfm) +int crypto_init_shash_ops_async(struct crypto_tfm *tfm) { struct crypto_alg *calg = tfm->__crt_alg; struct shash_alg *alg = __crypto_shash_alg(calg); - struct ahash_tfm *crt = &tfm->crt_ahash; + struct crypto_ahash *crt = __crypto_ahash_cast(tfm); struct crypto_shash **ctx = crypto_tfm_ctx(tfm); struct crypto_shash *shash; @@ -428,8 +428,6 @@ static int crypto_init_shash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) switch (mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_HASH_MASK: return crypto_init_shash_ops_compat(tfm); - case CRYPTO_ALG_TYPE_AHASH_MASK: - return crypto_init_shash_ops_async(tfm); } return -EINVAL; @@ -441,8 +439,6 @@ static unsigned int crypto_shash_ctxsize(struct crypto_alg *alg, u32 type, switch (mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_HASH_MASK: return sizeof(struct shash_desc *); - case CRYPTO_ALG_TYPE_AHASH_MASK: - return sizeof(struct crypto_shash *); } return 0; diff --git a/include/crypto/hash.h b/include/crypto/hash.h index fcc02d978231..262861d8f0cb 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -15,6 +15,39 @@ #include +struct crypto_ahash; + +struct hash_alg_common { + unsigned int digestsize; + unsigned int statesize; + + struct crypto_alg base; +}; + +struct ahash_request { + struct crypto_async_request base; + + unsigned int nbytes; + struct scatterlist *src; + u8 *result; + + void *__ctx[] CRYPTO_MINALIGN_ATTR; +}; + +struct ahash_alg { + int (*init)(struct ahash_request *req); + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*finup)(struct ahash_request *req); + int (*digest)(struct ahash_request *req); + int (*export)(struct ahash_request *req, void *out); + int (*import)(struct ahash_request *req, const void *in); + int (*setkey)(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen); + + struct hash_alg_common halg; +}; + struct shash_desc { struct crypto_shash *tfm; u32 flags; @@ -37,6 +70,8 @@ struct shash_alg { unsigned int keylen); unsigned int descsize; + + /* These fields must match hash_alg_common. 
*/ unsigned int digestsize; unsigned int statesize; @@ -44,6 +79,18 @@ struct shash_alg { }; struct crypto_ahash { + int (*init)(struct ahash_request *req); + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*finup)(struct ahash_request *req); + int (*digest)(struct ahash_request *req); + int (*export)(struct ahash_request *req, void *out); + int (*import)(struct ahash_request *req, const void *in); + int (*setkey)(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen); + + unsigned int digestsize; + unsigned int reqsize; struct crypto_tfm base; }; @@ -54,19 +101,11 @@ struct crypto_shash { static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm) { - return (struct crypto_ahash *)tfm; + return container_of(tfm, struct crypto_ahash, base); } -static inline struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, - u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - mask &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_AHASH; - mask |= CRYPTO_ALG_TYPE_AHASH_MASK; - - return __crypto_ahash_cast(crypto_alloc_base(alg_name, type, mask)); -} +struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, u32 type, + u32 mask); static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) { @@ -75,7 +114,7 @@ static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) static inline void crypto_free_ahash(struct crypto_ahash *tfm) { - crypto_free_tfm(crypto_ahash_tfm(tfm)); + crypto_destroy_tfm(tfm, crypto_ahash_tfm(tfm)); } static inline unsigned int crypto_ahash_alignmask( @@ -84,14 +123,26 @@ static inline unsigned int crypto_ahash_alignmask( return crypto_tfm_alg_alignmask(crypto_ahash_tfm(tfm)); } -static inline struct ahash_tfm *crypto_ahash_crt(struct crypto_ahash *tfm) +static inline struct hash_alg_common *__crypto_hash_alg_common( + struct crypto_alg *alg) { - return &crypto_ahash_tfm(tfm)->crt_ahash; + return container_of(alg, struct hash_alg_common, base); +} + +static inline struct hash_alg_common *crypto_hash_alg_common( + struct crypto_ahash *tfm) +{ + return __crypto_hash_alg_common(crypto_ahash_tfm(tfm)->__crt_alg); } static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) { - return crypto_ahash_crt(tfm)->digestsize; + return tfm->digestsize; +} + +static inline unsigned int crypto_ahash_statesize(struct crypto_ahash *tfm) +{ + return crypto_hash_alg_common(tfm)->statesize; } static inline u32 crypto_ahash_get_flags(struct crypto_ahash *tfm) @@ -117,7 +168,7 @@ static inline struct crypto_ahash *crypto_ahash_reqtfm( static inline unsigned int crypto_ahash_reqsize(struct crypto_ahash *tfm) { - return crypto_ahash_crt(tfm)->reqsize; + return tfm->reqsize; } static inline void *ahash_request_ctx(struct ahash_request *req) @@ -128,41 +179,37 @@ static inline void *ahash_request_ctx(struct ahash_request *req) static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) { - struct ahash_tfm *crt = crypto_ahash_crt(tfm); - - return crt->setkey(tfm, key, keylen); + return tfm->setkey(tfm, key, keylen); } static inline int crypto_ahash_digest(struct ahash_request *req) { - struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); - return crt->digest(req); + return crypto_ahash_reqtfm(req)->digest(req); } -static inline void crypto_ahash_export(struct ahash_request *req, u8 *out) +static inline int crypto_ahash_export(struct ahash_request *req, void *out) { - memcpy(out, ahash_request_ctx(req), - 
crypto_ahash_reqsize(crypto_ahash_reqtfm(req))); + return crypto_ahash_reqtfm(req)->export(req, out); } -int crypto_ahash_import(struct ahash_request *req, const u8 *in); +static inline int crypto_ahash_import(struct ahash_request *req, const void *in) +{ + return crypto_ahash_reqtfm(req)->import(req, in); +} static inline int crypto_ahash_init(struct ahash_request *req) { - struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); - return crt->init(req); + return crypto_ahash_reqtfm(req)->init(req); } static inline int crypto_ahash_update(struct ahash_request *req) { - struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); - return crt->update(req); + return crypto_ahash_reqtfm(req)->update(req); } static inline int crypto_ahash_final(struct ahash_request *req) { - struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); - return crt->final(req); + return crypto_ahash_reqtfm(req)->final(req); } static inline void ahash_request_set_tfm(struct ahash_request *req, diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 5e45818f3351..08bdffafefad 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -51,6 +51,9 @@ int crypto_hash_walk_first_compat(struct hash_desc *hdesc, struct crypto_hash_walk *walk, struct scatterlist *sg, unsigned int len); +int crypto_register_ahash(struct ahash_alg *alg); +int crypto_unregister_ahash(struct ahash_alg *alg); + int crypto_register_shash(struct shash_alg *alg); int crypto_unregister_shash(struct shash_alg *alg); int shash_register_instance(struct crypto_template *tmpl, @@ -66,12 +69,14 @@ struct shash_alg *shash_attr_alg(struct rtattr *rta, u32 type, u32 mask); int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc); +int crypto_init_shash_ops_async(struct crypto_tfm *tfm); + static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) { - return crypto_tfm_ctx(&tfm->base); + return crypto_tfm_ctx(crypto_ahash_tfm(tfm)); } -static inline struct ahash_alg *crypto_ahash_alg( +static inline struct old_ahash_alg *crypto_old_ahash_alg( struct crypto_ahash *tfm) { return &crypto_ahash_tfm(tfm)->__crt_alg->cra_ahash; @@ -80,7 +85,7 @@ static inline struct ahash_alg *crypto_ahash_alg( static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, unsigned int reqsize) { - crypto_ahash_crt(tfm)->reqsize = reqsize; + tfm->reqsize = reqsize; } static inline int ahash_enqueue_request(struct crypto_queue *queue, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 274f9c7da90c..9e7e9b62a3dc 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -120,6 +120,7 @@ struct crypto_rng; struct crypto_tfm; struct crypto_type; struct aead_givcrypt_request; +struct ahash_request; struct skcipher_givcrypt_request; typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err); @@ -146,16 +147,6 @@ struct ablkcipher_request { void *__ctx[] CRYPTO_MINALIGN_ATTR; }; -struct ahash_request { - struct crypto_async_request base; - - unsigned int nbytes; - struct scatterlist *src; - u8 *result; - - void *__ctx[] CRYPTO_MINALIGN_ATTR; -}; - /** * struct aead_request - AEAD request * @base: Common attributes for async crypto requests @@ -220,7 +211,7 @@ struct ablkcipher_alg { unsigned int ivsize; }; -struct ahash_alg { +struct old_ahash_alg { int (*init)(struct ahash_request *req); int (*reinit)(struct ahash_request *req); int (*update)(struct 
ahash_request *req); @@ -346,7 +337,7 @@ struct crypto_alg { struct cipher_alg cipher; struct digest_alg digest; struct hash_alg hash; - struct ahash_alg ahash; + struct old_ahash_alg ahash; struct compress_alg compress; struct rng_alg rng; } cra_u; @@ -433,18 +424,6 @@ struct hash_tfm { unsigned int digestsize; }; -struct ahash_tfm { - int (*init)(struct ahash_request *req); - int (*update)(struct ahash_request *req); - int (*final)(struct ahash_request *req); - int (*digest)(struct ahash_request *req); - int (*setkey)(struct crypto_ahash *tfm, const u8 *key, - unsigned int keylen); - - unsigned int digestsize; - unsigned int reqsize; -}; - struct compress_tfm { int (*cot_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, @@ -465,7 +444,6 @@ struct rng_tfm { #define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher #define crt_hash crt_u.hash -#define crt_ahash crt_u.ahash #define crt_compress crt_u.compress #define crt_rng crt_u.rng @@ -479,7 +457,6 @@ struct crypto_tfm { struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; struct hash_tfm hash; - struct ahash_tfm ahash; struct compress_tfm compress; struct rng_tfm rng; } crt_u; -- cgit v1.2.3 From 500b3e3c3dc8e4845b77ae81e5b7b085ab183ce6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Jul 2009 20:29:57 +0800 Subject: crypto: ahash - Remove old_ahash_alg Now that all ahash implementations have been converted to the new ahash type, we can remove old_ahash_alg and its associated support. Signed-off-by: Herbert Xu --- crypto/ahash.c | 27 --------------------------- crypto/shash.c | 2 -- include/crypto/hash.h | 3 +-- include/crypto/internal/hash.h | 6 ------ include/linux/crypto.h | 16 ---------------- 5 files changed, 1 insertion(+), 53 deletions(-) (limited to 'include/linux') diff --git a/crypto/ahash.c b/crypto/ahash.c index 7f599d26086a..cc824ef25830 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -175,46 +175,19 @@ static int ahash_nosetkey(struct crypto_ahash *tfm, const u8 *key, return -ENOSYS; } -static int crypto_init_ahash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) -{ - struct old_ahash_alg *alg = &tfm->__crt_alg->cra_ahash; - struct crypto_ahash *crt = __crypto_ahash_cast(tfm); - struct ahash_alg *nalg = crypto_ahash_alg(crt); - - if (alg->digestsize > PAGE_SIZE / 8) - return -EINVAL; - - crt->init = alg->init; - crt->update = alg->update; - crt->final = alg->final; - crt->digest = alg->digest; - crt->setkey = alg->setkey ? ahash_setkey : ahash_nosetkey; - crt->digestsize = alg->digestsize; - - nalg->setkey = alg->setkey; - nalg->halg.digestsize = alg->digestsize; - - return 0; -} - static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) { struct crypto_ahash *hash = __crypto_ahash_cast(tfm); struct ahash_alg *alg = crypto_ahash_alg(hash); - struct old_ahash_alg *oalg = crypto_old_ahash_alg(hash); if (tfm->__crt_alg->cra_type != &crypto_ahash_type) return crypto_init_shash_ops_async(tfm); - if (oalg->init) - return crypto_init_ahash_ops(tfm, 0, 0); - hash->init = alg->init; hash->update = alg->update; hash->final = alg->final; hash->digest = alg->digest; hash->setkey = alg->setkey ? 
ahash_setkey : ahash_nosetkey; - hash->digestsize = alg->halg.digestsize; return 0; } diff --git a/crypto/shash.c b/crypto/shash.c index 615a5f4d9b7a..fd92c03b38fc 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -270,7 +270,6 @@ static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm) int crypto_init_shash_ops_async(struct crypto_tfm *tfm) { struct crypto_alg *calg = tfm->__crt_alg; - struct shash_alg *alg = __crypto_shash_alg(calg); struct crypto_ahash *crt = __crypto_ahash_cast(tfm); struct crypto_shash **ctx = crypto_tfm_ctx(tfm); struct crypto_shash *shash; @@ -293,7 +292,6 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) crt->digest = shash_async_digest; crt->setkey = shash_async_setkey; - crt->digestsize = alg->digestsize; crt->reqsize = sizeof(struct shash_desc) + crypto_shash_descsize(shash); return 0; diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 262861d8f0cb..45c2bddfdf32 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -89,7 +89,6 @@ struct crypto_ahash { int (*setkey)(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); - unsigned int digestsize; unsigned int reqsize; struct crypto_tfm base; }; @@ -137,7 +136,7 @@ static inline struct hash_alg_common *crypto_hash_alg_common( static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) { - return tfm->digestsize; + return crypto_hash_alg_common(tfm)->digestsize; } static inline unsigned int crypto_ahash_statesize(struct crypto_ahash *tfm) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index e3a82514d61e..179dd8fdfa14 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -109,12 +109,6 @@ static inline struct ahash_alg *__crypto_ahash_alg(struct crypto_alg *alg) halg); } -static inline struct old_ahash_alg *crypto_old_ahash_alg( - struct crypto_ahash *tfm) -{ - return &crypto_ahash_tfm(tfm)->__crt_alg->cra_ahash; -} - static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, unsigned int reqsize) { diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 9e7e9b62a3dc..fd929889e8dc 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -115,12 +115,10 @@ struct crypto_async_request; struct crypto_aead; struct crypto_blkcipher; struct crypto_hash; -struct crypto_ahash; struct crypto_rng; struct crypto_tfm; struct crypto_type; struct aead_givcrypt_request; -struct ahash_request; struct skcipher_givcrypt_request; typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err); @@ -211,18 +209,6 @@ struct ablkcipher_alg { unsigned int ivsize; }; -struct old_ahash_alg { - int (*init)(struct ahash_request *req); - int (*reinit)(struct ahash_request *req); - int (*update)(struct ahash_request *req); - int (*final)(struct ahash_request *req); - int (*digest)(struct ahash_request *req); - int (*setkey)(struct crypto_ahash *tfm, const u8 *key, - unsigned int keylen); - - unsigned int digestsize; -}; - struct aead_alg { int (*setkey)(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); @@ -309,7 +295,6 @@ struct rng_alg { #define cra_cipher cra_u.cipher #define cra_digest cra_u.digest #define cra_hash cra_u.hash -#define cra_ahash cra_u.ahash #define cra_compress cra_u.compress #define cra_rng cra_u.rng @@ -337,7 +322,6 @@ struct crypto_alg { struct cipher_alg cipher; struct digest_alg digest; struct hash_alg hash; - struct old_ahash_alg ahash; struct compress_alg compress; struct rng_alg rng; } cra_u; -- cgit v1.2.3 From 
4bd9b0f4afc76cf972578c702e1bc1b6f2d10ba5 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 24 Jun 2009 15:37:45 -0400 Subject: nfsd41: use globals for DRC limits The version 4.1 DRC memory limit and tracking variables are server wide and session specific. Replace struct svc_serv fields with globals. Stop using the svc_serv sv_lock. Add a spinlock to serialize access to the DRC limit management variables which change on session creation and deletion (usage counter) or (future) administrative action to adjust the total DRC memory limit. Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy --- fs/nfsd/nfs4state.c | 10 +++++----- fs/nfsd/nfssvc.c | 19 +++++++++++++++---- include/linux/nfsd/nfsd.h | 3 +++ include/linux/sunrpc/svc.h | 2 -- 4 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 980a216a48c8..2e6a44e3d2fe 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -430,11 +430,11 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - spin_lock(&nfsd_serv->sv_lock); - if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) - np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; - nfsd_serv->sv_drc_pages_used += np; - spin_unlock(&nfsd_serv->sv_lock); + spin_lock(&nfsd_drc_lock); + if (np + nfsd_drc_pages_used > nfsd_drc_max_pages) + np = nfsd_drc_max_pages - nfsd_drc_pages_used; + nfsd_drc_pages_used += np; + spin_unlock(&nfsd_drc_lock); if (np <= 0) { status = nfserr_resource; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d4c9884cd54b..78d8fcd883fb 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -67,6 +67,16 @@ struct timeval nfssvc_boot; DEFINE_MUTEX(nfsd_mutex); struct svc_serv *nfsd_serv; +/* + * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. + * nfsd_drc_max_pages limits the total amount of memory available for + * version 4.1 DRC caches. + * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 
+ */ +spinlock_t nfsd_drc_lock; +unsigned int nfsd_drc_max_pages; +unsigned int nfsd_drc_pages_used; + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static struct svc_version * nfsd_acl_version[] = { @@ -238,11 +248,12 @@ static void set_max_drc(void) { /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ #define NFSD_DRC_SIZE_SHIFT 7 - nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() + nfsd_drc_max_pages = nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT; - nfsd_serv->sv_drc_pages_used = 0; - dprintk("%s svc_drc_max_pages %u\n", __func__, - nfsd_serv->sv_drc_max_pages); + nfsd_drc_pages_used = 0; + spin_lock_init(&nfsd_drc_lock); + dprintk("%s nfsd_drc_max_pages %u\n", __func__, + nfsd_drc_max_pages); } int nfsd_create_serv(void) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2b49d676d0c9..2571f856908f 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -56,6 +56,9 @@ extern struct svc_version nfsd_version2, nfsd_version3, extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; +extern spinlock_t nfsd_drc_lock; +extern unsigned int nfsd_drc_max_pages; +extern unsigned int nfsd_drc_pages_used; extern struct seq_operations nfs_exports_op; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index ea8009695c69..52e8cb0a7569 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -94,8 +94,6 @@ struct svc_serv { struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ - unsigned int sv_drc_max_pages; /* Total pages for DRC */ - unsigned int sv_drc_pages_used;/* DRC pages used */ #if defined(CONFIG_NFS_V4_1) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same -- cgit v1.2.3 From 3d39cecc4841e8d4c4abdb401d10180f5faaded0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 8 Jul 2009 15:23:30 +0100 Subject: intel-iommu: Remove superfluous iova_alloc_lock from IOVA code We only ever obtain this lock immediately before the iova_rbtree_lock, and release it immediately after the iova_rbtree_lock. So ditch it and just use iova_rbtree_lock. 
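Schematically, the change collapses a redundant lock pair into a single lock (illustrative, not the literal patch hunks):

    /* Before: the outer alloc lock added nothing -- it was only
     * ever held around the rbtree lock.
     */
    spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
    spin_lock(&iovad->iova_rbtree_lock);
    /* ... rbtree insertion/lookup ... */
    spin_unlock(&iovad->iova_rbtree_lock);
    spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);

    /* After: take the rbtree lock directly, IRQ-safe. */
    spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
    /* ... rbtree insertion/lookup ... */
    spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
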
[v2: Remove the lockdep bits this time too] Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 --- drivers/pci/iova.c | 16 ++++------------ include/linux/iova.h | 1 - 3 files changed, 4 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c5f7c73cbb55..d6a857397ec3 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1309,7 +1309,6 @@ static void iommu_detach_domain(struct dmar_domain *domain, } static struct iova_domain reserved_iova_list; -static struct lock_class_key reserved_alloc_key; static struct lock_class_key reserved_rbtree_key; static void dmar_init_reserved_ranges(void) @@ -1320,8 +1319,6 @@ static void dmar_init_reserved_ranges(void) init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); - lockdep_set_class(&reserved_iova_list.iova_alloc_lock, - &reserved_alloc_key); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 46dd440e2315..7914951ef29a 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -22,7 +22,6 @@ void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) { - spin_lock_init(&iovad->iova_alloc_lock); spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; iovad->cached32_node = NULL; @@ -205,7 +204,6 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool size_aligned) { - unsigned long flags; struct iova *new_iova; int ret; @@ -219,11 +217,9 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, if (size_aligned) size = __roundup_pow_of_two(size); - spin_lock_irqsave(&iovad->iova_alloc_lock, flags); ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, new_iova, size_aligned); - spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); if (ret) { free_iova_mem(new_iova); return NULL; @@ -381,8 +377,7 @@ reserve_iova(struct iova_domain *iovad, struct iova *iova; unsigned int overlap = 0; - spin_lock_irqsave(&iovad->iova_alloc_lock, flags); - spin_lock(&iovad->iova_rbtree_lock); + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) { iova = container_of(node, struct iova, node); @@ -402,8 +397,7 @@ reserve_iova(struct iova_domain *iovad, iova = __insert_new_range(iovad, pfn_lo, pfn_hi); finish: - spin_unlock(&iovad->iova_rbtree_lock); - spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return iova; } @@ -420,8 +414,7 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) unsigned long flags; struct rb_node *node; - spin_lock_irqsave(&from->iova_alloc_lock, flags); - spin_lock(&from->iova_rbtree_lock); + spin_lock_irqsave(&from->iova_rbtree_lock, flags); for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { struct iova *iova = container_of(node, struct iova, node); struct iova *new_iova; @@ -430,6 +423,5 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", iova->pfn_lo, iova->pfn_lo); } - spin_unlock(&from->iova_rbtree_lock); - spin_unlock_irqrestore(&from->iova_alloc_lock, flags); + spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); } diff --git a/include/linux/iova.h b/include/linux/iova.h index 228f6c94b69c..76a0759e88ec 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -28,7 +28,6 @@ struct 
iova { /* holds all the iova translations for a domain */ struct iova_domain { - spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */ spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ -- cgit v1.2.3 From a76761b621bcd8336065c4fe3a74f046858bc34c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 15 Jul 2009 23:35:14 +0900 Subject: percpu: add dummy pcpu_lpage_remapped() for !CONFIG_SMP !CONFIG_SMP was missing the pcpu_lpage_remapped() definition, causing a build failure. Add a dummy implementation. This was discovered by linux-next testing. Signed-off-by: Tejun Heo Cc: Randy Dunlap Cc: Kamalesh Babulal Cc: Stephen Rothwell --- include/linux/percpu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 8ce91af4aa19..e134c8229631 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -184,6 +184,11 @@ static inline void free_percpu(void *p) static inline void __init setup_per_cpu_areas(void) { } +static inline void *pcpu_lpage_remapped(void *kaddr) +{ + return NULL; +} + #endif /* CONFIG_SMP */ #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ -- cgit v1.2.3 From 1dacc76d0014a034b8aca14237c127d7c19d7726 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 11:26:02 +0000 Subject: net/compat/wext: send different messages to compat tasks Wireless extensions have the unfortunate problem that events are multicast netlink messages, and are not independent of pointer size. Thus, currently 32-bit tasks on 64-bit platforms cannot properly receive events and fail with all kinds of strange problems; for instance, wpa_supplicant never notices disassociations because, given the way the 64-bit event looks to a 32-bit process, the fact that the address is all zeroes is lost, and it instead reads the address as 00:00:00:00:01:00. The same problem existed with the ioctls, until David Miller fixed those some time ago in a heroic effort. A different problem caused by this is that we cannot send the ASSOCREQIE/ASSOCRESPIE events, because sending them causes a 32-bit wpa_supplicant on a 64-bit system to overwrite its internal information, which is worse than it not getting the information at all -- so we currently resort to sending a custom string event that it then parses. This, however, has a severe size limitation we are frequently hitting with modern access points; this limitation can be lifted after this patch by sending the correct binary, not custom, event. A similar problem apparently happens for some other netlink users on x86_64 with 32-bit tasks due to the alignment for 64-bit quantities. In order to fix these problems, I have implemented a way to send compat messages to tasks. When sending an event, we send the non-compat event data together with the compat event data in skb_shinfo(main_skb)->frag_list. Then, when the event is read from the socket, the netlink code makes sure to pass out only the skb that is compatible with the task. This approach was suggested by David Miller; my original approach required always sending two skbs, but that had various small problems. To determine whether compat is needed or not, I have used the MSG_CMSG_COMPAT flag, and adjusted the call path for recv and recvfrom to include it, even if those calls do not have a cmsg parameter.
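For reference, the pointer-size dependence the patch works around comes from the event header embedding a pointer-typed field. A simplified illustration follows -- hypothetical struct names, not the exact definitions from wireless.h:

struct iw_point_like {
        void *pointer;          /* 4 bytes on 32-bit, 8 bytes on 64-bit */
        unsigned short length;
        unsigned short flags;
};

struct iw_event_like {
        unsigned short len;     /* total event length */
        unsigned short cmd;     /* wireless request number */
        /*
         * On a 64-bit kernel the member below is 8-byte aligned, so 4
         * bytes of padding follow 'cmd'; a 32-bit reader expects the
         * payload immediately after 'cmd' and misinterprets everything
         * that follows.
         */
        struct iw_point_like u;
};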
I have not solved one small part of the problem, and I don't think it is necessary to: if a 32-bit application uses read() rather than any form of recvmsg() it will still get the wrong (64-bit) event. However, neither do applications actually do this, nor would it be a regression. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 4 +-- arch/sparc/kernel/sys32.S | 2 +- include/linux/wireless.h | 8 +++++ net/Kconfig | 20 +++++++++++ net/compat.c | 17 +++++++-- net/netlink/af_netlink.c | 36 ++++++++++++++++++- net/wireless/wext.c | 78 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 160 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 15874f9812cc..7c4a94f43706 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -164,7 +164,7 @@ EXPORT(sysn32_call_table) PTR sys_connect PTR sys_accept PTR sys_sendto - PTR sys_recvfrom + PTR compat_sys_recvfrom PTR compat_sys_sendmsg /* 6045 */ PTR compat_sys_recvmsg PTR sys_shutdown diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 781e0f1e9533..821fc978673d 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -378,8 +378,8 @@ sys_call_table: PTR sys_getsockname PTR sys_getsockopt PTR sys_listen - PTR sys_recv /* 4175 */ - PTR sys_recvfrom + PTR compat_sys_recv /* 4175 */ + PTR compat_sys_recvfrom PTR compat_sys_recvmsg PTR sys_send PTR compat_sys_sendmsg diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index f061c4dda9ef..3762f6c78944 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -121,7 +121,7 @@ SIGN2(sys32_syslog, sys_syslog, %o0, %o2) SIGN1(sys32_umask, sys_umask, %o0) SIGN3(sys32_tgkill, sys_tgkill, %o0, %o1, %o2) SIGN1(sys32_sendto, sys_sendto, %o0) -SIGN1(sys32_recvfrom, sys_recvfrom, %o0) +SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0) SIGN3(sys32_socket, sys_socket, %o0, %o1, %o2) SIGN2(sys32_connect, sys_connect, %o0, %o2) SIGN2(sys32_bind, sys_bind, %o0, %o2) diff --git a/include/linux/wireless.h b/include/linux/wireless.h index cb24204851f7..5b4c6c772a9b 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -1132,6 +1132,14 @@ struct __compat_iw_event { }; #define IW_EV_COMPAT_LCP_LEN offsetof(struct __compat_iw_event, pointer) #define IW_EV_COMPAT_POINT_OFF offsetof(struct compat_iw_point, length) + +/* Size of the various events for compat */ +#define IW_EV_COMPAT_CHAR_LEN (IW_EV_COMPAT_LCP_LEN + IFNAMSIZ) +#define IW_EV_COMPAT_UINT_LEN (IW_EV_COMPAT_LCP_LEN + sizeof(__u32)) +#define IW_EV_COMPAT_FREQ_LEN (IW_EV_COMPAT_LCP_LEN + sizeof(struct iw_freq)) +#define IW_EV_COMPAT_PARAM_LEN (IW_EV_COMPAT_LCP_LEN + sizeof(struct iw_param)) +#define IW_EV_COMPAT_ADDR_LEN (IW_EV_COMPAT_LCP_LEN + sizeof(struct sockaddr)) +#define IW_EV_COMPAT_QUAL_LEN (IW_EV_COMPAT_LCP_LEN + sizeof(struct iw_quality)) #define IW_EV_COMPAT_POINT_LEN \ (IW_EV_COMPAT_LCP_LEN + sizeof(struct compat_iw_point) - \ IW_EV_COMPAT_POINT_OFF) diff --git a/net/Kconfig b/net/Kconfig index 7051b9710675..041c35edb763 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -23,6 +23,26 @@ menuconfig NET if NET +config WANT_COMPAT_NETLINK_MESSAGES + bool + help + This option can be selected by other options that need compat + netlink messages. 
+ +config COMPAT_NETLINK_MESSAGES + def_bool y + depends on COMPAT + depends on WIRELESS_EXT || WANT_COMPAT_NETLINK_MESSAGES + help + This option makes it possible to send different netlink messages + to tasks depending on whether the task is a compat task or not. To + achieve this, you need to set skb_shinfo(skb)->frag_list to the + compat skb before sending the skb, the netlink code will sort out + which message to actually pass to the task. + + Newly written code should NEVER need this option but do + compat-independent messages instead! + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/compat.c b/net/compat.c index 8d739053afe4..12728b17a226 100644 --- a/net/compat.c +++ b/net/compat.c @@ -743,6 +743,18 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } +asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, unsigned flags) +{ + return sys_recv(fd, buf, len, flags | MSG_CMSG_COMPAT); +} + +asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, + unsigned flags, struct sockaddr __user *addr, + int __user *addrlen) +{ + return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); +} + asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; @@ -788,10 +800,11 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ret = sys_sendto(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), a[5]); break; case SYS_RECV: - ret = sys_recv(a0, compat_ptr(a1), a[2], a[3]); + ret = compat_sys_recv(a0, compat_ptr(a1), a[2], a[3]); break; case SYS_RECVFROM: - ret = sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), compat_ptr(a[5])); + ret = compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], + compat_ptr(a[4]), compat_ptr(a[5])); break; case SYS_SHUTDOWN: ret = sys_shutdown(a0,a1); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d46da6cb92e4..da3163d15ef0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1361,7 +1361,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; - struct sk_buff *skb; + struct sk_buff *skb, *frag __maybe_unused = NULL; int err; if (flags&MSG_OOB) @@ -1373,6 +1373,35 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; +#ifdef CONFIG_COMPAT_NETLINK_MESSAGES + if (unlikely(skb_shinfo(skb)->frag_list)) { + bool need_compat = !!(flags & MSG_CMSG_COMPAT); + + /* + * If this skb has a frag_list, then here that means that + * we will have to use the frag_list skb for compat tasks + * and the regular skb for non-compat tasks. + * + * The skb might (and likely will) be cloned, so we can't + * just reset frag_list and go on with things -- we need to + * keep that. For the compat case that's easy -- simply get + * a reference to the compat skb and free the regular one + * including the frag. For the non-compat case, we need to + * avoid sending the frag to the user -- so assign NULL but + * restore it below before freeing the skb. 
+ */ + if (need_compat) { + struct sk_buff *compskb = skb_shinfo(skb)->frag_list; + skb_get(compskb); + kfree_skb(skb); + skb = compskb; + } else { + frag = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; + } + } +#endif + msg->msg_namelen = 0; copied = skb->len; @@ -1403,6 +1432,11 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, siocb->scm->creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) copied = skb->len; + +#ifdef CONFIG_COMPAT_NETLINK_MESSAGES + skb_shinfo(skb)->frag_list = frag; +#endif + skb_free_datagram(sk, skb); if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) diff --git a/net/wireless/wext.c b/net/wireless/wext.c index ee35e6422f1f..3fe3c2c0ce11 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -417,6 +417,21 @@ static const int event_type_size[] = { IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ }; +#ifdef CONFIG_COMPAT +static const int compat_event_type_size[] = { + IW_EV_COMPAT_LCP_LEN, /* IW_HEADER_TYPE_NULL */ + 0, + IW_EV_COMPAT_CHAR_LEN, /* IW_HEADER_TYPE_CHAR */ + 0, + IW_EV_COMPAT_UINT_LEN, /* IW_HEADER_TYPE_UINT */ + IW_EV_COMPAT_FREQ_LEN, /* IW_HEADER_TYPE_FREQ */ + IW_EV_COMPAT_ADDR_LEN, /* IW_HEADER_TYPE_ADDR */ + 0, + IW_EV_COMPAT_POINT_LEN, /* Without variable payload */ + IW_EV_COMPAT_PARAM_LEN, /* IW_HEADER_TYPE_PARAM */ + IW_EV_COMPAT_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ +}; +#endif /************************ COMMON SUBROUTINES ************************/ /* @@ -1348,6 +1363,22 @@ void wireless_send_event(struct net_device * dev, struct sk_buff *skb; struct nlmsghdr *nlh; struct nlattr *nla; +#ifdef CONFIG_COMPAT + struct __compat_iw_event *compat_event; + struct compat_iw_point compat_wrqu; + struct sk_buff *compskb; +#endif + + /* + * Nothing in the kernel sends scan events with data, be safe. + * This is necessary because we cannot fix up scan event data + * for compat, due to being contained in 'extra', but normally + * applications are required to retrieve the scan data anyway + * and no data is included in the event, this codifies that + * practice. 
+ */ + if (WARN_ON(cmd == SIOCGIWSCAN && extra)) + extra = NULL; /* Get the description of the Event */ if (cmd <= SIOCIWLAST) { @@ -1446,7 +1477,54 @@ void wireless_send_event(struct net_device * dev, memcpy(((char *) event) + hdr_len, extra, extra_len); nlmsg_end(skb, nlh); +#ifdef CONFIG_COMPAT + hdr_len = compat_event_type_size[descr->header_type]; + event_len = hdr_len + extra_len; + compskb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!compskb) { + kfree_skb(skb); + return; + } + + /* Send via the RtNetlink event channel */ + nlh = rtnetlink_ifinfo_prep(dev, compskb); + if (WARN_ON(!nlh)) { + kfree_skb(skb); + kfree_skb(compskb); + return; + } + + /* Add the wireless events in the netlink packet */ + nla = nla_reserve(compskb, IFLA_WIRELESS, event_len); + if (!nla) { + kfree_skb(skb); + kfree_skb(compskb); + return; + } + compat_event = nla_data(nla); + + compat_event->len = event_len; + compat_event->cmd = cmd; + if (descr->header_type == IW_HEADER_TYPE_POINT) { + compat_wrqu.length = wrqu->data.length; + compat_wrqu.flags = wrqu->data.flags; + memcpy(&compat_event->pointer, + ((char *) &compat_wrqu) + IW_EV_COMPAT_POINT_OFF, + hdr_len - IW_EV_COMPAT_LCP_LEN); + if (extra_len) + memcpy(((char *) compat_event) + hdr_len, + extra, extra_len); + } else { + /* extra_len must be zero, so no if (extra) needed */ + memcpy(&compat_event->pointer, wrqu, + hdr_len - IW_EV_COMPAT_LCP_LEN); + } + + nlmsg_end(compskb, nlh); + + skb_shinfo(skb)->frag_list = compskb; +#endif skb_queue_tail(&dev_net(dev)->wext_nlevents, skb); schedule_work(&wireless_nlevent_work); } -- cgit v1.2.3 From 5bb459bb45d1ad3c177485dcf0af01580aa31125 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 10 Jul 2009 03:48:23 +0200 Subject: kernel: rename is_single_threaded(task) to current_is_single_threaded(void) - is_single_threaded(task) is not safe unless task == current, we can't use task->signal or task->mm. - it doesn't make sense unless task == current, the task can fork right after the check. Rename it to current_is_single_threaded() and kill the argument. Signed-off-by: Oleg Nesterov Acked-by: David Howells Signed-off-by: James Morris --- include/linux/sched.h | 2 +- lib/is_single_threaded.c | 3 ++- security/keys/process_keys.c | 2 +- security/selinux/hooks.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 16a982e389fb..0839a2c9b952 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2075,7 +2075,7 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) -extern bool is_single_threaded(struct task_struct *); +extern bool current_is_single_threaded(void); /* * Careful: do_each_thread/while_each_thread is a double loop so diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 2762516e0a5e..434010980bdf 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -15,8 +15,9 @@ /* * Returns true if the task does not share ->mm with another thread/process. 
*/ -bool is_single_threaded(struct task_struct *task) +bool current_is_single_threaded(void) { + struct task_struct *task = current; struct mm_struct *mm = task->mm; struct task_struct *p, *t; bool ret; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 276d27882ce8..ed929af466d3 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -702,7 +702,7 @@ long join_session_keyring(const char *name) /* only permit this if there's a single thread in the thread group - * this avoids us having to adjust the creds on all threads and risking * ENOMEM */ - if (!is_single_threaded(current)) + if (!current_is_single_threaded()) return -EMLINK; new = prepare_creds(); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2081055f6783..e65677da36bd 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5187,7 +5187,7 @@ static int selinux_setprocattr(struct task_struct *p, /* Only allow single threaded processes to change context */ error = -EPERM; - if (!is_single_threaded(p)) { + if (!current_is_single_threaded()) { error = security_bounded_transition(tsec->sid, sid); if (error) goto abort_change; -- cgit v1.2.3 From e36aa25a533962b08402530e8443ac804a454e27 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 14 Jul 2009 14:21:04 +0000 Subject: tun: Allow tap device to send/receive UFO packets. - Allow setting UFO on tap device and handle UFO packets. Signed-off-by: Sridhar Samudrala --------------------------------------------------------- Signed-off-by: David S. Miller --- drivers/net/tun.c | 13 ++++++++++++- include/linux/if_tun.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index dfc1054e4cbd..a998b6a9c245 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -641,6 +641,9 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, case VIRTIO_NET_HDR_GSO_TCPV6: skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; + case VIRTIO_NET_HDR_GSO_UDP: + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + break; default: tun->dev->stats.rx_frame_errors++; kfree_skb(skb); @@ -726,6 +729,8 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + gso.gso_type = VIRTIO_NET_HDR_GSO_UDP; else BUG(); if (sinfo->gso_type & SKB_GSO_TCP_ECN) @@ -1073,7 +1078,8 @@ static int set_offload(struct net_device *dev, unsigned long arg) old_features = dev->features; /* Unset features, set them as we chew on the arg. 
 */ features = (old_features & ~(NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST - |NETIF_F_TSO_ECN|NETIF_F_TSO|NETIF_F_TSO6)); + |NETIF_F_TSO_ECN|NETIF_F_TSO|NETIF_F_TSO6 + |NETIF_F_UFO)); if (arg & TUN_F_CSUM) { features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; @@ -1090,6 +1096,11 @@ static int set_offload(struct net_device *dev, unsigned long arg) features |= NETIF_F_TSO6; arg &= ~(TUN_F_TSO4|TUN_F_TSO6); } + + if (arg & TUN_F_UFO) { + features |= NETIF_F_UFO; + arg &= ~TUN_F_UFO; + } } /* This gives the user a way to test for new features in future by diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 915ba5789f0e..3f5fd523b49d 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -62,6 +62,7 @@ #define TUN_F_TSO4 0x02 /* I can handle TSO for IPv4 packets */ #define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */ #define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */ +#define TUN_F_UFO 0x10 /* I can handle UFO packets */ /* Protocol info prepended to the packets (when IFF_NO_PI is not set) */ #define TUN_PKT_STRIP 0x0001 -- cgit v1.2.3 From 0741241c6b80bfd58417e95de984d60c9e9ef2a0 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 17 Jul 2009 10:13:21 -0700 Subject: connector: make callback argument type explicit The connector documentation states that the argument to the callback function is always a pointer to a struct cn_msg, but rather than encoding that in the API itself, the code uses a void pointer everywhere. Encoding the type only in documentation makes little sense: it prevents proper C type checking and easily allows people to use the wrong pointer type. So convert the argument type to an explicit struct cn_msg pointer. Signed-off-by: Mike Frysinger Signed-off-by: David S.
Miller --- Documentation/connector/cn_test.c | 4 +--- drivers/connector/cn_proc.c | 3 +-- drivers/connector/cn_queue.c | 7 +++++-- drivers/connector/connector.c | 6 +++--- drivers/staging/dst/dcore.c | 3 +-- drivers/video/uvesafb.c | 3 +-- drivers/w1/w1_netlink.c | 3 +-- include/linux/connector.h | 6 +++--- 8 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c index f688eba87704..50d5ce4899c8 100644 --- a/Documentation/connector/cn_test.c +++ b/Documentation/connector/cn_test.c @@ -32,10 +32,8 @@ static char cn_test_name[] = "cn_test"; static struct sock *nls; static struct timer_list cn_test_timer; -void cn_test_callback(void *data) +void cn_test_callback(struct cn_msg *msg) { - struct cn_msg *msg = (struct cn_msg *)data; - printk("%s: %lu: idx=%x, val=%x, seq=%u, ack=%u, len=%d: %s.\n", __func__, jiffies, msg->id.idx, msg->id.val, msg->seq, msg->ack, msg->len, (char *)msg->data); diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index c5afc98e2675..85e5dc0431fe 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -202,9 +202,8 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) * cn_proc_mcast_ctl * @data: message sent from userspace via the connector */ -static void cn_proc_mcast_ctl(void *data) +static void cn_proc_mcast_ctl(struct cn_msg *msg) { - struct cn_msg *msg = data; enum proc_cn_mcast_op *mc_op = NULL; int err = 0; diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c index c769ef269fb5..d478aefcd3be 100644 --- a/drivers/connector/cn_queue.c +++ b/drivers/connector/cn_queue.c @@ -87,7 +87,9 @@ void cn_queue_wrapper(struct work_struct *work) kfree(d->free); } -static struct cn_callback_entry *cn_queue_alloc_callback_entry(char *name, struct cb_id *id, void (*callback)(void *)) +static struct cn_callback_entry * +cn_queue_alloc_callback_entry(char *name, struct cb_id *id, + void (*callback)(struct cn_msg *)) { struct cn_callback_entry *cbq; @@ -120,7 +122,8 @@ int cn_cb_equal(struct cb_id *i1, struct cb_id *i2) return ((i1->idx == i2->idx) && (i1->val == i2->val)); } -int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)) +int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, + void (*callback)(struct cn_msg *)) { struct cn_callback_entry *cbq, *__cbq; int found = 0; diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index fd336c5a9057..3f45669f5d76 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -269,7 +269,8 @@ static void cn_notify(struct cb_id *id, u32 notify_event) * * May sleep. */ -int cn_add_callback(struct cb_id *id, char *name, void (*callback)(void *)) +int cn_add_callback(struct cb_id *id, char *name, + void (*callback)(struct cn_msg *)) { int err; struct cn_dev *dev = &cdev; @@ -351,9 +352,8 @@ static int cn_ctl_msg_equals(struct cn_ctl_msg *m1, struct cn_ctl_msg *m2) * * Used for notification of a request's processing. 
*/ -static void cn_callback(void *data) +static void cn_callback(struct cn_msg *msg) { - struct cn_msg *msg = data; struct cn_ctl_msg *ctl; struct cn_ctl_entry *ent; u32 size; diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c index fad25b753042..84724187ec3e 100644 --- a/drivers/staging/dst/dcore.c +++ b/drivers/staging/dst/dcore.c @@ -846,10 +846,9 @@ static dst_command_func dst_commands[] = { /* * Configuration parser. */ -static void cn_dst_callback(void *data) +static void cn_dst_callback(struct cn_msg *msg) { struct dst_ctl *ctl; - struct cn_msg *msg = data; int err; struct dst_ctl_ack ack; struct dst_node *n = NULL, *tmp; diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index ca5b4643a401..e98baf6916b8 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -67,9 +67,8 @@ static DEFINE_MUTEX(uvfb_lock); * find the kernel part of the task struct, copy the registers and * the buffer contents and then complete the task. */ -static void uvesafb_cn_callback(void *data) +static void uvesafb_cn_callback(struct cn_msg *msg) { - struct cn_msg *msg = data; struct uvesafb_task *utask; struct uvesafb_ktask *task; diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c index fdf72851c574..52ccb3d3a963 100644 --- a/drivers/w1/w1_netlink.c +++ b/drivers/w1/w1_netlink.c @@ -306,9 +306,8 @@ static int w1_netlink_send_error(struct cn_msg *rcmsg, struct w1_netlink_msg *rm return error; } -static void w1_cn_callback(void *data) +static void w1_cn_callback(struct cn_msg *msg) { - struct cn_msg *msg = data; struct w1_netlink_msg *m = (struct w1_netlink_msg *)(msg + 1); struct w1_netlink_cmd *cmd; struct w1_slave *sl; diff --git a/include/linux/connector.h b/include/linux/connector.h index b68d27850d51..47ebf416f512 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -136,7 +136,7 @@ struct cn_callback_data { void *ddata; void *callback_priv; - void (*callback) (void *); + void (*callback) (struct cn_msg *); void *free; }; @@ -167,11 +167,11 @@ struct cn_dev { struct cn_queue_dev *cbdev; }; -int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); +int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *)); void cn_del_callback(struct cb_id *); int cn_netlink_send(struct cn_msg *, u32, gfp_t); -int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); +int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); int queue_cn_work(struct cn_callback_entry *cbq, struct work_struct *work); -- cgit v1.2.3 From e09758fae8ccde97e026c704319eaa18d488dc86 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: sched: Cover the CONFIG_DEBUG_SPINLOCK_SLEEP off-case for __might_sleep() Cover the off case for __might_sleep(), so that we avoid #ifdefs in files that make use of it. Especially, this prepares for the __might_sleep() pull up on cond_resched(). 
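The fix follows the usual stub idiom, sketched here with made-up names (CONFIG_DEBUG_FOO and debug_foo_check are illustrations, not real kernel symbols): the off case supplies an empty static inline, so call sites never need an #ifdef of their own and the compiler discards the call entirely.

#ifdef CONFIG_DEBUG_FOO
void debug_foo_check(char *file, int line);
#else
static inline void debug_foo_check(char *file, int line) { }
#endif

static void some_caller(void)
{
        /* No #ifdef needed here; the stub compiles away when off. */
        debug_foo_check(__FILE__, __LINE__);
}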
Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <1247725694-6082-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 1 + kernel/sched.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d6320a3e8def..b804f694ae6a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -139,6 +139,7 @@ extern int _cond_resched(void); # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) #else + static inline void __might_sleep(char *file, int line) { } # define might_sleep() do { might_resched(); } while (0) #endif diff --git a/kernel/sched.c b/kernel/sched.c index 4d39e96044b9..370a6c31c5e1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6610,9 +6610,8 @@ static inline int should_resched(void) static void __cond_resched(void) { -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP __might_sleep(__FILE__, __LINE__); -#endif + add_preempt_count(PREEMPT_ACTIVE); schedule(); sub_preempt_count(PREEMPT_ACTIVE); -- cgit v1.2.3 From e4aafea2d4bde8b44d6500c4ee7195bbfc51269e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: sched: Add a preempt count base offset to __might_sleep() Add a preempt count base offset to compare against the current preempt level count. This prepares for pulling up the might_sleep check from cond_resched() to cond_resched_lock() and cond_resched_bh(). For these two helpers, we need to ensure that once we unlock the given spinlock or re-enable local softirqs, respectively, we will reach a sleepable state. Signed-off-by: Frederic Weisbecker [ Move and rename preempt_count_equals() ] Signed-off-by: Peter Zijlstra LKML-Reference: <1247725694-6082-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 6 +++--- kernel/sched.c | 15 +++++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b804f694ae6a..2b5b1e0899a8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -125,7 +125,7 @@ extern int _cond_resched(void); #endif #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP - void __might_sleep(char *file, int line); + void __might_sleep(char *file, int line, int preempt_offset); /** * might_sleep - annotation for functions that can sleep * @@ -137,9 +137,9 @@ extern int _cond_resched(void); * supposed to.
 */ # define might_sleep() \ - do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) + do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) #else - static inline void __might_sleep(char *file, int line) { } + static inline void __might_sleep(char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) #endif diff --git a/kernel/sched.c b/kernel/sched.c index 370a6c31c5e1..3ff4d004bd95 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6610,7 +6610,7 @@ static inline int should_resched(void) static void __cond_resched(void) { - __might_sleep(__FILE__, __LINE__); + __might_sleep(__FILE__, __LINE__, 0); add_preempt_count(PREEMPT_ACTIVE); schedule(); @@ -9429,13 +9429,20 @@ void __init sched_init(void) } #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP -void __might_sleep(char *file, int line) +static inline int preempt_count_equals(int preempt_offset) +{ + int nested = preempt_count() & ~PREEMPT_ACTIVE; + + return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); +} + +void __might_sleep(char *file, int line, int preempt_offset) { #ifdef in_atomic static unsigned long prev_jiffy; /* ratelimiting */ - if ((!in_atomic() && !irqs_disabled()) || - system_state != SYSTEM_RUNNING || oops_in_progress) + if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || + system_state != SYSTEM_RUNNING || oops_in_progress) return; if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) return; -- cgit v1.2.3 From 6f80bd985fe242c2e6a8b6209ed20b0495d3d63b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: sched: Remove the CONFIG_PREEMPT_BKL case definition of cond_resched() CONFIG_PREEMPT_BKL doesn't exist anymore. So remove this config-on case definition of cond_resched(). Reported-by: Peter Zijlstra Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <1247725694-6082-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9bada20e2b23..e2bdf18e05c4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2285,17 +2285,12 @@ static inline int need_resched(void) * cond_resched_softirq() will enable bhs before scheduling. */ extern int _cond_resched(void); -#ifdef CONFIG_PREEMPT_BKL -static inline int cond_resched(void) -{ - return 0; -} -#else + static inline int cond_resched(void) { return _cond_resched(); } -#endif + extern int cond_resched_lock(spinlock_t * lock); extern int cond_resched_softirq(void); static inline int cond_resched_bkl(void) -- cgit v1.2.3 From 613afbf83298efaead05ebcac23d2285609d7160 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: sched: Pull up the might_sleep() check into cond_resched() might_sleep() is called late-ish in cond_resched(), after the need_resched()/preempt enabled/system running tests are checked. It's better to run the sleeping-while-atomic check earlier and not depend on environment data that reduce the chances of detecting a problem.
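Condensed from the sched.h diff below, the resulting shape is roughly:

#define cond_resched() ({                       \
        __might_sleep(__FILE__, __LINE__, 0);   \
        _cond_resched();                        \
})

so the sleeping-while-atomic check runs on every invocation, not only on the comparatively rare path where a reschedule is actually pending.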
Also define cond_resched_*() helpers as macros, so that the FILE/LINE reported in the sleeping while atomic warning displays the real origin and not sched.h Changes in v2: - Call __might_sleep() directly instead of might_sleep() which may call cond_resched() - Turn cond_resched() into a macro so that the file:line couple reported refers to the caller of cond_resched() and not __cond_resched() itself. Changes in v3: - Also propagate this __might_sleep() pull up to cond_resched_lock() and cond_resched_softirq() Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <1247725694-6082-6-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- fs/dcache.c | 1 + include/linux/sched.h | 29 +++++++++++++++++++---------- kernel/sched.c | 12 +++++------- 3 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 9e5cd3c3a6ba..a100fa35a48f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" int sysctl_vfs_cache_pressure __read_mostly = 100; diff --git a/include/linux/sched.h b/include/linux/sched.h index e2bdf18e05c4..c41d424db887 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2286,17 +2286,26 @@ static inline int need_resched(void) */ extern int _cond_resched(void); -static inline int cond_resched(void) -{ - return _cond_resched(); -} +#define cond_resched() ({ \ + __might_sleep(__FILE__, __LINE__, 0); \ + _cond_resched(); \ +}) -extern int cond_resched_lock(spinlock_t * lock); -extern int cond_resched_softirq(void); -static inline int cond_resched_bkl(void) -{ - return _cond_resched(); -} +extern int __cond_resched_lock(spinlock_t *lock); + +#define cond_resched_lock(lock) ({ \ + __might_sleep(__FILE__, __LINE__, PREEMPT_OFFSET); \ + __cond_resched_lock(lock); \ +}) + +extern int __cond_resched_softirq(void); + +#define cond_resched_softirq() ({ \ + __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET); \ + __cond_resched_softirq(); \ +}) + +#define cond_resched_bkl() cond_resched() /* * Does a critical section need to be broken due to another diff --git a/kernel/sched.c b/kernel/sched.c index 3ff4d004bd95..1f7919add8ae 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6610,8 +6610,6 @@ static inline int should_resched(void) static void __cond_resched(void) { - __might_sleep(__FILE__, __LINE__, 0); - add_preempt_count(PREEMPT_ACTIVE); schedule(); sub_preempt_count(PREEMPT_ACTIVE); @@ -6628,14 +6626,14 @@ int __sched _cond_resched(void) EXPORT_SYMBOL(_cond_resched); /* - * cond_resched_lock() - if a reschedule is pending, drop the given lock, + * __cond_resched_lock() - if a reschedule is pending, drop the given lock, * call schedule, and on return reacquire the lock. * * This works OK both with and without CONFIG_PREEMPT. We do strange low-level * operations here to prevent schedule() from being called twice (once via * spin_unlock(), once by hand). 
 */ -int cond_resched_lock(spinlock_t *lock) +int __cond_resched_lock(spinlock_t *lock) { int resched = should_resched(); int ret = 0; @@ -6651,9 +6649,9 @@ int cond_resched_lock(spinlock_t *lock) } return ret; } -EXPORT_SYMBOL(cond_resched_lock); +EXPORT_SYMBOL(__cond_resched_lock); -int __sched cond_resched_softirq(void) +int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); @@ -6665,7 +6663,7 @@ int cond_resched_softirq(void) } return 0; } -EXPORT_SYMBOL(cond_resched_softirq); +EXPORT_SYMBOL(__cond_resched_softirq); /** * yield - yield the current processor to other threads. -- cgit v1.2.3 From def01bc53d03881acfc393bd10a5c7575187e008 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: sched: Convert the only user of cond_resched_bkl to use cond_resched() fs/locks.c:flock_lock_file() is the only user of cond_resched_bkl(). This helper doesn't do anything more than cond_resched(). The latter name is enough to explain that we are rescheduling if needed. The bkl suffix suggests different semantics, but it's actually a synonym of cond_resched(). Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <1247725694-6082-7-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- fs/locks.c | 2 +- include/linux/sched.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index b6440f52178f..2eb81975c99c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -768,7 +768,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) * give it the opportunity to lock the file. */ if (found) - cond_resched_bkl(); + cond_resched(); find_conflict: for_each_lock(inode, before) { diff --git a/include/linux/sched.h b/include/linux/sched.h index c41d424db887..cbbfca69aa4a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2305,8 +2305,6 @@ extern int __cond_resched_softirq(void); __cond_resched_softirq(); \ }) -#define cond_resched_bkl() cond_resched() - /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPT, -- cgit v1.2.3 From 719a72b7c75bb239ca6184190ab994b71a31c6dc Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 17 Jul 2009 14:59:55 +0000 Subject: usb: r8a66597-hcd platform data on_chip support Convert the r8a66597-hcd driver to use the on_chip flag from platform data to enable on-chip behaviour instead of relying on CONFIG_SUPERH_ON_CHIP_R8A66597 ugliness. This makes the code cleaner and also allows us to support both external and internal r8a66597 with the same kernel. It also makes the Kconfig part more future-proof, since with this patch we can add support for new processors with an on-chip r8a66597 without modifying the Kconfig.
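The conversion follows a common pattern: a compile-time #ifdef choice becomes a per-device runtime decision carried in platform data, so one kernel binary covers both variants. A sketch with hypothetical names (mydev_*, CONFIG_MYDEV_ON_CHIP), not the driver's real symbols:

/*
 * Before, the same choice was a compile-time #ifdef baked into every
 * call site:
 *      #ifdef CONFIG_MYDEV_ON_CHIP ... #else ... #endif
 */
struct mydev_platdata {
        unsigned on_chip:1;     /* 1 = on-chip controller, 0 = external */
};

struct mydev {
        struct mydev_platdata *pdata;
};

void mydev_setup_on_chip(struct mydev *dev);
void mydev_setup_external(struct mydev *dev);

static void mydev_setup(struct mydev *dev)
{
        /* Decided per device at probe time, not per kernel build. */
        if (dev->pdata->on_chip)
                mydev_setup_on_chip(dev);
        else
                mydev_setup_external(dev);
}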
Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/mach-se/7724/setup.c | 1 + arch/sh/kernel/cpu/sh4a/setup-sh7366.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7723.c | 2 +- drivers/usb/host/Kconfig | 7 -- drivers/usb/host/r8a66597-hcd.c | 187 +++++++++++++++++++-------------- drivers/usb/host/r8a66597.h | 76 ++++++-------- include/linux/usb/r8a66597.h | 3 + 7 files changed, 147 insertions(+), 131 deletions(-) (limited to 'include/linux') diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 8fed45a2fb85..4fb7e48e2843 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -304,6 +304,7 @@ static struct platform_device sh_eth_device = { }; static struct r8a66597_platdata sh7724_usb0_host_data = { + .on_chip = 1, }; static struct resource sh7724_usb0_host_resources[] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c index c18f7d09281b..f6d208813564 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c @@ -40,7 +40,7 @@ static struct platform_device iic_device = { }; static struct r8a66597_platdata r8a66597_data = { - /* This set zero to all members */ + .on_chip = 1, }; static struct resource usb_host_resources[] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c index e1bb80b2a27b..28516499a2c4 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c @@ -398,7 +398,7 @@ static struct platform_device rtc_device = { }; static struct r8a66597_platdata r8a66597_data = { - /* This set zero to all members */ + .on_chip = 1, }; static struct resource sh7723_usb_host_resources[] = { diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 1a920c70b5a1..f21ca7d27a43 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -336,13 +336,6 @@ config USB_R8A66597_HCD To compile this driver as a module, choose M here: the module will be called r8a66597-hcd. -config SUPERH_ON_CHIP_R8A66597 - boolean "Enable SuperH on-chip R8A66597 USB" - depends on USB_R8A66597_HCD && (CPU_SUBTYPE_SH7366 || CPU_SUBTYPE_SH7723 || CPU_SUBTYPE_SH7724) - help - This driver enables support for the on-chip R8A66597 in the - SH7366, SH7723 and SH7724 processors. 
- config USB_WHCI_HCD tristate "Wireless USB Host Controller Interface (WHCI) driver (EXPERIMENTAL)" depends on EXPERIMENTAL diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 09895a97c10b..82dce3e0d4d7 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -91,43 +91,43 @@ static int r8a66597_clock_enable(struct r8a66597 *r8a66597) u16 tmp; int i = 0; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#if defined(CONFIG_HAVE_CLK) - clk_enable(r8a66597->clk); + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + clk_enable(r8a66597->clk); #endif - do { - r8a66597_write(r8a66597, SCKE, SYSCFG0); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 1000) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & SCKE) != SCKE); - r8a66597_write(r8a66597, 0x04, 0x02); -#else - do { - r8a66597_write(r8a66597, USBE, SYSCFG0); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 1000) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & USBE) != USBE); - r8a66597_bclr(r8a66597, USBE, SYSCFG0); - r8a66597_mdfy(r8a66597, get_xtal_from_pdata(r8a66597->pdata), XTAL, - SYSCFG0); + do { + r8a66597_write(r8a66597, SCKE, SYSCFG0); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 1000) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & SCKE) != SCKE); + r8a66597_write(r8a66597, 0x04, 0x02); + } else { + do { + r8a66597_write(r8a66597, USBE, SYSCFG0); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 1000) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & USBE) != USBE); + r8a66597_bclr(r8a66597, USBE, SYSCFG0); + r8a66597_mdfy(r8a66597, get_xtal_from_pdata(r8a66597->pdata), + XTAL, SYSCFG0); - i = 0; - r8a66597_bset(r8a66597, XCKE, SYSCFG0); - do { - msleep(1); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 500) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & SCKE) != SCKE); -#endif /* #if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) */ + i = 0; + r8a66597_bset(r8a66597, XCKE, SYSCFG0); + do { + msleep(1); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 500) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & SCKE) != SCKE); + } return 0; } @@ -136,15 +136,16 @@ static void r8a66597_clock_disable(struct r8a66597 *r8a66597) { r8a66597_bclr(r8a66597, SCKE, SYSCFG0); udelay(1); -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#if defined(CONFIG_HAVE_CLK) - clk_disable(r8a66597->clk); -#endif -#else - r8a66597_bclr(r8a66597, PLLC, SYSCFG0); - r8a66597_bclr(r8a66597, XCKE, SYSCFG0); - r8a66597_bclr(r8a66597, USBE, SYSCFG0); + + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + clk_disable(r8a66597->clk); #endif + } else { + r8a66597_bclr(r8a66597, PLLC, SYSCFG0); + r8a66597_bclr(r8a66597, XCKE, SYSCFG0); + r8a66597_bclr(r8a66597, USBE, SYSCFG0); + } } static void r8a66597_enable_port(struct r8a66597 *r8a66597, int port) @@ -205,7 +206,7 @@ static int enable_controller(struct r8a66597 *r8a66597) r8a66597_bset(r8a66597, SIGNE | SACKE, INTENB1); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_enable_port(r8a66597, port); return 0; @@ -218,7 +219,7 @@ static void disable_controller(struct r8a66597 *r8a66597) r8a66597_write(r8a66597, 0, INTENB0); r8a66597_write(r8a66597, 0, INTSTS0); - for (port = 0; port 
< R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_disable_port(r8a66597, port); r8a66597_clock_disable(r8a66597); @@ -249,11 +250,12 @@ static int is_hub_limit(char *devpath) return ((strlen(devpath) >= 4) ? 1 : 0); } -static void get_port_number(char *devpath, u16 *root_port, u16 *hub_port) +static void get_port_number(struct r8a66597 *r8a66597, + char *devpath, u16 *root_port, u16 *hub_port) { if (root_port) { *root_port = (devpath[0] & 0x0F) - 1; - if (*root_port >= R8A66597_MAX_ROOT_HUB) + if (*root_port >= r8a66597->max_root_hub) printk(KERN_ERR "r8a66597: Illegal root port number.\n"); } if (hub_port) @@ -355,7 +357,8 @@ static int make_r8a66597_device(struct r8a66597 *r8a66597, INIT_LIST_HEAD(&dev->device_list); list_add_tail(&dev->device_list, &r8a66597->child_device); - get_port_number(urb->dev->devpath, &dev->root_port, &dev->hub_port); + get_port_number(r8a66597, urb->dev->devpath, + &dev->root_port, &dev->hub_port); if (!is_child_device(urb->dev->devpath)) r8a66597->root_hub[dev->root_port].dev = dev; @@ -420,7 +423,7 @@ static void free_usb_address(struct r8a66597 *r8a66597, list_del(&dev->device_list); kfree(dev); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { if (r8a66597->root_hub[port].dev == dev) { r8a66597->root_hub[port].dev = NULL; break; @@ -495,10 +498,20 @@ static void r8a66597_pipe_toggle(struct r8a66597 *r8a66597, r8a66597_bset(r8a66597, SQCLR, pipe->pipectr); } +static inline unsigned short mbw_value(struct r8a66597 *r8a66597) +{ + if (r8a66597->pdata->on_chip) + return MBW_32; + else + return MBW_16; +} + /* this function must be called with interrupt disabled */ static inline void cfifo_change(struct r8a66597 *r8a66597, u16 pipenum) { - r8a66597_mdfy(r8a66597, MBW | pipenum, MBW | CURPIPE, CFIFOSEL); + unsigned short mbw = mbw_value(r8a66597); + + r8a66597_mdfy(r8a66597, mbw | pipenum, mbw | CURPIPE, CFIFOSEL); r8a66597_reg_wait(r8a66597, CFIFOSEL, CURPIPE, pipenum); } @@ -506,11 +519,13 @@ static inline void cfifo_change(struct r8a66597 *r8a66597, u16 pipenum) static inline void fifo_change_from_pipe(struct r8a66597 *r8a66597, struct r8a66597_pipe *pipe) { + unsigned short mbw = mbw_value(r8a66597); + cfifo_change(r8a66597, 0); - r8a66597_mdfy(r8a66597, MBW | 0, MBW | CURPIPE, D0FIFOSEL); - r8a66597_mdfy(r8a66597, MBW | 0, MBW | CURPIPE, D1FIFOSEL); + r8a66597_mdfy(r8a66597, mbw | 0, mbw | CURPIPE, D0FIFOSEL); + r8a66597_mdfy(r8a66597, mbw | 0, mbw | CURPIPE, D1FIFOSEL); - r8a66597_mdfy(r8a66597, MBW | pipe->info.pipenum, MBW | CURPIPE, + r8a66597_mdfy(r8a66597, mbw | pipe->info.pipenum, mbw | CURPIPE, pipe->fifosel); r8a66597_reg_wait(r8a66597, pipe->fifosel, CURPIPE, pipe->info.pipenum); } @@ -742,9 +757,13 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, struct r8a66597_pipe *pipe, struct urb *urb) { -#if !defined(CONFIG_SUPERH_ON_CHIP_R8A66597) int i; struct r8a66597_pipe_info *info = &pipe->info; + unsigned short mbw = mbw_value(r8a66597); + + /* pipe dma is only for external controlles */ + if (r8a66597->pdata->on_chip) + return; if ((pipe->info.pipenum != 0) && (info->type != R8A66597_INT)) { for (i = 0; i < R8A66597_MAX_DMA_CHANNEL; i++) { @@ -763,8 +782,8 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, set_pipe_reg_addr(pipe, i); cfifo_change(r8a66597, 0); - r8a66597_mdfy(r8a66597, MBW | pipe->info.pipenum, - MBW | CURPIPE, pipe->fifosel); + r8a66597_mdfy(r8a66597, mbw | pipe->info.pipenum, + mbw | 
CURPIPE, pipe->fifosel); r8a66597_reg_wait(r8a66597, pipe->fifosel, CURPIPE, pipe->info.pipenum); @@ -772,7 +791,6 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, break; } } -#endif /* #if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) */ } /* this function must be called with interrupt disabled */ @@ -1769,7 +1787,7 @@ static void r8a66597_timer(unsigned long _r8a66597) spin_lock_irqsave(&r8a66597->lock, flags); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_root_hub_control(r8a66597, port); spin_unlock_irqrestore(&r8a66597->lock, flags); @@ -1807,7 +1825,7 @@ static void set_address_zero(struct r8a66597 *r8a66597, struct urb *urb) u16 root_port, hub_port; if (usb_address == 0) { - get_port_number(urb->dev->devpath, + get_port_number(r8a66597, urb->dev->devpath, &root_port, &hub_port); set_devadd_reg(r8a66597, 0, get_r8a66597_usb_speed(urb->dev->speed), @@ -2082,7 +2100,7 @@ static int r8a66597_hub_status_data(struct usb_hcd *hcd, char *buf) *buf = 0; /* initialize (no change) */ - for (i = 0; i < R8A66597_MAX_ROOT_HUB; i++) { + for (i = 0; i < r8a66597->max_root_hub; i++) { if (r8a66597->root_hub[i].port & 0xffff0000) *buf |= 1 << (i + 1); } @@ -2097,11 +2115,11 @@ static void r8a66597_hub_descriptor(struct r8a66597 *r8a66597, { desc->bDescriptorType = 0x29; desc->bHubContrCurrent = 0; - desc->bNbrPorts = R8A66597_MAX_ROOT_HUB; + desc->bNbrPorts = r8a66597->max_root_hub; desc->bDescLength = 9; desc->bPwrOn2PwrGood = 0; desc->wHubCharacteristics = cpu_to_le16(0x0011); - desc->bitmap[0] = ((1 << R8A66597_MAX_ROOT_HUB) - 1) << 1; + desc->bitmap[0] = ((1 << r8a66597->max_root_hub) - 1) << 1; desc->bitmap[1] = ~0; } @@ -2129,7 +2147,7 @@ static int r8a66597_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, } break; case ClearPortFeature: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; if (wLength != 0) goto error; @@ -2162,12 +2180,12 @@ static int r8a66597_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, *buf = 0x00; break; case GetPortStatus: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; *(__le32 *)buf = cpu_to_le32(rh->port); break; case SetPortFeature: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; if (wLength != 0) goto error; @@ -2216,7 +2234,7 @@ static int r8a66597_bus_suspend(struct usb_hcd *hcd) dbg("%s", __func__); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; unsigned long dvstctr_reg = get_dvstctr_reg(port); @@ -2247,7 +2265,7 @@ static int r8a66597_bus_resume(struct usb_hcd *hcd) dbg("%s", __func__); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; unsigned long dvstctr_reg = get_dvstctr_reg(port); @@ -2314,7 +2332,7 @@ static int r8a66597_suspend(struct device *dev) disable_controller(r8a66597); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; rh->port = 0x00000000; @@ -2354,8 +2372,9 @@ static int __init_or_module r8a66597_remove(struct platform_device *pdev) del_timer_sync(&r8a66597->rh_timer); usb_remove_hcd(hcd); iounmap((void *)r8a66597->reg); -#if 
defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - clk_put(r8a66597->clk); +#ifdef CONFIG_HAVE_CLK + if (r8a66597->pdata->on_chip) + clk_put(r8a66597->clk); #endif usb_put_hcd(hcd); return 0; @@ -2363,7 +2382,7 @@ static int __init_or_module r8a66597_remove(struct platform_device *pdev) static int __devinit r8a66597_probe(struct platform_device *pdev) { -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK char clk_name[8]; #endif struct resource *res = NULL, *ires; @@ -2425,15 +2444,20 @@ static int __devinit r8a66597_probe(struct platform_device *pdev) r8a66597->pdata = pdev->dev.platform_data; r8a66597->irq_sense_low = irq_trigger == IRQF_TRIGGER_LOW; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - snprintf(clk_name, sizeof(clk_name), "usb%d", pdev->id); - r8a66597->clk = clk_get(&pdev->dev, clk_name); - if (IS_ERR(r8a66597->clk)) { - dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name); - ret = PTR_ERR(r8a66597->clk); - goto clean_up2; - } + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + snprintf(clk_name, sizeof(clk_name), "usb%d", pdev->id); + r8a66597->clk = clk_get(&pdev->dev, clk_name); + if (IS_ERR(r8a66597->clk)) { + dev_err(&pdev->dev, "cannot get clock \"%s\"\n", + clk_name); + ret = PTR_ERR(r8a66597->clk); + goto clean_up2; + } #endif + r8a66597->max_root_hub = 1; + } else + r8a66597->max_root_hub = 2; spin_lock_init(&r8a66597->lock); init_timer(&r8a66597->rh_timer); @@ -2463,8 +2487,9 @@ static int __devinit r8a66597_probe(struct platform_device *pdev) return 0; clean_up3: -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - clk_put(r8a66597->clk); +#ifdef CONFIG_HAVE_CLK + if (r8a66597->pdata->on_chip) + clk_put(r8a66597->clk); clean_up2: #endif usb_put_hcd(hcd); diff --git a/drivers/usb/host/r8a66597.h b/drivers/usb/host/r8a66597.h index d72680b433f9..eecbd917bc81 100644 --- a/drivers/usb/host/r8a66597.h +++ b/drivers/usb/host/r8a66597.h @@ -26,7 +26,7 @@ #ifndef __R8A66597_H__ #define __R8A66597_H__ -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK #include #endif @@ -193,13 +193,9 @@ #define REW 0x4000 /* b14: Buffer rewind */ #define DCLRM 0x2000 /* b13: DMA buffer clear mode */ #define DREQE 0x1000 /* b12: DREQ output enable */ -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#define MBW 0x0800 -#else -#define MBW 0x0400 /* b10: Maximum bit width for FIFO access */ -#endif #define MBW_8 0x0000 /* 8bit */ #define MBW_16 0x0400 /* 16bit */ +#define MBW_32 0x0800 /* 32bit */ #define BIGEND 0x0100 /* b8: Big endian mode */ #define BYTE_LITTLE 0x0000 /* little dendian */ #define BYTE_BIG 0x0100 /* big endifan */ @@ -405,11 +401,7 @@ #define R8A66597_MAX_NUM_PIPE 10 #define R8A66597_BUF_BSIZE 8 #define R8A66597_MAX_DEVICE 10 -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#define R8A66597_MAX_ROOT_HUB 1 -#else #define R8A66597_MAX_ROOT_HUB 2 -#endif #define R8A66597_MAX_SAMPLING 5 #define R8A66597_RH_POLL_TIME 10 #define R8A66597_MAX_DMA_CHANNEL 2 @@ -487,7 +479,7 @@ struct r8a66597_root_hub { struct r8a66597 { spinlock_t lock; unsigned long reg; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK struct clk *clk; #endif struct r8a66597_platdata *pdata; @@ -504,6 +496,7 @@ struct r8a66597 { unsigned short interval_map; unsigned char pipe_cnt[R8A66597_MAX_NUM_PIPE]; unsigned char dma_map; + unsigned int max_root_hub; struct list_head child_device; unsigned long 
child_connect_map[4]; @@ -550,21 +543,22 @@ static inline void r8a66597_read_fifo(struct r8a66597 *r8a66597, unsigned long offset, u16 *buf, int len) { -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) unsigned long fifoaddr = r8a66597->reg + offset; unsigned long count; - count = len / 4; - insl(fifoaddr, buf, count); + if (r8a66597->pdata->on_chip) { + count = len / 4; + insl(fifoaddr, buf, count); - if (len & 0x00000003) { - unsigned long tmp = inl(fifoaddr); - memcpy((unsigned char *)buf + count * 4, &tmp, len & 0x03); + if (len & 0x00000003) { + unsigned long tmp = inl(fifoaddr); + memcpy((unsigned char *)buf + count * 4, &tmp, + len & 0x03); + } + } else { + len = (len + 1) / 2; + insw(fifoaddr, buf, len); } -#else - len = (len + 1) / 2; - insw(r8a66597->reg + offset, buf, len); -#endif } static inline void r8a66597_write(struct r8a66597 *r8a66597, u16 val, @@ -578,33 +572,33 @@ static inline void r8a66597_write_fifo(struct r8a66597 *r8a66597, int len) { unsigned long fifoaddr = r8a66597->reg + offset; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) unsigned long count; unsigned char *pb; int i; - count = len / 4; - outsl(fifoaddr, buf, count); + if (r8a66597->pdata->on_chip) { + count = len / 4; + outsl(fifoaddr, buf, count); + + if (len & 0x00000003) { + pb = (unsigned char *)buf + count * 4; + for (i = 0; i < (len & 0x00000003); i++) { + if (r8a66597_read(r8a66597, CFIFOSEL) & BIGEND) + outb(pb[i], fifoaddr + i); + else + outb(pb[i], fifoaddr + 3 - i); + } + } + } else { + int odd = len & 0x0001; - if (len & 0x00000003) { - pb = (unsigned char *)buf + count * 4; - for (i = 0; i < (len & 0x00000003); i++) { - if (r8a66597_read(r8a66597, CFIFOSEL) & BIGEND) - outb(pb[i], fifoaddr + i); - else - outb(pb[i], fifoaddr + 3 - i); + len = len / 2; + outsw(fifoaddr, buf, len); + if (unlikely(odd)) { + buf = &buf[len]; + outb((unsigned char)*buf, fifoaddr); } } -#else - int odd = len & 0x0001; - - len = len / 2; - outsw(fifoaddr, buf, len); - if (unlikely(odd)) { - buf = &buf[len]; - outb((unsigned char)*buf, fifoaddr); - } -#endif } static inline void r8a66597_mdfy(struct r8a66597 *r8a66597, diff --git a/include/linux/usb/r8a66597.h b/include/linux/usb/r8a66597.h index e9f0384fa20c..460ee3f6a2c6 100644 --- a/include/linux/usb/r8a66597.h +++ b/include/linux/usb/r8a66597.h @@ -31,6 +31,9 @@ struct r8a66597_platdata { /* This ops can controll port power instead of DVSTCTR register. */ void (*port_power)(int port, int power); + /* set one = on chip controller, set zero = external controller */ + unsigned on_chip:1; + /* (external controller only) set R8A66597_PLATDATA_XTAL_nnMHZ */ unsigned xtal:2; -- cgit v1.2.3 From 1c388ad054fb1ead3dc354b1719570b99e464135 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Fri, 17 Jul 2009 16:16:18 -0700 Subject: include/linux/cred.h: work around gcc-4.2.4 warning in get_cred() With gcc 4.2.4 (building UML) I get the warning include/linux/cred.h: In function 'get_cred': include/linux/cred.h:189: warning: passing argument 1 of 'get_new_cred' discards qualifiers from pointer target type Inserting an additional local variable appears to keep the compiler happy, although it's not clear to me why this should be needed. 
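A standalone reduction of the warning and the workaround, with a hypothetical struct obj standing in for struct cred:

struct obj { int usage; };

static struct obj *get_new_obj(struct obj *o)
{
        o->usage++;
        return o;
}

static const struct obj *get_obj(const struct obj *o)
{
        /*
         * Passing the cast expression straight to get_new_obj() makes
         * gcc 4.2.4 warn about discarding the const qualifier; routing
         * it through a named non-const local keeps it quiet.
         */
        struct obj *nonconst = (struct obj *)o;
        return get_new_obj(nonconst);
}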
Signed-off-by: Paul Menage Signed-off-by: Andrew Morton Acked-by: David Howells Signed-off-by: James Morris --- include/linux/cred.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 4fa999696310..b3c76e815d66 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -186,7 +186,8 @@ static inline struct cred *get_new_cred(struct cred *cred) */ static inline const struct cred *get_cred(const struct cred *cred) { - return get_new_cred((struct cred *) cred); + struct cred *nonconst_cred = (struct cred *) cred; + return get_new_cred(nonconst_cred); } /** -- cgit v1.2.3 From 4d4036e0e7299c6cbb2d2421b4b30b7a409ce61a Mon Sep 17 00:00:00 2001 From: Jason Yeh Date: Wed, 8 Jul 2009 13:49:38 +0200 Subject: oprofile: Implement performance counter multiplexing The number of hardware counters is limited. The multiplexing feature enables OProfile to gather more events than the hardware provides counters for. This is realized by switching between events at a user-specified time interval. A new file (/dev/oprofile/time_slice) is added for the user to specify the timer interval in ms. If the number of events to profile is higher than the number of hardware counters available, the patch schedules a work queue that switches the event counter and rewrites the different sets of values into it. The switching mechanism needs to be implemented for each architecture to support multiplexing. This patch only implements AMD CPU support, but multiplexing can easily be extended to other models and architectures. There are follow-on patches that rework parts of this patch. Signed-off-by: Jason Yeh Signed-off-by: Robert Richter --- arch/Kconfig | 12 +++ arch/x86/oprofile/nmi_int.c | 162 ++++++++++++++++++++++++++++++++++++-- arch/x86/oprofile/op_counter.h | 2 +- arch/x86/oprofile/op_model_amd.c | 110 ++++++++++++++++++++++---- arch/x86/oprofile/op_model_p4.c | 4 + arch/x86/oprofile/op_model_ppro.c | 2 + arch/x86/oprofile/op_x86_model.h | 7 ++ drivers/oprofile/oprof.c | 78 ++++++++++++++++++ drivers/oprofile/oprof.h | 2 + drivers/oprofile/oprofile_files.c | 43 ++++++++++ drivers/oprofile/oprofile_stats.c | 10 +++ include/linux/oprofile.h | 3 + 12 files changed, 415 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 99193b160232..beea3ccebb5e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -30,6 +30,18 @@ config OPROFILE_IBS If unsure, say N. +config OPROFILE_EVENT_MULTIPLEX + bool "OProfile multiplexing support (EXPERIMENTAL)" + default n + depends on OPROFILE && X86 + help + The number of hardware counters is limited. The multiplexing + feature enables OProfile to gather more events than the + hardware provides counters for. This is realized by switching + between events at a user-specified time interval. + + If unsure, say N.
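The round-robin that the per-CPU switch_index implements further down can be modelled in a few lines of plain C. This is a sketch of the scheduling arithmetic only, under simplified assumptions (it ignores the early wrap when the next counter set is unconfigured), not the kernel code itself:

    /* mux-model.c -- standalone model of counter-set rotation.
     * NUM_HW hardware counters sweep through NUM_VIRT virtual events,
     * advancing a per-CPU offset once per time slice.
     */
    #include <stdio.h>

    #define NUM_HW   4      /* e.g. the AMD perfctrs */
    #define NUM_VIRT 32     /* OP_MAX_COUNTER after this patch */

    static int switch_index;        /* per-CPU in the real driver */

    static void switch_events(void)
    {
            switch_index += NUM_HW;
            if (switch_index >= NUM_VIRT)
                    switch_index = 0;       /* wrap to the first set */
    }

    int main(void)
    {
            int slice, i;

            for (slice = 0; slice < 10; slice++) {
                    printf("slice %2d maps hw counters to events", slice);
                    for (i = 0; i < NUM_HW; i++)
                            printf(" %2d", switch_index + i);
                    printf("\n");
                    switch_events();        /* what the work queue triggers */
            }
            return 0;
    }

In use, the interval would be set before profiling starts, e.g. echo 5 > /dev/oprofile/time_slice; the oprofile_set_timeout() helper added later in this patch refuses with -EBUSY once profiling is running.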
+ config HAVE_OPROFILE bool diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index fca8dc94531e..e54f6a0b35ac 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -1,11 +1,14 @@ /** * @file nmi_int.c * - * @remark Copyright 2002-2008 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon * @author Robert Richter + * @author Barry Kasindorf + * @author Jason Yeh + * @author Suravee Suthikulpanit */ #include @@ -24,6 +27,12 @@ #include "op_counter.h" #include "op_x86_model.h" + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +DEFINE_PER_CPU(int, switch_index); +#endif + + static struct op_x86_model_spec const *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); @@ -31,6 +40,13 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +extern atomic_t multiplex_counter; +#endif + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + /* common functions */ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, @@ -95,6 +111,11 @@ static void free_msrs(void) per_cpu(cpu_msrs, i).counters = NULL; kfree(per_cpu(cpu_msrs, i).controls); per_cpu(cpu_msrs, i).controls = NULL; + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + kfree(per_cpu(cpu_msrs, i).multiplex); + per_cpu(cpu_msrs, i).multiplex = NULL; +#endif } } @@ -103,6 +124,9 @@ static int allocate_msrs(void) int success = 1; size_t controls_size = sizeof(struct op_msr) * model->num_controls; size_t counters_size = sizeof(struct op_msr) * model->num_counters; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + size_t multiplex_size = sizeof(struct op_msr) * model->num_virt_counters; +#endif int i; for_each_possible_cpu(i) { @@ -118,6 +142,14 @@ static int allocate_msrs(void) success = 0; break; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + per_cpu(cpu_msrs, i).multiplex = + kmalloc(multiplex_size, GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).multiplex) { + success = 0; + break; + } +#endif } if (!success) @@ -126,6 +158,25 @@ static int allocate_msrs(void) return success; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_setup_cpu_mux(struct op_msrs const * const msrs) +{ + int i; + struct op_msr *multiplex = msrs->multiplex; + + for (i = 0; i < model->num_virt_counters; ++i) { + if (counter_config[i].enabled) { + multiplex[i].saved = -(u64)counter_config[i].count; + } else { + multiplex[i].addr = 0; + multiplex[i].saved = 0; + } + } +} + +#endif + static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); @@ -133,6 +184,9 @@ static void nmi_cpu_setup(void *dummy) nmi_cpu_save_registers(msrs); spin_lock(&oprofilefs_lock); model->setup_ctrs(model, msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + nmi_setup_cpu_mux(msrs); +#endif spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); @@ -173,14 +227,52 @@ static int nmi_setup(void) memcpy(per_cpu(cpu_msrs, cpu).controls, per_cpu(cpu_msrs, 0).controls, sizeof(struct op_msr) * model->num_controls); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + memcpy(per_cpu(cpu_msrs, cpu).multiplex, + per_cpu(cpu_msrs, 0).multiplex, + sizeof(struct op_msr) * model->num_virt_counters); +#endif } - } on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; return 0; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) +{ + 
unsigned int si = __get_cpu_var(switch_index); + struct op_msr *multiplex = msrs->multiplex; + unsigned int i; + + for (i = 0; i < model->num_counters; ++i) { + int offset = i + si; + if (multiplex[offset].addr) { + rdmsrl(multiplex[offset].addr, + multiplex[offset].saved); + } + } +} + +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) +{ + unsigned int si = __get_cpu_var(switch_index); + struct op_msr *multiplex = msrs->multiplex; + unsigned int i; + + for (i = 0; i < model->num_counters; ++i) { + int offset = i + si; + if (multiplex[offset].addr) { + wrmsrl(multiplex[offset].addr, + multiplex[offset].saved); + } + } +} + +#endif + static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; @@ -214,6 +306,9 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + __get_cpu_var(switch_index) = 0; +#endif } static void nmi_shutdown(void) @@ -252,16 +347,15 @@ static void nmi_stop(void) on_each_cpu(nmi_cpu_stop, NULL, 1); } -struct op_counter_config counter_config[OP_MAX_COUNTER]; - static int nmi_create_files(struct super_block *sb, struct dentry *root) { unsigned int i; - for (i = 0; i < model->num_counters; ++i) { + for (i = 0; i < model->num_virt_counters; ++i) { struct dentry *dir; char buf[4]; +#ifndef CONFIG_OPROFILE_EVENT_MULTIPLEX /* quick little hack to _not_ expose a counter if it is not * available for use. This should protect userspace app. * NOTE: assumes 1:1 mapping here (that counters are organized @@ -269,6 +363,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) */ if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) continue; +#endif /* CONFIG_OPROFILE_EVENT_MULTIPLEX */ snprintf(buf, sizeof(buf), "%d", i); dir = oprofilefs_mkdir(sb, root, buf); @@ -283,6 +378,57 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) return 0; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_cpu_switch(void *dummy) +{ + int cpu = smp_processor_id(); + int si = per_cpu(switch_index, cpu); + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); + + nmi_cpu_stop(NULL); + nmi_cpu_save_mpx_registers(msrs); + + /* move to next set */ + si += model->num_counters; + if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) + per_cpu(switch_index, cpu) = 0; + else + per_cpu(switch_index, cpu) = si; + + model->switch_ctrl(model, msrs); + nmi_cpu_restore_mpx_registers(msrs); + + nmi_cpu_start(NULL); +} + + +/* + * Quick check to see if multiplexing is necessary. + * The check should be sufficient since counters are used + * in order. + */ +static int nmi_multiplex_on(void) +{ + return counter_config[model->num_counters].count ?
0 : -EINVAL; +} + +static int nmi_switch_event(void) +{ + if (!model->switch_ctrl) + return -ENOSYS; /* not implemented */ + if (nmi_multiplex_on() < 0) + return -EINVAL; /* not necessary */ + + on_each_cpu(nmi_cpu_switch, NULL, 1); + + atomic_inc(&multiplex_counter); + + return 0; +} + +#endif + #ifdef CONFIG_SMP static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, void *data) @@ -516,12 +662,18 @@ int __init op_nmi_init(struct oprofile_operations *ops) register_cpu_notifier(&oprofile_cpu_nb); #endif /* default values, can be overwritten by model */ +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + __raw_get_cpu_var(switch_index) = 0; +#endif ops->create_files = nmi_create_files; ops->setup = nmi_setup; ops->shutdown = nmi_shutdown; ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + ops->switch_events = nmi_switch_event; +#endif if (model->init) ret = model->init(ops); diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 91b6a116165e..e28398df0df2 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,7 +10,7 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 8 +#define OP_MAX_COUNTER 32 /* Per-perfctr configuration as set via * oprofilefs. diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index f676f8825a3f..fdbed3a0c877 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -9,12 +9,15 @@ * @author Philippe Elie * @author Graydon Hoare * @author Robert Richter - * @author Barry Kasindorf + * @author Barry Kasindorf + * @author Jason Yeh + * @author Suravee Suthikulpanit */ #include #include #include +#include #include #include @@ -25,12 +28,23 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +#define NUM_VIRT_COUNTERS 32 +#define NUM_VIRT_CONTROLS 32 +#else +#define NUM_VIRT_COUNTERS NUM_COUNTERS +#define NUM_VIRT_CONTROLS NUM_CONTROLS +#endif + #define OP_EVENT_MASK 0x0FFF #define OP_CTR_OVERFLOW (1ULL<<31) #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) -static unsigned long reset_value[NUM_COUNTERS]; +static unsigned long reset_value[NUM_VIRT_COUNTERS]; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +DECLARE_PER_CPU(int, switch_index); +#endif #ifdef CONFIG_OPROFILE_IBS @@ -82,6 +96,16 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) else msrs->controls[i].addr = 0; } + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + for (i = 0; i < NUM_VIRT_COUNTERS; i++) { + int hw_counter = i % NUM_CONTROLS; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; + else + msrs->multiplex[i].addr = 0; + } +#endif } static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, @@ -90,6 +114,15 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, u64 val; int i; + /* setup reset_value */ + for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + } else { + reset_value[i] = 0; + } + } + /* clear all counters */ for (i = 0; i < NUM_CONTROLS; ++i) { if (unlikely(!msrs->controls[i].addr)) @@ -108,20 +141,49 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - if (counter_config[i].enabled && msrs->counters[i].addr) { - reset_value[i] = counter_config[i].count; - 
wrmsrl(msrs->counters[i].addr, - -(u64)counter_config[i].count); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (counter_config[offset].enabled && msrs->counters[i].addr) { + /* setup counter registers */ + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); + + /* setup control registers */ rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[i]); + val |= op_x86_get_ctrl(model, &counter_config[offset]); + wrmsrl(msrs->controls[i].addr, val); + } + } +} + + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void op_amd_switch_ctrl(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) +{ + u64 val; + int i; + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + int offset = i + __get_cpu_var(switch_index); + if (counter_config[offset].enabled) { + /* setup control registers */ + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[offset]); wrmsrl(msrs->controls[i].addr, val); - } else { - reset_value[i] = 0; } } } +#endif + + #ifdef CONFIG_OPROFILE_IBS static inline int @@ -230,14 +292,19 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, int i; for (i = 0; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (!reset_value[offset]) continue; rdmsrl(msrs->counters[i].addr, val); /* bit is clear if overflowed: */ if (val & OP_CTR_OVERFLOW) continue; - oprofile_add_sample(regs, i); - wrmsrl(msrs->counters[i].addr, -(u64)reset_value[i]); + oprofile_add_sample(regs, offset); + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); } op_amd_handle_ibs(regs, msrs); @@ -250,8 +317,14 @@ static void op_amd_start(struct op_msrs const * const msrs) { u64 val; int i; + for (i = 0; i < NUM_COUNTERS; ++i) { - if (reset_value[i]) { +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (reset_value[offset]) { rdmsrl(msrs->controls[i].addr, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(msrs->controls[i].addr, val); @@ -271,7 +344,11 @@ static void op_amd_stop(struct op_msrs const * const msrs) * pm callback */ for (i = 0; i < NUM_COUNTERS; ++i) { +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) +#else if (!reset_value[i]) +#endif continue; rdmsrl(msrs->controls[i].addr, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -289,7 +366,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0; i < NUM_CONTROLS; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } @@ -463,6 +540,8 @@ static void op_amd_exit(void) {} struct op_x86_model_spec const op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, + .num_virt_counters = NUM_VIRT_COUNTERS, + .num_virt_controls = NUM_VIRT_CONTROLS, .reserved = MSR_AMD_EVENTSEL_RESERVED, .event_mask = OP_EVENT_MASK, .init = op_amd_init, @@ -473,4 +552,7 @@ struct op_x86_model_spec const op_amd_spec = { .start = &op_amd_start, .stop = &op_amd_stop, .shutdown = &op_amd_shutdown, +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + .switch_ctrl = &op_amd_switch_ctrl, +#endif }; diff --git 
a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 5921b7fc724b..65b9237cde8b 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -698,6 +698,8 @@ static void p4_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, + .num_virt_counters = NUM_COUNTERS_HT2, + .num_virt_controls = NUM_CONTROLS_HT2, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, @@ -710,6 +712,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = { struct op_x86_model_spec const op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, + .num_virt_counters = NUM_COUNTERS_NON_HT, + .num_virt_controls = NUM_CONTROLS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 570d717c3308..098cbca5c0b0 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -206,6 +206,8 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_ppro_spec = { .num_counters = 2, .num_controls = 2, + .num_virt_counters = 2, + .num_virt_controls = 2, .reserved = MSR_PPRO_EVENTSEL_RESERVED, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 505489873b9d..0d07d23cb062 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -23,6 +23,7 @@ struct op_msr { struct op_msrs { struct op_msr *counters; struct op_msr *controls; + struct op_msr *multiplex; }; struct pt_regs; @@ -35,6 +36,8 @@ struct oprofile_operations; struct op_x86_model_spec { unsigned int num_counters; unsigned int num_controls; + unsigned int num_virt_counters; + unsigned int num_virt_controls; u64 reserved; u16 event_mask; int (*init)(struct oprofile_operations *ops); @@ -47,6 +50,10 @@ struct op_x86_model_spec { void (*start)(struct op_msrs const * const msrs); void (*stop)(struct op_msrs const * const msrs); void (*shutdown)(struct op_msrs const * const msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + void (*switch_ctrl)(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs); +#endif }; struct op_counter_config; diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 3cffce90f82a..7bc64af7cf99 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include "oprof.h" @@ -27,6 +29,15 @@ unsigned long oprofile_backtrace_depth; static unsigned long is_setup; static DEFINE_MUTEX(start_mutex); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void switch_worker(struct work_struct *work); +static DECLARE_DELAYED_WORK(switch_work, switch_worker); +unsigned long timeout_jiffies; +#define MULTIPLEXING_TIMER_DEFAULT 1 + +#endif + /* timer 0 - use performance monitoring hardware if available 1 - use the timer int mechanism regardless @@ -87,6 +98,20 @@ out: return err; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void start_switch_worker(void) +{ + schedule_delayed_work(&switch_work, timeout_jiffies); +} + +static void switch_worker(struct work_struct *work) +{ + if (!oprofile_ops.switch_events()) + start_switch_worker(); +} + +#endif /* Actually start profiling (echo 
1>/dev/oprofile/enable) */ int oprofile_start(void) @@ -108,6 +133,11 @@ int oprofile_start(void) if ((err = oprofile_ops.start())) goto out; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + if (oprofile_ops.switch_events) + start_switch_worker(); +#endif + oprofile_started = 1; out: mutex_unlock(&start_mutex); @@ -123,6 +153,11 @@ void oprofile_stop(void) goto out; oprofile_ops.stop(); oprofile_started = 0; + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + cancel_delayed_work_sync(&switch_work); +#endif + /* wake up the daemon to read what remains */ wake_up_buffer_waiter(); out: @@ -155,6 +190,36 @@ post_sync: mutex_unlock(&start_mutex); } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +/* User inputs in ms, converts to jiffies */ +int oprofile_set_timeout(unsigned long val_msec) +{ + int err = 0; + + mutex_lock(&start_mutex); + + if (oprofile_started) { + err = -EBUSY; + goto out; + } + + if (!oprofile_ops.switch_events) { + err = -EINVAL; + goto out; + } + + timeout_jiffies = msecs_to_jiffies(val_msec); + if (timeout_jiffies == MAX_JIFFY_OFFSET) + timeout_jiffies = msecs_to_jiffies(MULTIPLEXING_TIMER_DEFAULT); + +out: + mutex_unlock(&start_mutex); + return err; + +} + +#endif int oprofile_set_backtrace(unsigned long val) { @@ -179,10 +244,23 @@ out: return err; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void __init oprofile_multiplexing_init(void) +{ + timeout_jiffies = msecs_to_jiffies(MULTIPLEXING_TIMER_DEFAULT); +} + +#endif + static int __init oprofile_init(void) { int err; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + oprofile_multiplexing_init(); +#endif + err = oprofile_arch_init(&oprofile_ops); if (err < 0 || timer) { diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index c288d3c24b50..ee38abcc74f3 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -27,6 +27,7 @@ extern unsigned long oprofile_buffer_watershed; extern struct oprofile_operations oprofile_ops; extern unsigned long oprofile_started; extern unsigned long oprofile_backtrace_depth; +extern unsigned long timeout_jiffies; struct super_block; struct dentry; @@ -35,5 +36,6 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root); void oprofile_timer_init(struct oprofile_operations *ops); int oprofile_set_backtrace(unsigned long depth); +int oprofile_set_timeout(unsigned long time); #endif /* OPROF_H */ diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index 5d36ffc30dd5..468ec3e4f856 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c @@ -9,6 +9,7 @@ #include #include +#include #include "event_buffer.h" #include "oprofile_stats.h" @@ -22,6 +23,45 @@ unsigned long oprofile_buffer_size; unsigned long oprofile_cpu_buffer_size; unsigned long oprofile_buffer_watershed; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static ssize_t timeout_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(jiffies_to_msecs(timeout_jiffies), + buf, count, offset); +} + + +static ssize_t timeout_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + retval = oprofile_set_timeout(val); + + if (retval) + return retval; + return count; +} + + +static const struct file_operations timeout_fops = { + .read = timeout_read, + .write = timeout_write, +}; + +#endif + + static ssize_t depth_read(struct 
file *file, char __user *buf, size_t count, loff_t *offset) { return oprofilefs_ulong_to_user(oprofile_backtrace_depth, buf, count, @@ -139,6 +179,9 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root) oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + oprofilefs_create_file(sb, root, "time_slice", &timeout_fops); +#endif oprofile_create_stats_files(sb, root); if (oprofile_ops.create_files) oprofile_ops.create_files(sb, root); diff --git a/drivers/oprofile/oprofile_stats.c b/drivers/oprofile/oprofile_stats.c index 3c2270a8300c..77a57a6792f6 100644 --- a/drivers/oprofile/oprofile_stats.c +++ b/drivers/oprofile/oprofile_stats.c @@ -16,6 +16,9 @@ #include "cpu_buffer.h" struct oprofile_stat_struct oprofile_stats; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +atomic_t multiplex_counter; +#endif void oprofile_reset_stats(void) { @@ -34,6 +37,9 @@ void oprofile_reset_stats(void) atomic_set(&oprofile_stats.sample_lost_no_mapping, 0); atomic_set(&oprofile_stats.event_lost_overflow, 0); atomic_set(&oprofile_stats.bt_lost_no_mapping, 0); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + atomic_set(&multiplex_counter, 0); +#endif } @@ -76,4 +82,8 @@ void oprofile_create_stats_files(struct super_block *sb, struct dentry *root) &oprofile_stats.event_lost_overflow); oprofilefs_create_ro_atomic(sb, dir, "bt_lost_no_mapping", &oprofile_stats.bt_lost_no_mapping); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + oprofilefs_create_ro_atomic(sb, dir, "multiplex_counter", + &multiplex_counter); +#endif } diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index d68d2ed94f15..5171639ecf0f 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -67,6 +67,9 @@ struct oprofile_operations { /* Initiate a stack backtrace. Optional. */ void (*backtrace)(struct pt_regs * const regs, unsigned int depth); + + /* Multiplex between different events. Optional. */ + int (*switch_events)(void); /* CPU identification string. */ char * cpu_type; }; -- cgit v1.2.3 From 99fde513f57db2c8e1b202ade4be7d47033ff09b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 20 Jul 2009 22:28:50 -0700 Subject: Input: wm97xx - add possibility to control the GPIO_STATUS shift This patch allows tweaking the behaviour of the GPIO_STATUS register shift quirk in wm97xx-core. The GPIO_STATUS register shift by one apparently does not occur on all hardware, and it causes problems with accelerated touchscreen drivers on Palm hardware. An accelerated touchscreen driver can therefore select whether the shift happens on its hardware.
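The effect of the new variant field on the status write can be modelled standalone. In the sketch below, WM9712_ID2 is given a placeholder value (the real one comes from the codec's AC97 vendor ID register); the two variant constants match those added by this patch:

    /* gpio-shift.c -- model of the post-patch GPIO_STATUS write path. */
    #include <stdio.h>

    #define WM9712_ID2      0x4712  /* placeholder, not the real ID value */
    #define WM97xx_GENERIC  0x0000
    #define WM97xx_WM1613   0x1613

    /* Returns the value that would be written to AC97_GPIO_STATUS. */
    static unsigned short gpio_status_out(unsigned short id,
                                          unsigned short variant,
                                          unsigned short reg)
    {
            if (id == WM9712_ID2 && variant != WM97xx_WM1613)
                    return reg << 1;        /* quirky genuine WM9712 parts */
            return reg;                     /* WM9713, Palm's WM1613 mutant */
    }

    int main(void)
    {
            printf("generic WM9712: %#x\n",
                   gpio_status_out(WM9712_ID2, WM97xx_GENERIC, 0x8)); /* 0x10 */
            printf("Palm WM1613:    %#x\n",
                   gpio_status_out(WM9712_ID2, WM97xx_WM1613, 0x8));  /* 0x8 */
            return 0;
    }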
Signed-off-by: Marek Vasut Acked-by: Mark Brown Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/mainstone-wm97xx.c | 3 +++ drivers/input/touchscreen/wm97xx-core.c | 6 ++++-- include/linux/wm97xx.h | 7 +++++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c index c797bc04ee83..8fc3b08deb3b 100644 --- a/drivers/input/touchscreen/mainstone-wm97xx.c +++ b/drivers/input/touchscreen/mainstone-wm97xx.c @@ -198,6 +198,9 @@ static int wm97xx_acc_startup(struct wm97xx *wm) if (machine_is_palmt5() || machine_is_palmtx() || machine_is_palmld()) { pen_int = 1; irq = 27; + /* There is some obscure mutant of WM9712 interbred with WM9713 + * used on Palm HW */ + wm->variant = WM97xx_WM1613; } else if (machine_is_mainstone() && pen_int) irq = 4; diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c index 2957d48e0045..252eb11fe9db 100644 --- a/drivers/input/touchscreen/wm97xx-core.c +++ b/drivers/input/touchscreen/wm97xx-core.c @@ -204,7 +204,7 @@ void wm97xx_set_gpio(struct wm97xx *wm, u32 gpio, else reg &= ~gpio; - if (wm->id == WM9712_ID2) + if (wm->id == WM9712_ID2 && wm->variant != WM97xx_WM1613) wm97xx_reg_write(wm, AC97_GPIO_STATUS, reg << 1); else wm97xx_reg_write(wm, AC97_GPIO_STATUS, reg); @@ -307,7 +307,7 @@ static void wm97xx_pen_irq_worker(struct work_struct *work) WM97XX_GPIO_13); } - if (wm->id == WM9712_ID2) + if (wm->id == WM9712_ID2 && wm->variant != WM97xx_WM1613) wm97xx_reg_write(wm, AC97_GPIO_STATUS, (status & ~WM97XX_GPIO_13) << 1); else @@ -582,6 +582,8 @@ static int wm97xx_probe(struct device *dev) wm->id = wm97xx_reg_read(wm, AC97_VENDOR_ID2); + wm->variant = WM97xx_GENERIC; + dev_info(wm->dev, "detected a wm97%02x codec\n", wm->id & 0xff); switch (wm->id & 0xff) { diff --git a/include/linux/wm97xx.h b/include/linux/wm97xx.h index 6f69968eab24..0c9878123d5f 100644 --- a/include/linux/wm97xx.h +++ b/include/linux/wm97xx.h @@ -15,6 +15,12 @@ #include /* Input device layer */ #include +/* + * WM97xx variants + */ +#define WM97xx_GENERIC 0x0000 +#define WM97xx_WM1613 0x1613 + /* * WM97xx AC97 Touchscreen registers */ @@ -283,6 +289,7 @@ struct wm97xx { unsigned pen_is_down:1; /* Pen is down */ unsigned aux_waiting:1; /* aux measurement waiting */ unsigned pen_probably_down:1; /* used in polling mode */ + u16 variant; /* WM97xx chip variant */ u16 suspend_mode; /* PRP in suspend mode */ }; -- cgit v1.2.3 From d7aacaddcac3971e33cf52d7e610c06696cb347f Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 8 Jul 2009 13:21:31 +0200 Subject: Driver Core: Add platform device arch data V3 Allow architecture specific data in struct platform_device V3. With this patch struct pdev_archdata is added to struct platform_device, similar to struct dev_archdata found in struct device. Useful for architecture code that needs to keep extra data associated with each platform device. Struct pdev_archdata is different from dev.platform_data: the convention is that dev.platform_data points to driver-specific data. It may or may not be required by the driver. The format of this depends on the driver but is the same across architectures. The structure pdev_archdata is a place for architecture specific data. This data is handled by architecture specific code (for example runtime PM), and since it is architecture specific it should _never_ be touched by device driver code.
Exactly like struct dev_archdata but for platform devices. [rjw: This change is for power management mostly and that's why it goes through the suspend tree.] Signed-off-by: Magnus Damm Acked-by: Kevin Hilman Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- arch/arm/include/asm/device.h | 3 +++ arch/ia64/include/asm/device.h | 3 +++ arch/microblaze/include/asm/device.h | 3 +++ arch/powerpc/include/asm/device.h | 3 +++ arch/sparc/include/asm/device.h | 3 +++ arch/x86/include/asm/device.h | 3 +++ include/asm-generic/device.h | 3 +++ include/linux/platform_device.h | 3 +++ 8 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index c61642b40603..9f390ce335cb 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -12,4 +12,7 @@ struct dev_archdata { #endif }; +struct pdev_archdata { +}; + #endif diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h index 41ab85d66f33..d66d446b127c 100644 --- a/arch/ia64/include/asm/device.h +++ b/arch/ia64/include/asm/device.h @@ -15,4 +15,7 @@ struct dev_archdata { #endif }; +struct pdev_archdata { +}; + #endif /* _ASM_IA64_DEVICE_H */ diff --git a/arch/microblaze/include/asm/device.h b/arch/microblaze/include/asm/device.h index c042830793ed..30286db27c1c 100644 --- a/arch/microblaze/include/asm/device.h +++ b/arch/microblaze/include/asm/device.h @@ -16,6 +16,9 @@ struct dev_archdata { struct device_node *of_node; }; +struct pdev_archdata { +}; + #endif /* _ASM_MICROBLAZE_DEVICE_H */ diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 7d2277cef09a..e3e06e0f7fc0 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -30,4 +30,7 @@ dev_archdata_get_node(const struct dev_archdata *ad) return ad->of_node; } +struct pdev_archdata { +}; + #endif /* _ASM_POWERPC_DEVICE_H */ diff --git a/arch/sparc/include/asm/device.h b/arch/sparc/include/asm/device.h index 3702e087df2c..f3b85b6b0b76 100644 --- a/arch/sparc/include/asm/device.h +++ b/arch/sparc/include/asm/device.h @@ -32,4 +32,7 @@ dev_archdata_get_node(const struct dev_archdata *ad) return ad->prom_node; } +struct pdev_archdata { +}; + #endif /* _ASM_SPARC_DEVICE_H */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 4994a20acbcb..cee34e9ca45b 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -13,4 +13,7 @@ struct dma_map_ops *dma_ops; #endif }; +struct pdev_archdata { +}; + #endif /* _ASM_X86_DEVICE_H */ diff --git a/include/asm-generic/device.h b/include/asm-generic/device.h index c17c9600f220..d7c76bba640d 100644 --- a/include/asm-generic/device.h +++ b/include/asm-generic/device.h @@ -9,4 +9,7 @@ struct dev_archdata { }; +struct pdev_archdata { +}; + #endif /* _ASM_GENERIC_DEVICE_H */ diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 8dc5123b6305..672a69849735 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -22,6 +22,9 @@ struct platform_device { struct resource * resource; struct platform_device_id *id_entry; + + /* arch specific additions */ + struct pdev_archdata archdata; }; #define platform_get_device_id(pdev) ((pdev)->id_entry) -- cgit v1.2.3 From 511647ff58fd0f1c1f415d2c757d841650edac91 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 8 Jul 2009 13:23:07 +0200 Subject: PM: Remove platform device suspend_late()/resume_early() V2 This is V2 of the 
platform driver power management late/early callback removal patch. The callbacks ->suspend_late() and ->resume_early() are removed since all in-tree users now have been migrated to dev_pm_ops. Signed-off-by: Magnus Damm Acked-by: Greg Kroah-Hartman Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/platform.c | 36 ------------------------------------ include/linux/platform_device.h | 2 -- 2 files changed, 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 81cb01bfc356..455e55971d0e 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -628,30 +628,6 @@ static int platform_legacy_suspend(struct device *dev, pm_message_t mesg) return ret; } -static int platform_legacy_suspend_late(struct device *dev, pm_message_t mesg) -{ - struct platform_driver *pdrv = to_platform_driver(dev->driver); - struct platform_device *pdev = to_platform_device(dev); - int ret = 0; - - if (dev->driver && pdrv->suspend_late) - ret = pdrv->suspend_late(pdev, mesg); - - return ret; -} - -static int platform_legacy_resume_early(struct device *dev) -{ - struct platform_driver *pdrv = to_platform_driver(dev->driver); - struct platform_device *pdev = to_platform_device(dev); - int ret = 0; - - if (dev->driver && pdrv->resume_early) - ret = pdrv->resume_early(pdev); - - return ret; -} - static int platform_legacy_resume(struct device *dev) { struct platform_driver *pdrv = to_platform_driver(dev->driver); @@ -714,8 +690,6 @@ static int platform_pm_suspend_noirq(struct device *dev) if (drv->pm) { if (drv->pm->suspend_noirq) ret = drv->pm->suspend_noirq(dev); - } else { - ret = platform_legacy_suspend_late(dev, PMSG_SUSPEND); } return ret; @@ -750,8 +724,6 @@ static int platform_pm_resume_noirq(struct device *dev) if (drv->pm) { if (drv->pm->resume_noirq) ret = drv->pm->resume_noirq(dev); - } else { - ret = platform_legacy_resume_early(dev); } return ret; @@ -797,8 +769,6 @@ static int platform_pm_freeze_noirq(struct device *dev) if (drv->pm) { if (drv->pm->freeze_noirq) ret = drv->pm->freeze_noirq(dev); - } else { - ret = platform_legacy_suspend_late(dev, PMSG_FREEZE); } return ret; @@ -833,8 +803,6 @@ static int platform_pm_thaw_noirq(struct device *dev) if (drv->pm) { if (drv->pm->thaw_noirq) ret = drv->pm->thaw_noirq(dev); - } else { - ret = platform_legacy_resume_early(dev); } return ret; @@ -869,8 +837,6 @@ static int platform_pm_poweroff_noirq(struct device *dev) if (drv->pm) { if (drv->pm->poweroff_noirq) ret = drv->pm->poweroff_noirq(dev); - } else { - ret = platform_legacy_suspend_late(dev, PMSG_HIBERNATE); } return ret; @@ -905,8 +871,6 @@ static int platform_pm_restore_noirq(struct device *dev) if (drv->pm) { if (drv->pm->restore_noirq) ret = drv->pm->restore_noirq(dev); - } else { - ret = platform_legacy_resume_early(dev); } return ret; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 672a69849735..3c6675c2444b 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -60,8 +60,6 @@ struct platform_driver { int (*remove)(struct platform_device *); void (*shutdown)(struct platform_device *); int (*suspend)(struct platform_device *, pm_message_t state); - int (*suspend_late)(struct platform_device *, pm_message_t state); - int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); struct device_driver driver; struct platform_device_id *id_table; -- cgit v1.2.3 From fbd90375d7531927d312766b548376d909811b4d Mon Sep 17 
00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jul 2009 13:40:14 +0200 Subject: hrtimer: Remove cb_entry from struct hrtimer It's unused, remove it. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner LKML-Reference: --- include/linux/hrtimer.h | 2 -- kernel/hrtimer.c | 1 - 2 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 54648e625efd..40e7d54fc424 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -91,7 +91,6 @@ enum hrtimer_restart { * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) - * @cb_entry: list head to enqueue an expired timer into the callback list * @start_site: timer statistics field to store the site where the timer * was started * @start_comm: timer statistics field to store the name of the process which @@ -108,7 +107,6 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; - struct list_head cb_entry; #ifdef CONFIG_TIMER_STATS int start_pid; void *start_site; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 43d151f185b6..052a0f53e4eb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1092,7 +1092,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, clock_id = CLOCK_MONOTONIC; timer->base = &cpu_base->clock_base[clock_id]; - INIT_LIST_HEAD(&timer->cb_entry); hrtimer_init_timer_hres(timer); #ifdef CONFIG_TIMER_STATS -- cgit v1.2.3 From cf4f1e76c49dacfde0680b170b9a9b6a42f296bb Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 22 Jul 2009 14:32:03 +0000 Subject: usb: move r8a66597 register defines Move r8a66597 hardware register definitions from the host controller header file to the platform data header file. With this change in place we can easily share register definitions between the host controller driver and a future gadget driver. 
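As a sketch of what the move enables, a gadget driver could now pick up the same register map from the shared platform header. The function below is hypothetical (no such driver exists in this tree yet), and the DCFM polarity is assumed from the register's "Controller function select" comment rather than confirmed by this patch:

    /* Hypothetical gadget-side helper reusing the moved definitions. */
    #include <linux/io.h>
    #include <linux/usb/r8a66597.h>

    static void r8a66597_udc_enable(void __iomem *base)
    {
            u16 val = readw(base + SYSCFG0);

            val &= ~DCFM;   /* assumed: cleared selects peripheral function */
            val |= USBE;    /* b0: USB module operation enable */
            writew(val, base + SYSCFG0);
    }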
Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/usb/host/r8a66597.h | 366 ------------------------------------------ include/linux/usb/r8a66597.h | 372 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 370 insertions(+), 368 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/host/r8a66597.h b/drivers/usb/host/r8a66597.h index eecbd917bc81..228e3fb23854 100644 --- a/drivers/usb/host/r8a66597.h +++ b/drivers/usb/host/r8a66597.h @@ -32,372 +32,6 @@ #include -#define SYSCFG0 0x00 -#define SYSCFG1 0x02 -#define SYSSTS0 0x04 -#define SYSSTS1 0x06 -#define DVSTCTR0 0x08 -#define DVSTCTR1 0x0A -#define TESTMODE 0x0C -#define PINCFG 0x0E -#define DMA0CFG 0x10 -#define DMA1CFG 0x12 -#define CFIFO 0x14 -#define D0FIFO 0x18 -#define D1FIFO 0x1C -#define CFIFOSEL 0x20 -#define CFIFOCTR 0x22 -#define CFIFOSIE 0x24 -#define D0FIFOSEL 0x28 -#define D0FIFOCTR 0x2A -#define D1FIFOSEL 0x2C -#define D1FIFOCTR 0x2E -#define INTENB0 0x30 -#define INTENB1 0x32 -#define INTENB2 0x34 -#define BRDYENB 0x36 -#define NRDYENB 0x38 -#define BEMPENB 0x3A -#define SOFCFG 0x3C -#define INTSTS0 0x40 -#define INTSTS1 0x42 -#define INTSTS2 0x44 -#define BRDYSTS 0x46 -#define NRDYSTS 0x48 -#define BEMPSTS 0x4A -#define FRMNUM 0x4C -#define UFRMNUM 0x4E -#define USBADDR 0x50 -#define USBREQ 0x54 -#define USBVAL 0x56 -#define USBINDX 0x58 -#define USBLENG 0x5A -#define DCPCFG 0x5C -#define DCPMAXP 0x5E -#define DCPCTR 0x60 -#define PIPESEL 0x64 -#define PIPECFG 0x68 -#define PIPEBUF 0x6A -#define PIPEMAXP 0x6C -#define PIPEPERI 0x6E -#define PIPE1CTR 0x70 -#define PIPE2CTR 0x72 -#define PIPE3CTR 0x74 -#define PIPE4CTR 0x76 -#define PIPE5CTR 0x78 -#define PIPE6CTR 0x7A -#define PIPE7CTR 0x7C -#define PIPE8CTR 0x7E -#define PIPE9CTR 0x80 -#define PIPE1TRE 0x90 -#define PIPE1TRN 0x92 -#define PIPE2TRE 0x94 -#define PIPE2TRN 0x96 -#define PIPE3TRE 0x98 -#define PIPE3TRN 0x9A -#define PIPE4TRE 0x9C -#define PIPE4TRN 0x9E -#define PIPE5TRE 0xA0 -#define PIPE5TRN 0xA2 -#define DEVADD0 0xD0 -#define DEVADD1 0xD2 -#define DEVADD2 0xD4 -#define DEVADD3 0xD6 -#define DEVADD4 0xD8 -#define DEVADD5 0xDA -#define DEVADD6 0xDC -#define DEVADD7 0xDE -#define DEVADD8 0xE0 -#define DEVADD9 0xE2 -#define DEVADDA 0xE4 - -/* System Configuration Control Register */ -#define XTAL 0xC000 /* b15-14: Crystal selection */ -#define XTAL48 0x8000 /* 48MHz */ -#define XTAL24 0x4000 /* 24MHz */ -#define XTAL12 0x0000 /* 12MHz */ -#define XCKE 0x2000 /* b13: External clock enable */ -#define PLLC 0x0800 /* b11: PLL control */ -#define SCKE 0x0400 /* b10: USB clock enable */ -#define PCSDIS 0x0200 /* b9: not CS wakeup */ -#define LPSME 0x0100 /* b8: Low power sleep mode */ -#define HSE 0x0080 /* b7: Hi-speed enable */ -#define DCFM 0x0040 /* b6: Controller function select */ -#define DRPD 0x0020 /* b5: D+/- pull down control */ -#define DPRPU 0x0010 /* b4: D+ pull up control */ -#define USBE 0x0001 /* b0: USB module operation enable */ - -/* System Configuration Status Register */ -#define OVCBIT 0x8000 /* b15-14: Over-current bit */ -#define OVCMON 0xC000 /* b15-14: Over-current monitor */ -#define SOFEA 0x0020 /* b5: SOF monitor */ -#define IDMON 0x0004 /* b3: ID-pin monitor */ -#define LNST 0x0003 /* b1-0: D+, D- line status */ -#define SE1 0x0003 /* SE1 */ -#define FS_KSTS 0x0002 /* Full-Speed K State */ -#define FS_JSTS 0x0001 /* Full-Speed J State */ -#define LS_JSTS 0x0002 /* Low-Speed J State */ -#define LS_KSTS 0x0001 /* Low-Speed K State */ -#define SE0 0x0000 /* SE0 */ - -/* Device State Control Register 
*/ -#define EXTLP0 0x0400 /* b10: External port */ -#define VBOUT 0x0200 /* b9: VBUS output */ -#define WKUP 0x0100 /* b8: Remote wakeup */ -#define RWUPE 0x0080 /* b7: Remote wakeup sense */ -#define USBRST 0x0040 /* b6: USB reset enable */ -#define RESUME 0x0020 /* b5: Resume enable */ -#define UACT 0x0010 /* b4: USB bus enable */ -#define RHST 0x0007 /* b1-0: Reset handshake status */ -#define HSPROC 0x0004 /* HS handshake is processing */ -#define HSMODE 0x0003 /* Hi-Speed mode */ -#define FSMODE 0x0002 /* Full-Speed mode */ -#define LSMODE 0x0001 /* Low-Speed mode */ -#define UNDECID 0x0000 /* Undecided */ - -/* Test Mode Register */ -#define UTST 0x000F /* b3-0: Test select */ -#define H_TST_PACKET 0x000C /* HOST TEST Packet */ -#define H_TST_SE0_NAK 0x000B /* HOST TEST SE0 NAK */ -#define H_TST_K 0x000A /* HOST TEST K */ -#define H_TST_J 0x0009 /* HOST TEST J */ -#define H_TST_NORMAL 0x0000 /* HOST Normal Mode */ -#define P_TST_PACKET 0x0004 /* PERI TEST Packet */ -#define P_TST_SE0_NAK 0x0003 /* PERI TEST SE0 NAK */ -#define P_TST_K 0x0002 /* PERI TEST K */ -#define P_TST_J 0x0001 /* PERI TEST J */ -#define P_TST_NORMAL 0x0000 /* PERI Normal Mode */ - -/* Data Pin Configuration Register */ -#define LDRV 0x8000 /* b15: Drive Current Adjust */ -#define VIF1 0x0000 /* VIF = 1.8V */ -#define VIF3 0x8000 /* VIF = 3.3V */ -#define INTA 0x0001 /* b1: USB INT-pin active */ - -/* DMAx Pin Configuration Register */ -#define DREQA 0x4000 /* b14: Dreq active select */ -#define BURST 0x2000 /* b13: Burst mode */ -#define DACKA 0x0400 /* b10: Dack active select */ -#define DFORM 0x0380 /* b9-7: DMA mode select */ -#define CPU_ADR_RD_WR 0x0000 /* Address + RD/WR mode (CPU bus) */ -#define CPU_DACK_RD_WR 0x0100 /* DACK + RD/WR mode (CPU bus) */ -#define CPU_DACK_ONLY 0x0180 /* DACK only mode (CPU bus) */ -#define SPLIT_DACK_ONLY 0x0200 /* DACK only mode (SPLIT bus) */ -#define DENDA 0x0040 /* b6: Dend active select */ -#define PKTM 0x0020 /* b5: Packet mode */ -#define DENDE 0x0010 /* b4: Dend enable */ -#define OBUS 0x0004 /* b2: OUTbus mode */ - -/* CFIFO/DxFIFO Port Select Register */ -#define RCNT 0x8000 /* b15: Read count mode */ -#define REW 0x4000 /* b14: Buffer rewind */ -#define DCLRM 0x2000 /* b13: DMA buffer clear mode */ -#define DREQE 0x1000 /* b12: DREQ output enable */ -#define MBW_8 0x0000 /* 8bit */ -#define MBW_16 0x0400 /* 16bit */ -#define MBW_32 0x0800 /* 32bit */ -#define BIGEND 0x0100 /* b8: Big endian mode */ -#define BYTE_LITTLE 0x0000 /* little dendian */ -#define BYTE_BIG 0x0100 /* big endifan */ -#define ISEL 0x0020 /* b5: DCP FIFO port direction select */ -#define CURPIPE 0x000F /* b2-0: PIPE select */ - -/* CFIFO/DxFIFO Port Control Register */ -#define BVAL 0x8000 /* b15: Buffer valid flag */ -#define BCLR 0x4000 /* b14: Buffer clear */ -#define FRDY 0x2000 /* b13: FIFO ready */ -#define DTLN 0x0FFF /* b11-0: FIFO received data length */ - -/* Interrupt Enable Register 0 */ -#define VBSE 0x8000 /* b15: VBUS interrupt */ -#define RSME 0x4000 /* b14: Resume interrupt */ -#define SOFE 0x2000 /* b13: Frame update interrupt */ -#define DVSE 0x1000 /* b12: Device state transition interrupt */ -#define CTRE 0x0800 /* b11: Control transfer stage transition interrupt */ -#define BEMPE 0x0400 /* b10: Buffer empty interrupt */ -#define NRDYE 0x0200 /* b9: Buffer not ready interrupt */ -#define BRDYE 0x0100 /* b8: Buffer ready interrupt */ - -/* Interrupt Enable Register 1 */ -#define OVRCRE 0x8000 /* b15: Over-current interrupt */ -#define BCHGE 0x4000 /* b14: USB us chenge 
interrupt */ -#define DTCHE 0x1000 /* b12: Detach sense interrupt */ -#define ATTCHE 0x0800 /* b11: Attach sense interrupt */ -#define EOFERRE 0x0040 /* b6: EOF error interrupt */ -#define SIGNE 0x0020 /* b5: SETUP IGNORE interrupt */ -#define SACKE 0x0010 /* b4: SETUP ACK interrupt */ - -/* BRDY Interrupt Enable/Status Register */ -#define BRDY9 0x0200 /* b9: PIPE9 */ -#define BRDY8 0x0100 /* b8: PIPE8 */ -#define BRDY7 0x0080 /* b7: PIPE7 */ -#define BRDY6 0x0040 /* b6: PIPE6 */ -#define BRDY5 0x0020 /* b5: PIPE5 */ -#define BRDY4 0x0010 /* b4: PIPE4 */ -#define BRDY3 0x0008 /* b3: PIPE3 */ -#define BRDY2 0x0004 /* b2: PIPE2 */ -#define BRDY1 0x0002 /* b1: PIPE1 */ -#define BRDY0 0x0001 /* b1: PIPE0 */ - -/* NRDY Interrupt Enable/Status Register */ -#define NRDY9 0x0200 /* b9: PIPE9 */ -#define NRDY8 0x0100 /* b8: PIPE8 */ -#define NRDY7 0x0080 /* b7: PIPE7 */ -#define NRDY6 0x0040 /* b6: PIPE6 */ -#define NRDY5 0x0020 /* b5: PIPE5 */ -#define NRDY4 0x0010 /* b4: PIPE4 */ -#define NRDY3 0x0008 /* b3: PIPE3 */ -#define NRDY2 0x0004 /* b2: PIPE2 */ -#define NRDY1 0x0002 /* b1: PIPE1 */ -#define NRDY0 0x0001 /* b1: PIPE0 */ - -/* BEMP Interrupt Enable/Status Register */ -#define BEMP9 0x0200 /* b9: PIPE9 */ -#define BEMP8 0x0100 /* b8: PIPE8 */ -#define BEMP7 0x0080 /* b7: PIPE7 */ -#define BEMP6 0x0040 /* b6: PIPE6 */ -#define BEMP5 0x0020 /* b5: PIPE5 */ -#define BEMP4 0x0010 /* b4: PIPE4 */ -#define BEMP3 0x0008 /* b3: PIPE3 */ -#define BEMP2 0x0004 /* b2: PIPE2 */ -#define BEMP1 0x0002 /* b1: PIPE1 */ -#define BEMP0 0x0001 /* b0: PIPE0 */ - -/* SOF Pin Configuration Register */ -#define TRNENSEL 0x0100 /* b8: Select transaction enable period */ -#define BRDYM 0x0040 /* b6: BRDY clear timing */ -#define INTL 0x0020 /* b5: Interrupt sense select */ -#define EDGESTS 0x0010 /* b4: */ -#define SOFMODE 0x000C /* b3-2: SOF pin select */ -#define SOF_125US 0x0008 /* SOF OUT 125us Frame Signal */ -#define SOF_1MS 0x0004 /* SOF OUT 1ms Frame Signal */ -#define SOF_DISABLE 0x0000 /* SOF OUT Disable */ - -/* Interrupt Status Register 0 */ -#define VBINT 0x8000 /* b15: VBUS interrupt */ -#define RESM 0x4000 /* b14: Resume interrupt */ -#define SOFR 0x2000 /* b13: SOF frame update interrupt */ -#define DVST 0x1000 /* b12: Device state transition interrupt */ -#define CTRT 0x0800 /* b11: Control transfer stage transition interrupt */ -#define BEMP 0x0400 /* b10: Buffer empty interrupt */ -#define NRDY 0x0200 /* b9: Buffer not ready interrupt */ -#define BRDY 0x0100 /* b8: Buffer ready interrupt */ -#define VBSTS 0x0080 /* b7: VBUS input port */ -#define DVSQ 0x0070 /* b6-4: Device state */ -#define DS_SPD_CNFG 0x0070 /* Suspend Configured */ -#define DS_SPD_ADDR 0x0060 /* Suspend Address */ -#define DS_SPD_DFLT 0x0050 /* Suspend Default */ -#define DS_SPD_POWR 0x0040 /* Suspend Powered */ -#define DS_SUSP 0x0040 /* Suspend */ -#define DS_CNFG 0x0030 /* Configured */ -#define DS_ADDS 0x0020 /* Address */ -#define DS_DFLT 0x0010 /* Default */ -#define DS_POWR 0x0000 /* Powered */ -#define DVSQS 0x0030 /* b5-4: Device state */ -#define VALID 0x0008 /* b3: Setup packet detected flag */ -#define CTSQ 0x0007 /* b2-0: Control transfer stage */ -#define CS_SQER 0x0006 /* Sequence error */ -#define CS_WRND 0x0005 /* Control write nodata status stage */ -#define CS_WRSS 0x0004 /* Control write status stage */ -#define CS_WRDS 0x0003 /* Control write data stage */ -#define CS_RDSS 0x0002 /* Control read status stage */ -#define CS_RDDS 0x0001 /* Control read data stage */ -#define CS_IDST 0x0000 /* Idle or setup 
stage */ - -/* Interrupt Status Register 1 */ -#define OVRCR 0x8000 /* b15: Over-current interrupt */ -#define BCHG 0x4000 /* b14: USB bus chenge interrupt */ -#define DTCH 0x1000 /* b12: Detach sense interrupt */ -#define ATTCH 0x0800 /* b11: Attach sense interrupt */ -#define EOFERR 0x0040 /* b6: EOF-error interrupt */ -#define SIGN 0x0020 /* b5: Setup ignore interrupt */ -#define SACK 0x0010 /* b4: Setup acknowledge interrupt */ - -/* Frame Number Register */ -#define OVRN 0x8000 /* b15: Overrun error */ -#define CRCE 0x4000 /* b14: Received data error */ -#define FRNM 0x07FF /* b10-0: Frame number */ - -/* Micro Frame Number Register */ -#define UFRNM 0x0007 /* b2-0: Micro frame number */ - -/* Default Control Pipe Maxpacket Size Register */ -/* Pipe Maxpacket Size Register */ -#define DEVSEL 0xF000 /* b15-14: Device address select */ -#define MAXP 0x007F /* b6-0: Maxpacket size of default control pipe */ - -/* Default Control Pipe Control Register */ -#define BSTS 0x8000 /* b15: Buffer status */ -#define SUREQ 0x4000 /* b14: Send USB request */ -#define CSCLR 0x2000 /* b13: complete-split status clear */ -#define CSSTS 0x1000 /* b12: complete-split status */ -#define SUREQCLR 0x0800 /* b11: stop setup request */ -#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ -#define SQSET 0x0080 /* b7: Sequence toggle bit set */ -#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ -#define PBUSY 0x0020 /* b5: pipe busy */ -#define PINGE 0x0010 /* b4: ping enable */ -#define CCPL 0x0004 /* b2: Enable control transfer complete */ -#define PID 0x0003 /* b1-0: Response PID */ -#define PID_STALL11 0x0003 /* STALL */ -#define PID_STALL 0x0002 /* STALL */ -#define PID_BUF 0x0001 /* BUF */ -#define PID_NAK 0x0000 /* NAK */ - -/* Pipe Window Select Register */ -#define PIPENM 0x0007 /* b2-0: Pipe select */ - -/* Pipe Configuration Register */ -#define R8A66597_TYP 0xC000 /* b15-14: Transfer type */ -#define R8A66597_ISO 0xC000 /* Isochronous */ -#define R8A66597_INT 0x8000 /* Interrupt */ -#define R8A66597_BULK 0x4000 /* Bulk */ -#define R8A66597_BFRE 0x0400 /* b10: Buffer ready interrupt mode select */ -#define R8A66597_DBLB 0x0200 /* b9: Double buffer mode select */ -#define R8A66597_CNTMD 0x0100 /* b8: Continuous transfer mode select */ -#define R8A66597_SHTNAK 0x0080 /* b7: Transfer end NAK */ -#define R8A66597_DIR 0x0010 /* b4: Transfer direction select */ -#define R8A66597_EPNUM 0x000F /* b3-0: Eendpoint number select */ - -/* Pipe Buffer Configuration Register */ -#define BUFSIZE 0x7C00 /* b14-10: Pipe buffer size */ -#define BUFNMB 0x007F /* b6-0: Pipe buffer number */ -#define PIPE0BUF 256 -#define PIPExBUF 64 - -/* Pipe Maxpacket Size Register */ -#define MXPS 0x07FF /* b10-0: Maxpacket size */ - -/* Pipe Cycle Configuration Register */ -#define IFIS 0x1000 /* b12: Isochronous in-buffer flush mode select */ -#define IITV 0x0007 /* b2-0: Isochronous interval */ - -/* Pipex Control Register */ -#define BSTS 0x8000 /* b15: Buffer status */ -#define INBUFM 0x4000 /* b14: IN buffer monitor (Only for PIPE1 to 5) */ -#define CSCLR 0x2000 /* b13: complete-split status clear */ -#define CSSTS 0x1000 /* b12: complete-split status */ -#define ATREPM 0x0400 /* b10: Auto repeat mode */ -#define ACLRM 0x0200 /* b9: Out buffer auto clear mode */ -#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ -#define SQSET 0x0080 /* b7: Sequence toggle bit set */ -#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ -#define PBUSY 0x0020 /* b5: pipe busy */ -#define PID 0x0003 /* b1-0: Response 
PID */ - -/* PIPExTRE */ -#define TRENB 0x0200 /* b9: Transaction counter enable */ -#define TRCLR 0x0100 /* b8: Transaction counter clear */ - -/* PIPExTRN */ -#define TRNCNT 0xFFFF /* b15-0: Transaction counter */ - -/* DEVADDx */ -#define UPPHUB 0x7800 -#define HUBPORT 0x0700 -#define USBSPD 0x00C0 -#define RTPORT 0x0001 - #define R8A66597_MAX_NUM_PIPE 10 #define R8A66597_BUF_BSIZE 8 #define R8A66597_MAX_DEVICE 10 diff --git a/include/linux/usb/r8a66597.h b/include/linux/usb/r8a66597.h index 460ee3f6a2c6..26d216734057 100644 --- a/include/linux/usb/r8a66597.h +++ b/include/linux/usb/r8a66597.h @@ -28,7 +28,7 @@ #define R8A66597_PLATDATA_XTAL_48MHZ 0x03 struct r8a66597_platdata { - /* This ops can controll port power instead of DVSTCTR register. */ + /* This callback can control port power instead of DVSTCTR register. */ void (*port_power)(int port, int power); /* set one = on chip controller, set zero = external controller */ @@ -43,5 +43,373 @@ struct r8a66597_platdata { /* set one = big endian, set zero = little endian */ unsigned endian:1; }; -#endif + +/* Register definitions */ +#define SYSCFG0 0x00 +#define SYSCFG1 0x02 +#define SYSSTS0 0x04 +#define SYSSTS1 0x06 +#define DVSTCTR0 0x08 +#define DVSTCTR1 0x0A +#define TESTMODE 0x0C +#define PINCFG 0x0E +#define DMA0CFG 0x10 +#define DMA1CFG 0x12 +#define CFIFO 0x14 +#define D0FIFO 0x18 +#define D1FIFO 0x1C +#define CFIFOSEL 0x20 +#define CFIFOCTR 0x22 +#define CFIFOSIE 0x24 +#define D0FIFOSEL 0x28 +#define D0FIFOCTR 0x2A +#define D1FIFOSEL 0x2C +#define D1FIFOCTR 0x2E +#define INTENB0 0x30 +#define INTENB1 0x32 +#define INTENB2 0x34 +#define BRDYENB 0x36 +#define NRDYENB 0x38 +#define BEMPENB 0x3A +#define SOFCFG 0x3C +#define INTSTS0 0x40 +#define INTSTS1 0x42 +#define INTSTS2 0x44 +#define BRDYSTS 0x46 +#define NRDYSTS 0x48 +#define BEMPSTS 0x4A +#define FRMNUM 0x4C +#define UFRMNUM 0x4E +#define USBADDR 0x50 +#define USBREQ 0x54 +#define USBVAL 0x56 +#define USBINDX 0x58 +#define USBLENG 0x5A +#define DCPCFG 0x5C +#define DCPMAXP 0x5E +#define DCPCTR 0x60 +#define PIPESEL 0x64 +#define PIPECFG 0x68 +#define PIPEBUF 0x6A +#define PIPEMAXP 0x6C +#define PIPEPERI 0x6E +#define PIPE1CTR 0x70 +#define PIPE2CTR 0x72 +#define PIPE3CTR 0x74 +#define PIPE4CTR 0x76 +#define PIPE5CTR 0x78 +#define PIPE6CTR 0x7A +#define PIPE7CTR 0x7C +#define PIPE8CTR 0x7E +#define PIPE9CTR 0x80 +#define PIPE1TRE 0x90 +#define PIPE1TRN 0x92 +#define PIPE2TRE 0x94 +#define PIPE2TRN 0x96 +#define PIPE3TRE 0x98 +#define PIPE3TRN 0x9A +#define PIPE4TRE 0x9C +#define PIPE4TRN 0x9E +#define PIPE5TRE 0xA0 +#define PIPE5TRN 0xA2 +#define DEVADD0 0xD0 +#define DEVADD1 0xD2 +#define DEVADD2 0xD4 +#define DEVADD3 0xD6 +#define DEVADD4 0xD8 +#define DEVADD5 0xDA +#define DEVADD6 0xDC +#define DEVADD7 0xDE +#define DEVADD8 0xE0 +#define DEVADD9 0xE2 +#define DEVADDA 0xE4 + +/* System Configuration Control Register */ +#define XTAL 0xC000 /* b15-14: Crystal selection */ +#define XTAL48 0x8000 /* 48MHz */ +#define XTAL24 0x4000 /* 24MHz */ +#define XTAL12 0x0000 /* 12MHz */ +#define XCKE 0x2000 /* b13: External clock enable */ +#define PLLC 0x0800 /* b11: PLL control */ +#define SCKE 0x0400 /* b10: USB clock enable */ +#define PCSDIS 0x0200 /* b9: not CS wakeup */ +#define LPSME 0x0100 /* b8: Low power sleep mode */ +#define HSE 0x0080 /* b7: Hi-speed enable */ +#define DCFM 0x0040 /* b6: Controller function select */ +#define DRPD 0x0020 /* b5: D+/- pull down control */ +#define DPRPU 0x0010 /* b4: D+ pull up control */ +#define USBE 0x0001 /* b0: USB module operation 
enable */ + +/* System Configuration Status Register */ +#define OVCBIT 0x8000 /* b15-14: Over-current bit */ +#define OVCMON 0xC000 /* b15-14: Over-current monitor */ +#define SOFEA 0x0020 /* b5: SOF monitor */ +#define IDMON 0x0004 /* b3: ID-pin monitor */ +#define LNST 0x0003 /* b1-0: D+, D- line status */ +#define SE1 0x0003 /* SE1 */ +#define FS_KSTS 0x0002 /* Full-Speed K State */ +#define FS_JSTS 0x0001 /* Full-Speed J State */ +#define LS_JSTS 0x0002 /* Low-Speed J State */ +#define LS_KSTS 0x0001 /* Low-Speed K State */ +#define SE0 0x0000 /* SE0 */ + +/* Device State Control Register */ +#define EXTLP0 0x0400 /* b10: External port */ +#define VBOUT 0x0200 /* b9: VBUS output */ +#define WKUP 0x0100 /* b8: Remote wakeup */ +#define RWUPE 0x0080 /* b7: Remote wakeup sense */ +#define USBRST 0x0040 /* b6: USB reset enable */ +#define RESUME 0x0020 /* b5: Resume enable */ +#define UACT 0x0010 /* b4: USB bus enable */ +#define RHST 0x0007 /* b1-0: Reset handshake status */ +#define HSPROC 0x0004 /* HS handshake is processing */ +#define HSMODE 0x0003 /* Hi-Speed mode */ +#define FSMODE 0x0002 /* Full-Speed mode */ +#define LSMODE 0x0001 /* Low-Speed mode */ +#define UNDECID 0x0000 /* Undecided */ + +/* Test Mode Register */ +#define UTST 0x000F /* b3-0: Test select */ +#define H_TST_PACKET 0x000C /* HOST TEST Packet */ +#define H_TST_SE0_NAK 0x000B /* HOST TEST SE0 NAK */ +#define H_TST_K 0x000A /* HOST TEST K */ +#define H_TST_J 0x0009 /* HOST TEST J */ +#define H_TST_NORMAL 0x0000 /* HOST Normal Mode */ +#define P_TST_PACKET 0x0004 /* PERI TEST Packet */ +#define P_TST_SE0_NAK 0x0003 /* PERI TEST SE0 NAK */ +#define P_TST_K 0x0002 /* PERI TEST K */ +#define P_TST_J 0x0001 /* PERI TEST J */ +#define P_TST_NORMAL 0x0000 /* PERI Normal Mode */ + +/* Data Pin Configuration Register */ +#define LDRV 0x8000 /* b15: Drive Current Adjust */ +#define VIF1 0x0000 /* VIF = 1.8V */ +#define VIF3 0x8000 /* VIF = 3.3V */ +#define INTA 0x0001 /* b1: USB INT-pin active */ + +/* DMAx Pin Configuration Register */ +#define DREQA 0x4000 /* b14: Dreq active select */ +#define BURST 0x2000 /* b13: Burst mode */ +#define DACKA 0x0400 /* b10: Dack active select */ +#define DFORM 0x0380 /* b9-7: DMA mode select */ +#define CPU_ADR_RD_WR 0x0000 /* Address + RD/WR mode (CPU bus) */ +#define CPU_DACK_RD_WR 0x0100 /* DACK + RD/WR mode (CPU bus) */ +#define CPU_DACK_ONLY 0x0180 /* DACK only mode (CPU bus) */ +#define SPLIT_DACK_ONLY 0x0200 /* DACK only mode (SPLIT bus) */ +#define DENDA 0x0040 /* b6: Dend active select */ +#define PKTM 0x0020 /* b5: Packet mode */ +#define DENDE 0x0010 /* b4: Dend enable */ +#define OBUS 0x0004 /* b2: OUTbus mode */ + +/* CFIFO/DxFIFO Port Select Register */ +#define RCNT 0x8000 /* b15: Read count mode */ +#define REW 0x4000 /* b14: Buffer rewind */ +#define DCLRM 0x2000 /* b13: DMA buffer clear mode */ +#define DREQE 0x1000 /* b12: DREQ output enable */ +#define MBW_8 0x0000 /* 8bit */ +#define MBW_16 0x0400 /* 16bit */ +#define MBW_32 0x0800 /* 32bit */ +#define BIGEND 0x0100 /* b8: Big endian mode */ +#define BYTE_LITTLE 0x0000 /* little dendian */ +#define BYTE_BIG 0x0100 /* big endifan */ +#define ISEL 0x0020 /* b5: DCP FIFO port direction select */ +#define CURPIPE 0x000F /* b2-0: PIPE select */ + +/* CFIFO/DxFIFO Port Control Register */ +#define BVAL 0x8000 /* b15: Buffer valid flag */ +#define BCLR 0x4000 /* b14: Buffer clear */ +#define FRDY 0x2000 /* b13: FIFO ready */ +#define DTLN 0x0FFF /* b11-0: FIFO received data length */ + +/* Interrupt Enable Register 0 */ 
+ +/* Interrupt Enable Register 0 */ +#define VBSE 0x8000 /* b15: VBUS interrupt */ +#define RSME 0x4000 /* b14: Resume interrupt */ +#define SOFE 0x2000 /* b13: Frame update interrupt */ +#define DVSE 0x1000 /* b12: Device state transition interrupt */ +#define CTRE 0x0800 /* b11: Control transfer stage transition interrupt */ +#define BEMPE 0x0400 /* b10: Buffer empty interrupt */ +#define NRDYE 0x0200 /* b9: Buffer not ready interrupt */ +#define BRDYE 0x0100 /* b8: Buffer ready interrupt */ + +/* Interrupt Enable Register 1 */ +#define OVRCRE 0x8000 /* b15: Over-current interrupt */ +#define BCHGE 0x4000 /* b14: USB bus change interrupt */ +#define DTCHE 0x1000 /* b12: Detach sense interrupt */ +#define ATTCHE 0x0800 /* b11: Attach sense interrupt */ +#define EOFERRE 0x0040 /* b6: EOF error interrupt */ +#define SIGNE 0x0020 /* b5: SETUP IGNORE interrupt */ +#define SACKE 0x0010 /* b4: SETUP ACK interrupt */ + +/* BRDY Interrupt Enable/Status Register */ +#define BRDY9 0x0200 /* b9: PIPE9 */ +#define BRDY8 0x0100 /* b8: PIPE8 */ +#define BRDY7 0x0080 /* b7: PIPE7 */ +#define BRDY6 0x0040 /* b6: PIPE6 */ +#define BRDY5 0x0020 /* b5: PIPE5 */ +#define BRDY4 0x0010 /* b4: PIPE4 */ +#define BRDY3 0x0008 /* b3: PIPE3 */ +#define BRDY2 0x0004 /* b2: PIPE2 */ +#define BRDY1 0x0002 /* b1: PIPE1 */ +#define BRDY0 0x0001 /* b0: PIPE0 */ + +/* NRDY Interrupt Enable/Status Register */ +#define NRDY9 0x0200 /* b9: PIPE9 */ +#define NRDY8 0x0100 /* b8: PIPE8 */ +#define NRDY7 0x0080 /* b7: PIPE7 */ +#define NRDY6 0x0040 /* b6: PIPE6 */ +#define NRDY5 0x0020 /* b5: PIPE5 */ +#define NRDY4 0x0010 /* b4: PIPE4 */ +#define NRDY3 0x0008 /* b3: PIPE3 */ +#define NRDY2 0x0004 /* b2: PIPE2 */ +#define NRDY1 0x0002 /* b1: PIPE1 */ +#define NRDY0 0x0001 /* b0: PIPE0 */ + +/* BEMP Interrupt Enable/Status Register */ +#define BEMP9 0x0200 /* b9: PIPE9 */ +#define BEMP8 0x0100 /* b8: PIPE8 */ +#define BEMP7 0x0080 /* b7: PIPE7 */ +#define BEMP6 0x0040 /* b6: PIPE6 */ +#define BEMP5 0x0020 /* b5: PIPE5 */ +#define BEMP4 0x0010 /* b4: PIPE4 */ +#define BEMP3 0x0008 /* b3: PIPE3 */ +#define BEMP2 0x0004 /* b2: PIPE2 */ +#define BEMP1 0x0002 /* b1: PIPE1 */ +#define BEMP0 0x0001 /* b0: PIPE0 */
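Since the BRDY/NRDY/BEMP enable and status registers carry one bit per pipe, interrupt dispatch is a mask-and-walk over pipe numbers; status bits are typically acknowledged by writing back the complement of the handled bit. A rough sketch under the same assumed r8a66597_read()/r8a66597_write() accessors:

static void example_dispatch_brdy(struct r8a66597 *r8a66597)
{
	u16 pending = r8a66597_read(r8a66597, BRDYSTS) &
		      r8a66597_read(r8a66597, BRDYENB);
	u16 pipenum;

	for (pipenum = 0; pipenum < R8A66597_MAX_NUM_PIPE; pipenum++) {
		if (!(pending & (1 << pipenum)))
			continue;
		/* Ack this pipe's status bit, then service its FIFO. */
		r8a66597_write(r8a66597, (u16)~(1 << pipenum), BRDYSTS);
	}
}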
+ +/* SOF Pin Configuration Register */ +#define TRNENSEL 0x0100 /* b8: Select transaction enable period */ +#define BRDYM 0x0040 /* b6: BRDY clear timing */ +#define INTL 0x0020 /* b5: Interrupt sense select */ +#define EDGESTS 0x0010 /* b4: */ +#define SOFMODE 0x000C /* b3-2: SOF pin select */ +#define SOF_125US 0x0008 /* SOF OUT 125us Frame Signal */ +#define SOF_1MS 0x0004 /* SOF OUT 1ms Frame Signal */ +#define SOF_DISABLE 0x0000 /* SOF OUT Disable */ + +/* Interrupt Status Register 0 */ +#define VBINT 0x8000 /* b15: VBUS interrupt */ +#define RESM 0x4000 /* b14: Resume interrupt */ +#define SOFR 0x2000 /* b13: SOF frame update interrupt */ +#define DVST 0x1000 /* b12: Device state transition interrupt */ +#define CTRT 0x0800 /* b11: Control transfer stage transition interrupt */ +#define BEMP 0x0400 /* b10: Buffer empty interrupt */ +#define NRDY 0x0200 /* b9: Buffer not ready interrupt */ +#define BRDY 0x0100 /* b8: Buffer ready interrupt */ +#define VBSTS 0x0080 /* b7: VBUS input port */ +#define DVSQ 0x0070 /* b6-4: Device state */ +#define DS_SPD_CNFG 0x0070 /* Suspend Configured */ +#define DS_SPD_ADDR 0x0060 /* Suspend Address */ +#define DS_SPD_DFLT 0x0050 /* Suspend Default */ +#define DS_SPD_POWR 0x0040 /* Suspend Powered */ +#define DS_SUSP 0x0040 /* Suspend */ +#define DS_CNFG 0x0030 /* Configured */ +#define DS_ADDS 0x0020 /* Address */ +#define DS_DFLT 0x0010 /* Default */ +#define DS_POWR 0x0000 /* Powered */ +#define DVSQS 0x0030 /* b5-4: Device state */ +#define VALID 0x0008 /* b3: Setup packet detected flag */ +#define CTSQ 0x0007 /* b2-0: Control transfer stage */ +#define CS_SQER 0x0006 /* Sequence error */ +#define CS_WRND 0x0005 /* Control write nodata status stage */ +#define CS_WRSS 0x0004 /* Control write status stage */ +#define CS_WRDS 0x0003 /* Control write data stage */ +#define CS_RDSS 0x0002 /* Control read status stage */ +#define CS_RDDS 0x0001 /* Control read data stage */ +#define CS_IDST 0x0000 /* Idle or setup stage */ + +/* Interrupt Status Register 1 */ +#define OVRCR 0x8000 /* b15: Over-current interrupt */ +#define BCHG 0x4000 /* b14: USB bus change interrupt */ +#define DTCH 0x1000 /* b12: Detach sense interrupt */ +#define ATTCH 0x0800 /* b11: Attach sense interrupt */ +#define EOFERR 0x0040 /* b6: EOF-error interrupt */ +#define SIGN 0x0020 /* b5: Setup ignore interrupt */ +#define SACK 0x0010 /* b4: Setup acknowledge interrupt */ + +/* Frame Number Register */ +#define OVRN 0x8000 /* b15: Overrun error */ +#define CRCE 0x4000 /* b14: Received data error */ +#define FRNM 0x07FF /* b10-0: Frame number */ + +/* Micro Frame Number Register */ +#define UFRNM 0x0007 /* b2-0: Micro frame number */ + +/* Default Control Pipe Maxpacket Size Register */ +/* Pipe Maxpacket Size Register */ +#define DEVSEL 0xF000 /* b15-12: Device address select */ +#define MAXP 0x007F /* b6-0: Maxpacket size of default control pipe */ + +/* Default Control Pipe Control Register */ +#define BSTS 0x8000 /* b15: Buffer status */ +#define SUREQ 0x4000 /* b14: Send USB request */ +#define CSCLR 0x2000 /* b13: complete-split status clear */ +#define CSSTS 0x1000 /* b12: complete-split status */ +#define SUREQCLR 0x0800 /* b11: stop setup request */ +#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ +#define SQSET 0x0080 /* b7: Sequence toggle bit set */ +#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ +#define PBUSY 0x0020 /* b5: pipe busy */ +#define PINGE 0x0010 /* b4: ping enable */ +#define CCPL 0x0004 /* b2: Enable control transfer complete */ +#define PID 0x0003 /* b1-0: Response PID */ +#define PID_STALL11 0x0003 /* STALL */ +#define PID_STALL 0x0002 /* STALL */ +#define PID_BUF 0x0001 /* BUF */ +#define PID_NAK 0x0000 /* NAK */ + +/* Pipe Window Select Register */ +#define PIPENM 0x0007 /* b2-0: Pipe select */ + +/* Pipe Configuration Register */ +#define R8A66597_TYP 0xC000 /* b15-14: Transfer type */ +#define R8A66597_ISO 0xC000 /* Isochronous */ +#define R8A66597_INT 0x8000 /* Interrupt */ +#define R8A66597_BULK 0x4000 /* Bulk */ +#define R8A66597_BFRE 0x0400 /* b10: Buffer ready interrupt mode select */ +#define R8A66597_DBLB 0x0200 /* b9: Double buffer mode select */ +#define R8A66597_CNTMD 0x0100 /* b8: Continuous transfer mode select */ +#define R8A66597_SHTNAK 0x0080 /* b7: Transfer end NAK */ +#define R8A66597_DIR 0x0010 /* b4: Transfer direction select */ +#define R8A66597_EPNUM 0x000F /* b3-0: Endpoint number select */ + +/* Pipe Buffer Configuration Register */ +#define BUFSIZE 0x7C00 /* b14-10: Pipe buffer size */ +#define BUFNMB 0x007F /* b6-0: Pipe buffer number */ +#define PIPE0BUF 256 +#define PIPExBUF 64 + +/* Pipe Maxpacket Size Register */ +#define MXPS 0x07FF /* b10-0: Maxpacket size */ + +/* Pipe Cycle Configuration Register */ +#define IFIS 0x1000 /* b12: Isochronous in-buffer flush mode select */ +#define IITV 0x0007 /* b2-0: Isochronous interval */
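Pulling the pipe registers together: configuring a pipe amounts to selecting it through PIPESEL and then programming PIPECFG and PIPEMAXP; the real drivers add buffer allocation and sequence-bit handling on top. A sketch with the same assumed r8a66597_write() helper:

static void example_config_bulk_pipe(struct r8a66597 *r8a66597)
{
	/* Window in on pipe 1, then describe it: bulk transfer type,
	 * double buffering, endpoint number 1 in R8A66597_EPNUM. */
	r8a66597_write(r8a66597, 1, PIPESEL);
	r8a66597_write(r8a66597, R8A66597_BULK | R8A66597_DBLB | 1, PIPECFG);

	/* 512 bytes is the high-speed bulk max packet size (fits MXPS). */
	r8a66597_write(r8a66597, 512, PIPEMAXP);
}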
+ +/* Pipex Control Register */ +#define BSTS 0x8000 /* b15: Buffer status */ +#define INBUFM 0x4000 /* b14: IN buffer monitor (Only for PIPE1 to 5) */ +#define CSCLR 0x2000 /* b13: complete-split status clear */ +#define CSSTS 0x1000 /* b12: complete-split status */ +#define ATREPM 0x0400 /* b10: Auto repeat mode */ +#define ACLRM 0x0200 /* b9: Out buffer auto clear mode */ +#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ +#define SQSET 0x0080 /* b7: Sequence toggle bit set */ +#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ +#define PBUSY 0x0020 /* b5: pipe busy */ +#define PID 0x0003 /* b1-0: Response PID */ + +/* PIPExTRE */ +#define TRENB 0x0200 /* b9: Transaction counter enable */ +#define TRCLR 0x0100 /* b8: Transaction counter clear */ + +/* PIPExTRN */ +#define TRNCNT 0xFFFF /* b15-0: Transaction counter */ + +/* DEVADDx */ +#define UPPHUB 0x7800 +#define HUBPORT 0x0700 +#define USBSPD 0x00C0 +#define RTPORT 0x0001 + +#endif /* __LINUX_USB_R8A66597_H */ -- cgit v1.2.3 From 2c59b0b70b9d5d61c726f179724660c4c2423f31 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 22 Jul 2009 14:41:35 +0000 Subject: usb: m66592-udc platform data on_chip support Convert the m66592-udc driver to use the on_chip flag from platform data to enable on-chip behaviour instead of relying on CONFIG_SUPERH_BUILT_IN_M66592 ugliness. This makes the code cleaner and also allows us to support both external and internal m66592 controllers with the same kernel. It also makes the Kconfig part more future proof, since with this patch we can add support for new processors with an on-chip m66592 without modifying the Kconfig. The patch adds an m66592 header file for platform data and ties in platform data to the existing m66592 devices. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/mach-highlander/setup.c | 7 + arch/sh/boards/mach-x3proto/setup.c | 7 + arch/sh/kernel/cpu/sh4a/setup-sh7722.c | 8 +- drivers/usb/gadget/Kconfig | 10 -- drivers/usb/gadget/m66592-udc.c | 252 +++++++++++++++++++-------------- drivers/usb/gadget/m66592-udc.h | 89 ++++++------ include/linux/usb/m66592.h | 44 ++++++ 7 files changed, 257 insertions(+), 160 deletions(-) create mode 100644 include/linux/usb/m66592.h (limited to 'include/linux') diff --git a/arch/sh/boards/mach-highlander/setup.c b/arch/sh/boards/mach-highlander/setup.c index 1639f8915000..566e69d8d729 100644 --- a/arch/sh/boards/mach-highlander/setup.c +++ b/arch/sh/boards/mach-highlander/setup.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,11 @@ static struct platform_device r8a66597_usb_host_device = { .resource = r8a66597_usb_host_resources, }; +static struct m66592_platdata usbf_platdata = { + .xtal = M66592_PLATDATA_XTAL_24MHZ, + .vif = 1, +}; + static struct resource m66592_usb_peripheral_resources[] = { [0] = { .name = "m66592_udc", .start = @@ -81,6 +87,7 @@ static struct platform_device m66592_usb_peripheral_device = { .dev = { .dma_mask = NULL, /* don't use dma */ .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(m66592_usb_peripheral_resources), .resource = m66592_usb_peripheral_resources, diff --git a/arch/sh/boards/mach-x3proto/setup.c b/arch/sh/boards/mach-x3proto/setup.c index 8913ae39a802..efe4cb9f8a77 100644 --- a/arch/sh/boards/mach-x3proto/setup.c +++ b/arch/sh/boards/mach-x3proto/setup.c @@ -17,6 +17,7 @@ #include #include #include +#include #include static struct resource heartbeat_resources[] = { @@ -89,6 +90,11 @@ static struct platform_device r8a66597_usb_host_device =
{ .resource = r8a66597_usb_host_resources, }; +static struct m66592_platdata usbf_platdata = { + .xtal = M66592_PLATDATA_XTAL_24MHZ, + .vif = 1, +}; + static struct resource m66592_usb_peripheral_resources[] = { [0] = { .name = "m66592_udc", @@ -109,6 +115,7 @@ static struct platform_device m66592_usb_peripheral_device = { .dev = { .dma_mask = NULL, /* don't use dma */ .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(m66592_usb_peripheral_resources), .resource = m66592_usb_peripheral_resources, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c index ea524a2da3e4..0bad14a44238 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -47,9 +48,13 @@ static struct platform_device rtc_device = { .resource = rtc_resources, }; +static struct m66592_platdata usbf_platdata = { + .on_chip = 1, +}; + static struct resource usbf_resources[] = { [0] = { - .name = "m66592_udc", + .name = "USBF", .start = 0x04480000, .end = 0x044800FF, .flags = IORESOURCE_MEM, @@ -67,6 +72,7 @@ static struct platform_device usbf_device = { .dev = { .dma_mask = NULL, .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(usbf_resources), .resource = usbf_resources, diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 7f8e83a954ac..b7f10bc25c2c 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -360,16 +360,6 @@ config USB_M66592 default USB_GADGET select USB_GADGET_SELECTED -config SUPERH_BUILT_IN_M66592 - boolean "Enable SuperH built-in USB like the M66592" - depends on USB_GADGET_M66592 && CPU_SUBTYPE_SH7722 - help - SH7722 has USB like the M66592. - - The transfer rate is very slow when use "Ethernet Gadget". - However, this problem is improved if change a value of - NET_IP_ALIGN to 4. 
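Note that for an external controller the old irq_sense module parameter is replaced by the IRQ resource flags: the reworked probe below reads ires->flags & IRQF_TRIGGER_MASK, and init_controller() maps IRQF_TRIGGER_LOW to M66592_INTL and IRQF_TRIGGER_FALLING to edge sensing. A board with an off-chip M66592 would therefore declare resources along these lines (a sketch; the addresses and IRQ number are made up for illustration):

static struct resource example_m66592_resources[] = {
	[0] = {
		.name	= "m66592_udc",
		.start	= 0xb8000000,	/* hypothetical chip-select window */
		.end	= 0xb80000ff,
		.flags	= IORESOURCE_MEM,
	},
	[1] = {
		.start	= 42,		/* hypothetical IRQ number */
		.flags	= IORESOURCE_IRQ | IRQF_TRIGGER_LOW,
	},
};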
- # # Controllers available only in discrete form (and all PCI controllers) # diff --git a/drivers/usb/gadget/m66592-udc.c b/drivers/usb/gadget/m66592-udc.c index 0dddd2f8ff35..a61c70caff12 100644 --- a/drivers/usb/gadget/m66592-udc.c +++ b/drivers/usb/gadget/m66592-udc.c @@ -31,38 +31,12 @@ #include "m66592-udc.h" - MODULE_DESCRIPTION("M66592 USB gadget driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yoshihiro Shimoda"); MODULE_ALIAS("platform:m66592_udc"); -#define DRIVER_VERSION "26 Jun 2009" - -/* module parameters */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) -static unsigned short endian = M66592_LITTLE; -module_param(endian, ushort, 0644); -MODULE_PARM_DESC(endian, "data endian: big=0, little=0 (default=0)"); -#else -static unsigned short clock = M66592_XTAL24; -module_param(clock, ushort, 0644); -MODULE_PARM_DESC(clock, "input clock: 48MHz=32768, 24MHz=16384, 12MHz=0 " - "(default=16384)"); - -static unsigned short vif = M66592_LDRV; -module_param(vif, ushort, 0644); -MODULE_PARM_DESC(vif, "input VIF: 3.3V=32768, 1.5V=0 (default=32768)"); - -static unsigned short endian; -module_param(endian, ushort, 0644); -MODULE_PARM_DESC(endian, "data endian: big=256, little=0 (default=0)"); - -static unsigned short irq_sense = M66592_INTL; -module_param(irq_sense, ushort, 0644); -MODULE_PARM_DESC(irq_sense, "IRQ sense: low level=2, falling edge=0 " - "(default=2)"); -#endif +#define DRIVER_VERSION "21 July 2009" static const char udc_name[] = "m66592_udc"; static const char *m66592_ep_name[] = { @@ -244,6 +218,7 @@ static inline int get_buffer_size(struct m66592 *m66592, u16 pipenum) static inline void pipe_change(struct m66592 *m66592, u16 pipenum) { struct m66592_ep *ep = m66592->pipenum2ep[pipenum]; + unsigned short mbw; if (ep->use_dma) return; @@ -252,7 +227,12 @@ static inline void pipe_change(struct m66592 *m66592, u16 pipenum) ndelay(450); - m66592_bset(m66592, M66592_MBW, ep->fifosel); + if (m66592->pdata->on_chip) + mbw = M66592_MBW_32; + else + mbw = M66592_MBW_16; + + m66592_bset(m66592, mbw, ep->fifosel); } static int pipe_buffer_setting(struct m66592 *m66592, @@ -332,6 +312,7 @@ static void pipe_buffer_release(struct m66592 *m66592, static void pipe_initialize(struct m66592_ep *ep) { struct m66592 *m66592 = ep->m66592; + unsigned short mbw; m66592_mdfy(m66592, 0, M66592_CURPIPE, ep->fifosel); @@ -343,7 +324,12 @@ static void pipe_initialize(struct m66592_ep *ep) ndelay(450); - m66592_bset(m66592, M66592_MBW, ep->fifosel); + if (m66592->pdata->on_chip) + mbw = M66592_MBW_32; + else + mbw = M66592_MBW_16; + + m66592_bset(m66592, mbw, ep->fifosel); } } @@ -359,15 +345,13 @@ static void m66592_ep_setting(struct m66592 *m66592, struct m66592_ep *ep, ep->fifosel = M66592_D0FIFOSEL; ep->fifoctr = M66592_D0FIFOCTR; ep->fifotrn = M66592_D0FIFOTRN; -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) - } else if (m66592->num_dma == 1) { + } else if (!m66592->pdata->on_chip && m66592->num_dma == 1) { m66592->num_dma++; ep->use_dma = 1; ep->fifoaddr = M66592_D1FIFO; ep->fifosel = M66592_D1FIFOSEL; ep->fifoctr = M66592_D1FIFOCTR; ep->fifotrn = M66592_D1FIFOTRN; -#endif } else { ep->use_dma = 0; ep->fifoaddr = M66592_CFIFO; @@ -612,76 +596,120 @@ static void start_ep0(struct m66592_ep *ep, struct m66592_request *req) } } -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) static void init_controller(struct m66592 *m66592) { - m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ - m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); - 
m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); + unsigned int endian; - /* This is a workaound for SH7722 2nd cut */ - m66592_bset(m66592, 0x8000, M66592_DVSTCTR); - m66592_bset(m66592, 0x1000, M66592_TESTMODE); - m66592_bclr(m66592, 0x8000, M66592_DVSTCTR); + if (m66592->pdata->on_chip) { + if (m66592->pdata->endian) + endian = 0; /* big endian */ + else + endian = M66592_LITTLE; /* little endian */ - m66592_bset(m66592, M66592_INTL, M66592_INTENB1); + m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ + m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); + m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); - m66592_write(m66592, 0, M66592_CFBCFG); - m66592_write(m66592, 0, M66592_D0FBCFG); - m66592_bset(m66592, endian, M66592_CFBCFG); - m66592_bset(m66592, endian, M66592_D0FBCFG); -} -#else /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ -static void init_controller(struct m66592 *m66592) -{ - m66592_bset(m66592, (vif & M66592_LDRV) | (endian & M66592_BIGEND), - M66592_PINCFG); - m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ - m66592_mdfy(m66592, clock & M66592_XTAL, M66592_XTAL, M66592_SYSCFG); + /* This is a workaound for SH7722 2nd cut */ + m66592_bset(m66592, 0x8000, M66592_DVSTCTR); + m66592_bset(m66592, 0x1000, M66592_TESTMODE); + m66592_bclr(m66592, 0x8000, M66592_DVSTCTR); - m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); - m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bset(m66592, M66592_INTL, M66592_INTENB1); + + m66592_write(m66592, 0, M66592_CFBCFG); + m66592_write(m66592, 0, M66592_D0FBCFG); + m66592_bset(m66592, endian, M66592_CFBCFG); + m66592_bset(m66592, endian, M66592_D0FBCFG); + } else { + unsigned int clock, vif, irq_sense; + + if (m66592->pdata->endian) + endian = M66592_BIGEND; /* big endian */ + else + endian = 0; /* little endian */ + + if (m66592->pdata->vif) + vif = M66592_LDRV; /* 3.3v */ + else + vif = 0; /* 1.5v */ + + switch (m66592->pdata->xtal) { + case M66592_PLATDATA_XTAL_12MHZ: + clock = M66592_XTAL12; + break; + case M66592_PLATDATA_XTAL_24MHZ: + clock = M66592_XTAL24; + break; + case M66592_PLATDATA_XTAL_48MHZ: + clock = M66592_XTAL48; + break; + default: + pr_warning("m66592-udc: xtal configuration error\n"); + clock = 0; + } - m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); + switch (m66592->irq_trigger) { + case IRQF_TRIGGER_LOW: + irq_sense = M66592_INTL; + break; + case IRQF_TRIGGER_FALLING: + irq_sense = 0; + break; + default: + pr_warning("m66592-udc: irq trigger config error\n"); + irq_sense = 0; + } - msleep(3); + m66592_bset(m66592, + (vif & M66592_LDRV) | (endian & M66592_BIGEND), + M66592_PINCFG); + m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ + m66592_mdfy(m66592, clock & M66592_XTAL, M66592_XTAL, + M66592_SYSCFG); + m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); + m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bset(m66592, M66592_RCKE | M66592_PLLC, M66592_SYSCFG); + m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); + + msleep(3); - msleep(1); + m66592_bset(m66592, M66592_RCKE | M66592_PLLC, M66592_SYSCFG); - m66592_bset(m66592, M66592_SCKE, M66592_SYSCFG); + msleep(1); - m66592_bset(m66592, irq_sense & M66592_INTL, M66592_INTENB1); - m66592_write(m66592, M66592_BURST | M66592_CPU_ADR_RD_WR, - M66592_DMA0CFG); + m66592_bset(m66592, M66592_SCKE, M66592_SYSCFG); + + m66592_bset(m66592, irq_sense & M66592_INTL, 
M66592_INTENB1); + m66592_write(m66592, M66592_BURST | M66592_CPU_ADR_RD_WR, + M66592_DMA0CFG); + } } -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ static void disable_controller(struct m66592 *m66592) { -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) - m66592_bclr(m66592, M66592_SCKE, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_PLLC, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_RCKE, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_XCKE, M66592_SYSCFG); -#endif + if (!m66592->pdata->on_chip) { + m66592_bclr(m66592, M66592_SCKE, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_PLLC, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_RCKE, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_XCKE, M66592_SYSCFG); + } } static void m66592_start_xclock(struct m66592 *m66592) { -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) u16 tmp; - tmp = m66592_read(m66592, M66592_SYSCFG); - if (!(tmp & M66592_XCKE)) - m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); -#endif + if (!m66592->pdata->on_chip) { + tmp = m66592_read(m66592, M66592_SYSCFG); + if (!(tmp & M66592_XCKE)) + m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); + } } /*-------------------------------------------------------------------------*/ @@ -1169,8 +1197,7 @@ static irqreturn_t m66592_irq(int irq, void *_m66592) intsts0 = m66592_read(m66592, M66592_INTSTS0); intenb0 = m66592_read(m66592, M66592_INTENB0); -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - if (!intsts0 && !intenb0) { + if (m66592->pdata->on_chip && !intsts0 && !intenb0) { /* * When USB clock stops, it cannot read register. Even if a * clock stops, the interrupt occurs. So this driver turn on @@ -1180,7 +1207,6 @@ static irqreturn_t m66592_irq(int irq, void *_m66592) intsts0 = m66592_read(m66592, M66592_INTSTS0); intenb0 = m66592_read(m66592, M66592_INTENB0); } -#endif savepipe = m66592_read(m66592, M66592_CFIFOSEL); @@ -1526,9 +1552,11 @@ static int __exit m66592_remove(struct platform_device *pdev) iounmap(m66592->reg); free_irq(platform_get_irq(pdev, 0), m66592); m66592_free_request(&m66592->ep[0].ep, m66592->ep0_req); -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - clk_disable(m66592->clk); - clk_put(m66592->clk); +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + clk_disable(m66592->clk); + clk_put(m66592->clk); + } #endif kfree(m66592); return 0; @@ -1540,11 +1568,10 @@ static void nop_completion(struct usb_ep *ep, struct usb_request *r) static int __init m66592_probe(struct platform_device *pdev) { - struct resource *res; - int irq; + struct resource *res, *ires; void __iomem *reg = NULL; struct m66592 *m66592 = NULL; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK char clk_name[8]; #endif int ret = 0; @@ -1557,10 +1584,11 @@ static int __init m66592_probe(struct platform_device *pdev) goto clean_up; } - irq = platform_get_irq(pdev, 0); - if (irq < 0) { + ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!ires) { ret = -ENODEV; - pr_err("platform_get_irq error.\n"); + dev_err(&pdev->dev, + "platform_get_resource IORESOURCE_IRQ error.\n"); goto clean_up; } @@ -1571,6 +1599,12 @@ static int __init m66592_probe(struct platform_device *pdev) goto clean_up; } + if (pdev->dev.platform_data == NULL) { + dev_err(&pdev->dev, "no platform data\n"); + ret = -ENODEV; + goto clean_up; + } + /* initialize ucd */ m66592 = kzalloc(sizeof(struct m66592), GFP_KERNEL); if (m66592 == NULL) { @@ -1578,6 +1612,9 @@ static int __init 
m66592_probe(struct platform_device *pdev) goto clean_up; } + m66592->pdata = pdev->dev.platform_data; + m66592->irq_trigger = ires->flags & IRQF_TRIGGER_MASK; + spin_lock_init(&m66592->lock); dev_set_drvdata(&pdev->dev, m66592); @@ -1595,22 +1632,25 @@ static int __init m66592_probe(struct platform_device *pdev) m66592->timer.data = (unsigned long)m66592; m66592->reg = reg; - ret = request_irq(irq, m66592_irq, IRQF_DISABLED | IRQF_SHARED, + ret = request_irq(ires->start, m66592_irq, IRQF_DISABLED | IRQF_SHARED, udc_name, m66592); if (ret < 0) { pr_err("request_irq error (%d)\n", ret); goto clean_up; } -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - snprintf(clk_name, sizeof(clk_name), "usbf%d", pdev->id); - m66592->clk = clk_get(&pdev->dev, clk_name); - if (IS_ERR(m66592->clk)) { - dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name); - ret = PTR_ERR(m66592->clk); - goto clean_up2; +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + snprintf(clk_name, sizeof(clk_name), "usbf%d", pdev->id); + m66592->clk = clk_get(&pdev->dev, clk_name); + if (IS_ERR(m66592->clk)) { + dev_err(&pdev->dev, "cannot get clock \"%s\"\n", + clk_name); + ret = PTR_ERR(m66592->clk); + goto clean_up2; + } + clk_enable(m66592->clk); } - clk_enable(m66592->clk); #endif INIT_LIST_HEAD(&m66592->gadget.ep_list); m66592->gadget.ep0 = &m66592->ep[0].ep; @@ -1652,12 +1692,14 @@ static int __init m66592_probe(struct platform_device *pdev) return 0; clean_up3: -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - clk_disable(m66592->clk); - clk_put(m66592->clk); +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + clk_disable(m66592->clk); + clk_put(m66592->clk); + } clean_up2: #endif - free_irq(irq, m66592); + free_irq(ires->start, m66592); clean_up: if (m66592) { if (m66592->ep0_req) diff --git a/drivers/usb/gadget/m66592-udc.h b/drivers/usb/gadget/m66592-udc.h index 9a9c2bf9fbd5..8b960deed680 100644 --- a/drivers/usb/gadget/m66592-udc.h +++ b/drivers/usb/gadget/m66592-udc.h @@ -23,10 +23,12 @@ #ifndef __M66592_UDC_H__ #define __M66592_UDC_H__ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK #include #endif +#include + #define M66592_SYSCFG 0x00 #define M66592_XTAL 0xC000 /* b15-14: Crystal selection */ #define M66592_XTAL48 0x8000 /* 48MHz */ @@ -76,11 +78,11 @@ #define M66592_P_TST_J 0x0001 /* PERI TEST J */ #define M66592_P_TST_NORMAL 0x0000 /* PERI Normal Mode */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) +/* built-in registers */ #define M66592_CFBCFG 0x0A #define M66592_D0FBCFG 0x0C #define M66592_LITTLE 0x0100 /* b8: Little endian mode */ -#else +/* external chip case */ #define M66592_PINCFG 0x0A #define M66592_LDRV 0x8000 /* b15: Drive Current Adjust */ #define M66592_BIGEND 0x0100 /* b8: Big endian mode */ @@ -100,8 +102,8 @@ #define M66592_PKTM 0x0020 /* b5: Packet mode */ #define M66592_DENDE 0x0010 /* b4: Dend enable */ #define M66592_OBUS 0x0004 /* b2: OUTbus mode */ -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ +/* common case */ #define M66592_CFIFO 0x10 #define M66592_D0FIFO 0x14 #define M66592_D1FIFO 0x18 @@ -113,13 +115,9 @@ #define M66592_REW 0x4000 /* b14: Buffer rewind */ #define M66592_DCLRM 0x2000 /* b13: DMA buffer clear mode */ #define M66592_DREQE 0x1000 /* b12: DREQ output enable */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) -#define M66592_MBW 0x0800 /* b11: Maximum bit width for FIFO */ -#else -#define M66592_MBW 0x0400 /* b10: Maximum bit width for FIFO */ -#define 
M66592_MBW_8 0x0000 /* 8bit */ -#define M66592_MBW_16 0x0400 /* 16bit */ -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ +#define M66592_MBW_8 0x0000 /* 8bit */ +#define M66592_MBW_16 0x0400 /* 16bit */ +#define M66592_MBW_32 0x0800 /* 32bit */ #define M66592_TRENB 0x0200 /* b9: Transaction counter enable */ #define M66592_TRCLR 0x0100 /* b8: Transaction counter clear */ #define M66592_DEZPM 0x0080 /* b7: Zero-length packet mode */ @@ -480,9 +478,11 @@ struct m66592_ep { struct m66592 { spinlock_t lock; void __iomem *reg; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK struct clk *clk; #endif + struct m66592_platdata *pdata; + unsigned long irq_trigger; struct usb_gadget gadget; struct usb_gadget_driver *driver; @@ -546,13 +546,13 @@ static inline void m66592_read_fifo(struct m66592 *m66592, { unsigned long fifoaddr = (unsigned long)m66592->reg + offset; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - len = (len + 3) / 4; - insl(fifoaddr, buf, len); -#else - len = (len + 1) / 2; - insw(fifoaddr, buf, len); -#endif + if (m66592->pdata->on_chip) { + len = (len + 3) / 4; + insl(fifoaddr, buf, len); + } else { + len = (len + 1) / 2; + insw(fifoaddr, buf, len); + } } static inline void m66592_write(struct m66592 *m66592, u16 val, @@ -566,33 +566,34 @@ static inline void m66592_write_fifo(struct m66592 *m66592, void *buf, unsigned long len) { unsigned long fifoaddr = (unsigned long)m66592->reg + offset; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - unsigned long count; - unsigned char *pb; - int i; - - count = len / 4; - outsl(fifoaddr, buf, count); - - if (len & 0x00000003) { - pb = buf + count * 4; - for (i = 0; i < (len & 0x00000003); i++) { - if (m66592_read(m66592, M66592_CFBCFG)) /* little */ - outb(pb[i], fifoaddr + (3 - i)); - else - outb(pb[i], fifoaddr + i); + + if (m66592->pdata->on_chip) { + unsigned long count; + unsigned char *pb; + int i; + + count = len / 4; + outsl(fifoaddr, buf, count); + + if (len & 0x00000003) { + pb = buf + count * 4; + for (i = 0; i < (len & 0x00000003); i++) { + if (m66592_read(m66592, M66592_CFBCFG)) /* le */ + outb(pb[i], fifoaddr + (3 - i)); + else + outb(pb[i], fifoaddr + i); + } + } + } else { + unsigned long odd = len & 0x0001; + + len = len / 2; + outsw(fifoaddr, buf, len); + if (odd) { + unsigned char *p = buf + len*2; + outb(*p, fifoaddr); } } -#else - unsigned long odd = len & 0x0001; - - len = len / 2; - outsw(fifoaddr, buf, len); - if (odd) { - unsigned char *p = buf + len*2; - outb(*p, fifoaddr); - } -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ } static inline void m66592_mdfy(struct m66592 *m66592, u16 val, u16 pat, diff --git a/include/linux/usb/m66592.h b/include/linux/usb/m66592.h new file mode 100644 index 000000000000..cda9625e7df0 --- /dev/null +++ b/include/linux/usb/m66592.h @@ -0,0 +1,44 @@ +/* + * M66592 driver platform data + * + * Copyright (C) 2009 Renesas Solutions Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __LINUX_USB_M66592_H +#define __LINUX_USB_M66592_H + +#define M66592_PLATDATA_XTAL_12MHZ 0x01 +#define M66592_PLATDATA_XTAL_24MHZ 0x02 +#define M66592_PLATDATA_XTAL_48MHZ 0x03 + +struct m66592_platdata { + /* one = on chip controller, zero = external controller */ + unsigned on_chip:1; + + /* one = big endian, zero = little endian */ + unsigned endian:1; + + /* (external controller only) M66592_PLATDATA_XTAL_nnMHZ */ + unsigned xtal:2; + + /* (external controller only) one = 3.3V, zero = 1.5V */ + unsigned vif:1; + +}; + +#endif /* __LINUX_USB_M66592_H */ + -- cgit v1.2.3 From d9ab77161d811ffb0bccf396f7155cc905c1b9e1 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 22 Jul 2009 00:37:25 +0200 Subject: Driver Core: Make PM operations a const pointer They are not supposed to be modified by drivers, so make them const. Signed-off-by: Dmitry Torokhov Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- drivers/base/platform.c | 2 +- drivers/base/power/main.c | 8 +++++--- drivers/pci/pci-driver.c | 16 ++++++++-------- include/linux/device.h | 9 +++++---- 4 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 455e55971d0e..ae5c4aa6d269 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -889,7 +889,7 @@ static int platform_pm_restore_noirq(struct device *dev) #endif /* !CONFIG_HIBERNATION */ -static struct dev_pm_ops platform_dev_pm_ops = { +static const struct dev_pm_ops platform_dev_pm_ops = { .prepare = platform_pm_prepare, .complete = platform_pm_complete, .suspend = platform_pm_suspend, diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 58a3e572f2c9..1b1a786b7dec 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -157,8 +157,9 @@ void device_pm_move_last(struct device *dev) * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. */ -static int pm_op(struct device *dev, struct dev_pm_ops *ops, - pm_message_t state) +static int pm_op(struct device *dev, + const struct dev_pm_ops *ops, + pm_message_t state) { int error = 0; @@ -220,7 +221,8 @@ static int pm_op(struct device *dev, struct dev_pm_ops *ops, * The operation is executed with interrupts disabled by the only remaining * functional CPU in the system. */ -static int pm_noirq_op(struct device *dev, struct dev_pm_ops *ops, +static int pm_noirq_op(struct device *dev, + const struct dev_pm_ops *ops, pm_message_t state) { int error = 0; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d76c4c85367e..0c2ea44ae5e1 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -575,7 +575,7 @@ static void pci_pm_complete(struct device *dev) static int pci_pm_suspend(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_SUSPEND); @@ -613,7 +613,7 @@ static int pci_pm_suspend(struct device *dev) static int pci_pm_suspend_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? 
dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_SUSPEND); @@ -672,7 +672,7 @@ static int pci_pm_resume_noirq(struct device *dev) static int pci_pm_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; /* @@ -711,7 +711,7 @@ static int pci_pm_resume(struct device *dev) static int pci_pm_freeze(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_FREEZE); @@ -780,7 +780,7 @@ static int pci_pm_thaw_noirq(struct device *dev) static int pci_pm_thaw(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) @@ -799,7 +799,7 @@ static int pci_pm_thaw(struct device *dev) static int pci_pm_poweroff(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); @@ -872,7 +872,7 @@ static int pci_pm_restore_noirq(struct device *dev) static int pci_pm_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; /* @@ -910,7 +910,7 @@ static int pci_pm_restore(struct device *dev) #endif /* !CONFIG_HIBERNATION */ -struct dev_pm_ops pci_dev_pm_ops = { +const struct dev_pm_ops pci_dev_pm_ops = { .prepare = pci_pm_prepare, .complete = pci_pm_complete, .suspend = pci_pm_suspend, diff --git a/include/linux/device.h b/include/linux/device.h index aebb81036db2..a28642975053 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -62,7 +62,7 @@ struct bus_type { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; struct bus_type_private *p; }; @@ -132,7 +132,7 @@ struct device_driver { int (*resume) (struct device *dev); struct attribute_group **groups; - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; struct driver_private *p; }; @@ -200,7 +200,8 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; + struct class_private *p; }; @@ -291,7 +292,7 @@ struct device_type { char *(*nodename)(struct device *dev); void (*release)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; }; /* interface for exporting device attributes */ -- cgit v1.2.3 From c94aa5ca3088018d2a7a9bd3258aefffe29df265 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: lockdep: Print the shortest dependency chain if finding a circle Currently lockdep will print the 1st circle detected if it exists when acquiring a new (next) lock. 
This patch prints the shortest path from the next lock to be acquired to the previously held lock if a circle is found. The patch still uses the current method to check for a circle, and once the circle is found, the breadth-first search algorithm is used to compute the shortest path from the next lock to the previous lock in the forward lock dependency graph. Printing the shortest path will shorten the dependency chain, and make troubleshooting of possible circular locking easier. Signed-off-by: Ming Lei Signed-off-by: Peter Zijlstra LKML-Reference: <1246201486-7308-2-git-send-email-tom.leiming@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 6 +++ kernel/lockdep.c | 115 +++++++++++++++++++++++++++++++++++++++++---- kernel/lockdep_internals.h | 83 ++++++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b25d1b53df0d..9ec026f8d09e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -149,6 +149,12 @@ struct lock_list { struct lock_class *class; struct stack_trace trace; int distance; + + /*The parent field is used to implement breadth-first search,and + *the bit 0 is reused to indicate if the lock has been accessed + *in BFS. + */ + struct lock_list *parent; }; /* diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 8bbeef996c76..93dc70d18cdf 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -897,6 +897,79 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, return 1; } +static struct circular_queue lock_cq; +static int __search_shortest_path(struct lock_list *source_entry, + struct lock_class *target, + struct lock_list **target_entry, + int forward) +{ + struct lock_list *entry; + struct circular_queue *cq = &lock_cq; + int ret = 1; + + __cq_init(cq); + + mark_lock_accessed(source_entry, NULL); + if (source_entry->class == target) { + *target_entry = source_entry; + ret = 0; + goto exit; + } + + __cq_enqueue(cq, (unsigned long)source_entry); + + while (!__cq_empty(cq)) { + struct lock_list *lock; + struct list_head *head; + + __cq_dequeue(cq, (unsigned long *)&lock); + + if (!lock->class) { + ret = -2; + goto exit; + } + + if (forward) + head = &lock->class->locks_after; + else + head = &lock->class->locks_before; + + list_for_each_entry(entry, head, entry) { + if (!lock_accessed(entry)) { + mark_lock_accessed(entry, lock); + if (entry->class == target) { + *target_entry = entry; + ret = 0; + goto exit; + } + + if (__cq_enqueue(cq, (unsigned long)entry)) { + ret = -1; + goto exit; + } + } + } + } +exit: + return ret; +} + +static inline int __search_forward_shortest_path(struct lock_list *src_entry, + struct lock_class *target, + struct lock_list **target_entry) +{ + return __search_shortest_path(src_entry, target, target_entry, 1); + +} + +static inline int __search_backward_shortest_path(struct lock_list *src_entry, + struct lock_class *target, + struct lock_list **target_entry) +{ + return __search_shortest_path(src_entry, target, target_entry, 0); + +} + /* * Recursive, forwards-direction lock-dependency checking, used for * both noncyclic checking and for hardirq-unsafe/softirq-unsafe @@ -934,7 +1007,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth) { struct task_struct *curr = current; - if (!debug_locks_off_graph_unlock() || debug_locks_silent) + if (debug_locks_silent) return 0; printk("\n=======================================================\n"); @@ -954,19 +1027,41
@@ print_circular_bug_header(struct lock_list *entry, unsigned int depth) return 0; } -static noinline int print_circular_bug_tail(void) +static noinline int print_circular_bug(void) { struct task_struct *curr = current; struct lock_list this; + struct lock_list *target; + struct lock_list *parent; + int result; + unsigned long depth; - if (debug_locks_silent) + if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; this.class = hlock_class(check_source); if (!save_trace(&this.trace)) return 0; - print_circular_bug_entry(&this, 0); + result = __search_forward_shortest_path(&this, + hlock_class(check_target), + &target); + if (result) { + printk("\n%s:search shortest path failed:%d\n", __func__, + result); + return 0; + } + + depth = get_lock_depth(target); + + print_circular_bug_header(target, depth); + + parent = get_lock_parent(target); + + while (parent) { + print_circular_bug_entry(parent, --depth); + parent = get_lock_parent(parent); + } printk("\nother info that might help us debug this:\n\n"); lockdep_print_held_locks(curr); @@ -1072,14 +1167,15 @@ check_noncircular(struct lock_class *source, unsigned int depth) */ list_for_each_entry(entry, &source->locks_after, entry) { if (entry->class == hlock_class(check_target)) - return print_circular_bug_header(entry, depth+1); + return 2; debug_atomic_inc(&nr_cyclic_checks); - if (!check_noncircular(entry->class, depth+1)) - return print_circular_bug_entry(entry, depth+1); + if (check_noncircular(entry->class, depth+1) == 2) + return 2; } return 1; } + #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) /* * Forwards and backwards subgraph searching, for the purposes of @@ -1484,8 +1580,9 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, */ check_source = next; check_target = prev; - if (!(check_noncircular(hlock_class(next), 0))) - return print_circular_bug_tail(); + if (check_noncircular(hlock_class(next), 0) == 2) + return print_circular_bug(); + if (!check_prev_add_irq(curr, prev, next)) return 0; diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h index 699a2ac3a0d7..6f48d37d5be2 100644 --- a/kernel/lockdep_internals.h +++ b/kernel/lockdep_internals.h @@ -136,3 +136,86 @@ extern atomic_t nr_find_usage_backwards_recursions; # define debug_atomic_dec(ptr) do { } while (0) # define debug_atomic_read(ptr) 0 #endif + +/* The circular_queue and helpers is used to implement the + * breadth-first search(BFS)algorithem, by which we can build + * the shortest path from the next lock to be acquired to the + * previous held lock if there is a circular between them. 
+ * */ +#define MAX_CIRCULAR_QUE_SIZE 4096UL +struct circular_queue{ + unsigned long element[MAX_CIRCULAR_QUE_SIZE]; + unsigned int front, rear; +}; + +#define LOCK_ACCESSED 1UL +#define LOCK_ACCESSED_MASK (~LOCK_ACCESSED) + +static inline void __cq_init(struct circular_queue *cq) +{ + cq->front = cq->rear = 0; +} + +static inline int __cq_empty(struct circular_queue *cq) +{ + return (cq->front == cq->rear); +} + +static inline int __cq_full(struct circular_queue *cq) +{ + return ((cq->rear + 1)%MAX_CIRCULAR_QUE_SIZE) == cq->front; +} + +static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) +{ + if (__cq_full(cq)) + return -1; + + cq->element[cq->rear] = elem; + cq->rear = (cq->rear + 1)%MAX_CIRCULAR_QUE_SIZE; + return 0; +} + +static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) +{ + if (__cq_empty(cq)) + return -1; + + *elem = cq->element[cq->front]; + cq->front = (cq->front + 1)%MAX_CIRCULAR_QUE_SIZE; + return 0; +} + +static inline int __cq_get_elem_count(struct circular_queue *cq) +{ + return (cq->rear - cq->front)%MAX_CIRCULAR_QUE_SIZE; +} + +static inline void mark_lock_accessed(struct lock_list *lock, + struct lock_list *parent) +{ + lock->parent = (void *) parent + LOCK_ACCESSED; +} + +static inline unsigned long lock_accessed(struct lock_list *lock) +{ + return (unsigned long)lock->parent & LOCK_ACCESSED; +} + +static inline struct lock_list *get_lock_parent(struct lock_list *child) +{ + return (struct lock_list *) + ((unsigned long)child->parent & LOCK_ACCESSED_MASK); +} + +static inline unsigned long get_lock_depth(struct lock_list *child) +{ + unsigned long depth = 0; + struct lock_list *parent; + + while ((parent = get_lock_parent(child))) { + child = parent; + depth++; + } + return depth; +} -- cgit v1.2.3 From af012961450949ea297b209e091bd1a3805b8a0a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: lockdep: BFS cleanup Some cleanups of the lockdep code after the BFS series: - Remove the last traces of the generation id - Fixup comment style - Move the bfs routines into lockdep.c - Cleanup the bfs routines [ tom.leiming@gmail.com: Fix crash ] Signed-off-by: Peter Zijlstra LKML-Reference: <1246201486-7308-11-git-send-email-tom.leiming@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 7 +- kernel/lockdep.c | 284 +++++++++++++++++++++++++++------------------ kernel/lockdep_internals.h | 97 +--------------- 3 files changed, 175 insertions(+), 213 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 9ec026f8d09e..12aabfcb45f6 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -58,7 +58,6 @@ struct lock_class { struct lockdep_subclass_key *key; unsigned int subclass; - unsigned int dep_gen_id; /* * IRQ/softirq usage tracking bits: @@ -150,9 +149,9 @@ struct lock_list { struct stack_trace trace; int distance; - /*The parent field is used to implement breadth-first search,and - *the bit 0 is reused to indicate if the lock has been accessed - *in BFS. + /* + * The parent field is used to implement breadth-first search, and the + * bit 0 is reused to indicate if the lock has been accessed in BFS. 
*/ struct lock_list *parent; }; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 744da6265d99..1cedb00e3e7a 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -43,6 +43,7 @@ #include #include #include + #include #include "lockdep_internals.h" @@ -118,7 +119,7 @@ static inline int debug_locks_off_graph_unlock(void) static int lockdep_initialized; unsigned long nr_list_entries; -struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; +static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; /* * All data structures here are protected by the global debug_lock. @@ -390,19 +391,6 @@ unsigned int nr_process_chains; unsigned int max_lockdep_depth; unsigned int max_recursion_depth; -static unsigned int lockdep_dependency_gen_id; - -static bool lockdep_dependency_visit(struct lock_class *source, - unsigned int depth) -{ - if (!depth) - lockdep_dependency_gen_id++; - if (source->dep_gen_id == lockdep_dependency_gen_id) - return true; - source->dep_gen_id = lockdep_dependency_gen_id; - return false; -} - #ifdef CONFIG_DEBUG_LOCKDEP /* * We cannot printk in early bootup code. Not even early_printk() @@ -551,88 +539,6 @@ static void lockdep_print_held_locks(struct task_struct *curr) } } -static void print_lock_class_header(struct lock_class *class, int depth) -{ - int bit; - - printk("%*s->", depth, ""); - print_lock_name(class); - printk(" ops: %lu", class->ops); - printk(" {\n"); - - for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { - if (class->usage_mask & (1 << bit)) { - int len = depth; - - len += printk("%*s %s", depth, "", usage_str[bit]); - len += printk(" at:\n"); - print_stack_trace(class->usage_traces + bit, len); - } - } - printk("%*s }\n", depth, ""); - - printk("%*s ... key at: ",depth,""); - print_ip_sym((unsigned long)class->key); -} - -/* - * printk the shortest lock dependencies from @start to @end in reverse order: - */ -static void __used -print_shortest_lock_dependencies(struct lock_list *leaf, - struct lock_list *root) -{ - struct lock_list *entry = leaf; - int depth; - - /*compute depth from generated tree by BFS*/ - depth = get_lock_depth(leaf); - - do { - print_lock_class_header(entry->class, depth); - printk("%*s ... acquired at:\n", depth, ""); - print_stack_trace(&entry->trace, 2); - printk("\n"); - - if (depth == 0 && (entry != root)) { - printk("lockdep:%s bad BFS generated tree\n", __func__); - break; - } - - entry = get_lock_parent(entry); - depth--; - } while (entry && (depth >= 0)); - - return; -} -/* - * printk all lock dependencies starting at : - */ -static void __used -print_lock_dependencies(struct lock_class *class, int depth) -{ - struct lock_list *entry; - - if (lockdep_dependency_visit(class, depth)) - return; - - if (DEBUG_LOCKS_WARN_ON(depth >= 20)) - return; - - print_lock_class_header(class, depth); - - list_for_each_entry(entry, &class->locks_after, entry) { - if (DEBUG_LOCKS_WARN_ON(!entry->class)) - return; - - print_lock_dependencies(entry->class, depth + 1); - - printk("%*s ... 
acquired at:\n",depth,""); - print_stack_trace(&entry->trace, 2); - printk("\n"); - } -} - static void print_kernel_version(void) { printk("%s %.*s\n", init_utsname()->release, @@ -927,14 +833,106 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, return 1; } -unsigned long bfs_accessed[BITS_TO_LONGS(MAX_LOCKDEP_ENTRIES)]; -static struct circular_queue lock_cq; +/*For good efficiency of modular, we use power of 2*/ +#define MAX_CIRCULAR_QUEUE_SIZE 4096UL +#define CQ_MASK (MAX_CIRCULAR_QUEUE_SIZE-1) + +/* The circular_queue and helpers is used to implement the + * breadth-first search(BFS)algorithem, by which we can build + * the shortest path from the next lock to be acquired to the + * previous held lock if there is a circular between them. + * */ +struct circular_queue { + unsigned long element[MAX_CIRCULAR_QUEUE_SIZE]; + unsigned int front, rear; +}; + +static struct circular_queue lock_cq; +static unsigned long bfs_accessed[BITS_TO_LONGS(MAX_LOCKDEP_ENTRIES)]; + unsigned int max_bfs_queue_depth; + +static inline void __cq_init(struct circular_queue *cq) +{ + cq->front = cq->rear = 0; + bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); +} + +static inline int __cq_empty(struct circular_queue *cq) +{ + return (cq->front == cq->rear); +} + +static inline int __cq_full(struct circular_queue *cq) +{ + return ((cq->rear + 1) & CQ_MASK) == cq->front; +} + +static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) +{ + if (__cq_full(cq)) + return -1; + + cq->element[cq->rear] = elem; + cq->rear = (cq->rear + 1) & CQ_MASK; + return 0; +} + +static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) +{ + if (__cq_empty(cq)) + return -1; + + *elem = cq->element[cq->front]; + cq->front = (cq->front + 1) & CQ_MASK; + return 0; +} + +static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) +{ + return (cq->rear - cq->front) & CQ_MASK; +} + +static inline void mark_lock_accessed(struct lock_list *lock, + struct lock_list *parent) +{ + unsigned long nr; + nr = lock - list_entries; + WARN_ON(nr >= nr_list_entries); + lock->parent = parent; + set_bit(nr, bfs_accessed); +} + +static inline unsigned long lock_accessed(struct lock_list *lock) +{ + unsigned long nr; + nr = lock - list_entries; + WARN_ON(nr >= nr_list_entries); + return test_bit(nr, bfs_accessed); +} + +static inline struct lock_list *get_lock_parent(struct lock_list *child) +{ + return child->parent; +} + +static inline int get_lock_depth(struct lock_list *child) +{ + int depth = 0; + struct lock_list *parent; + + while ((parent = get_lock_parent(child))) { + child = parent; + depth++; + } + return depth; +} + static int __bfs(struct lock_list *source_entry, - void *data, - int (*match)(struct lock_list *entry, void *data), - struct lock_list **target_entry, - int forward) + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry, + int forward) { struct lock_list *entry; struct list_head *head; @@ -1202,14 +1200,6 @@ check_noncircular(struct lock_list *root, struct lock_class *target, * without creating any illegal irq-safe -> irq-unsafe lock dependency. 
*/ - -#define BFS_PROCESS_RET(ret) do { \ - if (ret < 0) \ - return print_bfs_bug(ret); \ - if (ret == 1) \ - return 1; \ - } while (0) - static inline int usage_match(struct lock_list *entry, void *bit) { return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit); @@ -1263,6 +1253,60 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit, return result; } +static void print_lock_class_header(struct lock_class *class, int depth) +{ + int bit; + + printk("%*s->", depth, ""); + print_lock_name(class); + printk(" ops: %lu", class->ops); + printk(" {\n"); + + for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { + if (class->usage_mask & (1 << bit)) { + int len = depth; + + len += printk("%*s %s", depth, "", usage_str[bit]); + len += printk(" at:\n"); + print_stack_trace(class->usage_traces + bit, len); + } + } + printk("%*s }\n", depth, ""); + + printk("%*s ... key at: ",depth,""); + print_ip_sym((unsigned long)class->key); +} + +/* + * printk the shortest lock dependencies from @start to @end in reverse order: + */ +static void __used +print_shortest_lock_dependencies(struct lock_list *leaf, + struct lock_list *root) +{ + struct lock_list *entry = leaf; + int depth; + + /*compute depth from generated tree by BFS*/ + depth = get_lock_depth(leaf); + + do { + print_lock_class_header(entry->class, depth); + printk("%*s ... acquired at:\n", depth, ""); + print_stack_trace(&entry->trace, 2); + printk("\n"); + + if (depth == 0 && (entry != root)) { + printk("lockdep:%s bad BFS generated tree\n", __func__); + break; + } + + entry = get_lock_parent(entry); + depth--; + } while (entry && (depth >= 0)); + + return; +} static int print_bad_irq_dependency(struct task_struct *curr, @@ -1349,12 +1393,18 @@ check_usage(struct task_struct *curr, struct held_lock *prev, this.class = hlock_class(prev); ret = find_usage_backwards(&this, bit_backwards, &target_entry); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; that.parent = NULL; that.class = hlock_class(next); ret = find_usage_forwards(&that, bit_forwards, &target_entry1); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; return print_bad_irq_dependency(curr, &this, &that, target_entry, target_entry1, @@ -2038,7 +2088,10 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this, root.parent = NULL; root.class = hlock_class(this); ret = find_usage_forwards(&root, bit, &target_entry); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; return print_irq_inversion_bug(curr, &root, target_entry, this, 1, irqclass); @@ -2059,7 +2112,10 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, root.parent = NULL; root.class = hlock_class(this); ret = find_usage_backwards(&root, bit, &target_entry); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; return print_irq_inversion_bug(curr, &root, target_entry, this, 1, irqclass); diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h index 6baa8807efdd..a2ee95ad1313 100644 --- a/kernel/lockdep_internals.h +++ b/kernel/lockdep_internals.h @@ -91,6 +91,8 @@ extern unsigned int nr_process_chains; extern unsigned int max_lockdep_depth; extern unsigned int max_recursion_depth; +extern unsigned int max_bfs_queue_depth; + #ifdef CONFIG_PROVE_LOCKING extern unsigned long lockdep_count_forward_deps(struct lock_class *); extern unsigned long lockdep_count_backward_deps(struct 
lock_class *); @@ -136,98 +138,3 @@ extern atomic_t nr_find_usage_backwards_recursions; # define debug_atomic_dec(ptr) do { } while (0) # define debug_atomic_read(ptr) 0 #endif - - -extern unsigned int max_bfs_queue_depth; -extern unsigned long nr_list_entries; -extern struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; -extern unsigned long bfs_accessed[]; - -/*For good efficiency of modular, we use power of 2*/ -#define MAX_CIRCULAR_QUE_SIZE 4096UL - -/* The circular_queue and helpers is used to implement the - * breadth-first search(BFS)algorithem, by which we can build - * the shortest path from the next lock to be acquired to the - * previous held lock if there is a circular between them. - * */ -struct circular_queue{ - unsigned long element[MAX_CIRCULAR_QUE_SIZE]; - unsigned int front, rear; -}; - -static inline void __cq_init(struct circular_queue *cq) -{ - cq->front = cq->rear = 0; - bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); -} - -static inline int __cq_empty(struct circular_queue *cq) -{ - return (cq->front == cq->rear); -} - -static inline int __cq_full(struct circular_queue *cq) -{ - return ((cq->rear + 1)&(MAX_CIRCULAR_QUE_SIZE-1)) == cq->front; -} - -static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) -{ - if (__cq_full(cq)) - return -1; - - cq->element[cq->rear] = elem; - cq->rear = (cq->rear + 1)&(MAX_CIRCULAR_QUE_SIZE-1); - return 0; -} - -static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) -{ - if (__cq_empty(cq)) - return -1; - - *elem = cq->element[cq->front]; - cq->front = (cq->front + 1)&(MAX_CIRCULAR_QUE_SIZE-1); - return 0; -} - -static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) -{ - return (cq->rear - cq->front)&(MAX_CIRCULAR_QUE_SIZE-1); -} - -static inline void mark_lock_accessed(struct lock_list *lock, - struct lock_list *parent) -{ - unsigned long nr; - nr = lock - list_entries; - WARN_ON(nr >= nr_list_entries); - lock->parent = parent; - set_bit(nr, bfs_accessed); -} - -static inline unsigned long lock_accessed(struct lock_list *lock) -{ - unsigned long nr; - nr = lock - list_entries; - WARN_ON(nr >= nr_list_entries); - return test_bit(nr, bfs_accessed); -} - -static inline struct lock_list *get_lock_parent(struct lock_list *child) -{ - return child->parent; -} - -static inline int get_lock_depth(struct lock_list *child) -{ - int depth = 0; - struct lock_list *parent; - - while ((parent = get_lock_parent(child))) { - child = parent; - depth++; - } - return depth; -} -- cgit v1.2.3 From b9454e83cac42fcdc90bfbfba479132bd6629455 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Jul 2009 13:29:08 +0200 Subject: nl80211: introduce new key attributes We will soon want to nest key attributes into some new attribute for configuring static WEP keys at connect() and ibss_join() time, so we need nested attributes for that. However, key attributes right now are 'global'. This patch thus introduces new nested attributes for the key settings and functions for parsing them. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 36 +++++++++ net/wireless/nl80211.c | 203 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 192 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e496a2daf7ef..48e0913c2209 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -567,6 +567,9 @@ enum nl80211_commands { * @NL80211_ATTR_PREV_BSSID: previous BSSID, to be used by in ASSOCIATE * commands to specify using a reassociate frame * + * @NL80211_ATTR_KEY: key information in a nested attribute with + * %NL80211_KEY_* sub-attributes + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -692,6 +695,8 @@ enum nl80211_attrs { NL80211_ATTR_PREV_BSSID, + NL80211_ATTR_KEY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -720,6 +725,7 @@ enum nl80211_attrs { #define NL80211_ATTR_CIPHER_SUITE_GROUP NL80211_ATTR_CIPHER_SUITE_GROUP #define NL80211_ATTR_WPA_VERSIONS NL80211_ATTR_WPA_VERSIONS #define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES +#define NL80211_ATTR_KEY NL80211_ATTR_KEY #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 @@ -1320,4 +1326,34 @@ enum nl80211_wpa_versions { NL80211_WPA_VERSION_2 = 1 << 1, }; +/** + * enum nl80211_key_attributes - key attributes + * @__NL80211_KEY_INVALID: invalid + * @NL80211_KEY_DATA: (temporal) key data; for TKIP this consists of + * 16 bytes encryption key followed by 8 bytes each for TX and RX MIC + * keys + * @NL80211_KEY_IDX: key ID (u8, 0-3) + * @NL80211_KEY_CIPHER: key cipher suite (u32, as defined by IEEE 802.11 + * section 7.3.2.25.1, e.g. 0x000FAC04) + * @NL80211_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and + * CCMP keys, each six bytes in little endian + * @NL80211_KEY_DEFAULT: flag indicating default key + * @NL80211_KEY_DEFAULT_MGMT: flag indicating default management key + * @__NL80211_KEY_AFTER_LAST: internal + * @NL80211_KEY_MAX: highest key attribute + */ +enum nl80211_key_attributes { + __NL80211_KEY_INVALID, + NL80211_KEY_DATA, + NL80211_KEY_IDX, + NL80211_KEY_CIPHER, + NL80211_KEY_SEQ, + NL80211_KEY_DEFAULT, + NL80211_KEY_DEFAULT_MGMT, + + /* keep last */ + __NL80211_KEY_AFTER_LAST, + NL80211_KEY_MAX = __NL80211_KEY_AFTER_LAST - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a00fd644370b..50cf59316292 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -73,6 +73,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_MAC] = { .type = NLA_BINARY, .len = ETH_ALEN }, [NL80211_ATTR_PREV_BSSID] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [NL80211_ATTR_KEY] = { .type = NLA_NESTED, }, [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, [NL80211_ATTR_KEY_IDX] = { .type = NLA_U8 }, @@ -134,6 +135,18 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, }; +/* policy for the attributes */ +static struct nla_policy +nl80211_key_policy[NL80211_KEY_MAX + 1] __read_mostly = { + [NL80211_KEY_DATA] = { .type = NLA_BINARY, + .len = WLAN_MAX_KEY_LEN }, + [NL80211_KEY_IDX] = { .type = NLA_U8 }, + [NL80211_KEY_CIPHER] = { .type = NLA_U32 }, + [NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, + [NL80211_KEY_DEFAULT] = { .type = NLA_FLAG }, + [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG }, +}; + 
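/*
 * A minimal userspace sketch of building the nested NL80211_ATTR_KEY
 * attribute that the policy above validates, using libnl. The helper
 * name, the WEP-40 cipher constant and the message setup (nlmsg_alloc()
 * plus the NL80211_CMD_SET_KEY genl header) are assumptions for
 * illustration; they are not part of this patch.
 */
#include <errno.h>
#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/attr.h>
#include <linux/nl80211.h>

static int put_default_wep_key(struct nl_msg *msg,
			       const void *key, int key_len, uint8_t idx)
{
	struct nlattr *nest = nla_nest_start(msg, NL80211_ATTR_KEY);

	if (!nest)
		return -ENOBUFS;
	if (nla_put(msg, NL80211_KEY_DATA, key_len, key) ||	/* <= WLAN_MAX_KEY_LEN */
	    nla_put_u8(msg, NL80211_KEY_IDX, idx) ||		/* 0-3 for WEP */
	    nla_put_u32(msg, NL80211_KEY_CIPHER, 0x000fac01) ||	/* WEP-40 (00-0F-AC:1) */
	    nla_put_flag(msg, NL80211_KEY_DEFAULT))		/* mark as default key */
		return -ENOBUFS;
	nla_nest_end(msg, nest);
	return 0;
}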
/* IE validation */ static bool is_valid_ie_attr(const struct nlattr *attr) { @@ -198,6 +211,100 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, /* netlink command implementations */ +struct key_parse { + struct key_params p; + int idx; + bool def, defmgmt; +}; + +static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) +{ + struct nlattr *tb[NL80211_KEY_MAX + 1]; + int err = nla_parse_nested(tb, NL80211_KEY_MAX, key, + nl80211_key_policy); + if (err) + return err; + + k->def = !!tb[NL80211_KEY_DEFAULT]; + k->defmgmt = !!tb[NL80211_KEY_DEFAULT_MGMT]; + + if (tb[NL80211_KEY_IDX]) + k->idx = nla_get_u8(tb[NL80211_KEY_IDX]); + + if (tb[NL80211_KEY_DATA]) { + k->p.key = nla_data(tb[NL80211_KEY_DATA]); + k->p.key_len = nla_len(tb[NL80211_KEY_DATA]); + } + + if (tb[NL80211_KEY_SEQ]) { + k->p.seq = nla_data(tb[NL80211_KEY_SEQ]); + k->p.seq_len = nla_len(tb[NL80211_KEY_SEQ]); + } + + if (tb[NL80211_KEY_CIPHER]) + k->p.cipher = nla_get_u32(tb[NL80211_KEY_CIPHER]); + + return 0; +} + +static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k) +{ + if (info->attrs[NL80211_ATTR_KEY_DATA]) { + k->p.key = nla_data(info->attrs[NL80211_ATTR_KEY_DATA]); + k->p.key_len = nla_len(info->attrs[NL80211_ATTR_KEY_DATA]); + } + + if (info->attrs[NL80211_ATTR_KEY_SEQ]) { + k->p.seq = nla_data(info->attrs[NL80211_ATTR_KEY_SEQ]); + k->p.seq_len = nla_len(info->attrs[NL80211_ATTR_KEY_SEQ]); + } + + if (info->attrs[NL80211_ATTR_KEY_IDX]) + k->idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); + + if (info->attrs[NL80211_ATTR_KEY_CIPHER]) + k->p.cipher = nla_get_u32(info->attrs[NL80211_ATTR_KEY_CIPHER]); + + k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT]; + k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]; + + return 0; +} + +static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) +{ + int err; + + memset(k, 0, sizeof(*k)); + k->idx = -1; + + if (info->attrs[NL80211_ATTR_KEY]) + err = nl80211_parse_key_new(info->attrs[NL80211_ATTR_KEY], k); + else + err = nl80211_parse_key_old(info, k); + + if (err) + return err; + + if (k->def && k->defmgmt) + return -EINVAL; + + if (k->idx != -1) { + if (k->defmgmt) { + if (k->idx < 4 || k->idx > 5) + return -EINVAL; + } else if (k->def) { + if (k->idx < 0 || k->idx > 3) + return -EINVAL; + } else { + if (k->idx < 0 || k->idx > 5) + return -EINVAL; + } + } + + return 0; +} + static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *dev) { @@ -943,10 +1050,12 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) struct get_key_cookie { struct sk_buff *msg; int error; + int idx; }; static void get_key_callback(void *c, struct key_params *params) { + struct nlattr *key; struct get_key_cookie *cookie = c; if (params->key) @@ -961,6 +1070,26 @@ static void get_key_callback(void *c, struct key_params *params) NLA_PUT_U32(cookie->msg, NL80211_ATTR_KEY_CIPHER, params->cipher); + key = nla_nest_start(cookie->msg, NL80211_ATTR_KEY); + if (!key) + goto nla_put_failure; + + if (params->key) + NLA_PUT(cookie->msg, NL80211_KEY_DATA, + params->key_len, params->key); + + if (params->seq) + NLA_PUT(cookie->msg, NL80211_KEY_SEQ, + params->seq_len, params->seq); + + if (params->cipher) + NLA_PUT_U32(cookie->msg, NL80211_KEY_CIPHER, + params->cipher); + + NLA_PUT_U8(cookie->msg, NL80211_ATTR_KEY_IDX, cookie->idx); + + nla_nest_end(cookie->msg, key); + return; nla_put_failure: cookie->error = 1; @@ -1014,6 +1143,7 @@ static int 
nl80211_get_key(struct sk_buff *skb, struct genl_info *info) } cookie.msg = msg; + cookie.idx = key_idx; NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_idx); @@ -1049,26 +1179,21 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; + struct key_parse key; int err; struct net_device *dev; - u8 key_idx; int (*func)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); - if (!info->attrs[NL80211_ATTR_KEY_IDX]) - return -EINVAL; - - key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); + err = nl80211_parse_key(info, &key); + if (err) + return err; - if (info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]) { - if (key_idx < 4 || key_idx > 5) - return -EINVAL; - } else if (key_idx > 3) + if (key.idx < 0) return -EINVAL; - /* currently only support setting default key */ - if (!info->attrs[NL80211_ATTR_KEY_DEFAULT] && - !info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]) + /* only support setting default key */ + if (!key.def && !key.defmgmt) return -EINVAL; rtnl_lock(); @@ -1077,7 +1202,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; - if (info->attrs[NL80211_ATTR_KEY_DEFAULT]) + if (key.def) func = rdev->ops->set_default_key; else func = rdev->ops->set_default_mgmt_key; @@ -1087,13 +1212,13 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = func(&rdev->wiphy, dev, key_idx); + err = func(&rdev->wiphy, dev, key.idx); #ifdef CONFIG_WIRELESS_EXT if (!err) { if (func == rdev->ops->set_default_key) - dev->ieee80211_ptr->wext.default_key = key_idx; + dev->ieee80211_ptr->wext.default_key = key.idx; else - dev->ieee80211_ptr->wext.default_mgmt_key = key_idx; + dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; } #endif @@ -1112,34 +1237,20 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev; int err, i; struct net_device *dev; - struct key_params params; - u8 key_idx = 0; + struct key_parse key; u8 *mac_addr = NULL; - memset(&params, 0, sizeof(params)); + err = nl80211_parse_key(info, &key); + if (err) + return err; - if (!info->attrs[NL80211_ATTR_KEY_CIPHER]) + if (!key.p.key) return -EINVAL; - if (info->attrs[NL80211_ATTR_KEY_DATA]) { - params.key = nla_data(info->attrs[NL80211_ATTR_KEY_DATA]); - params.key_len = nla_len(info->attrs[NL80211_ATTR_KEY_DATA]); - } - - if (info->attrs[NL80211_ATTR_KEY_SEQ]) { - params.seq = nla_data(info->attrs[NL80211_ATTR_KEY_SEQ]); - params.seq_len = nla_len(info->attrs[NL80211_ATTR_KEY_SEQ]); - } - - if (info->attrs[NL80211_ATTR_KEY_IDX]) - key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); - - params.cipher = nla_get_u32(info->attrs[NL80211_ATTR_KEY_CIPHER]); - if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (cfg80211_validate_key_settings(&params, key_idx, mac_addr)) + if (cfg80211_validate_key_settings(&key.p, key.idx, mac_addr)) return -EINVAL; rtnl_lock(); @@ -1149,7 +1260,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) goto unlock_rtnl; for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) - if (params.cipher == rdev->wiphy.cipher_suites[i]) + if (key.p.cipher == rdev->wiphy.cipher_suites[i]) break; if (i == rdev->wiphy.n_cipher_suites) { err = -EINVAL; @@ -1161,7 +1272,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) goto out;
} - err = rdev->ops->add_key(&rdev->wiphy, dev, key_idx, mac_addr, &params); + err = rdev->ops->add_key(&rdev->wiphy, dev, key.idx, mac_addr, &key.p); out: cfg80211_unlock_rdev(rdev); @@ -1177,14 +1288,12 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev; int err; struct net_device *dev; - u8 key_idx = 0; u8 *mac_addr = NULL; + struct key_parse key; - if (info->attrs[NL80211_ATTR_KEY_IDX]) - key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); - - if (key_idx > 5) - return -EINVAL; + err = nl80211_parse_key(info, &key); + if (err) + return err; if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -1200,13 +1309,13 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = rdev->ops->del_key(&rdev->wiphy, dev, key_idx, mac_addr); + err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, mac_addr); #ifdef CONFIG_WIRELESS_EXT if (!err) { - if (key_idx == dev->ieee80211_ptr->wext.default_key) + if (key.idx == dev->ieee80211_ptr->wext.default_key) dev->ieee80211_ptr->wext.default_key = -1; - else if (key_idx == dev->ieee80211_ptr->wext.default_mgmt_key) + else if (key.idx == dev->ieee80211_ptr->wext.default_mgmt_key) dev->ieee80211_ptr->wext.default_mgmt_key = -1; } #endif -- cgit v1.2.3 From fffd0934b9390f34bec45762192b7edd3b12b4b5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Jul 2009 14:22:54 +0200 Subject: cfg80211: rework key operation This reworks the key operations in cfg80211: userspace may now configure keys (via nl80211) only after the connection has been established (in managed mode) or the IBSS has been joined (in IBSS mode), at any time (in AP[_VLAN] modes), and never in all other modes. In order to do shared key authentication correctly, it is now possible to give a WEP key to the AUTH command. Static WEP keys are given directly to the CONNECT or IBSS_JOIN command; a userspace SME is assumed to configure them properly after the connection has been established. Since mac80211 used to check the default key in IBSS mode to see whether or not the network is protected, it needs an update in that area, as well as an update to make use of the WEP key passed to auth() for shared key authentication. Signed-off-by: Johannes Berg Signed-off-by: John W.
Linville --- include/linux/nl80211.h | 5 ++ include/net/cfg80211.h | 18 ++++- net/mac80211/ibss.c | 9 +-- net/mac80211/ieee80211_i.h | 8 ++- net/mac80211/mlme.c | 11 ++- net/mac80211/util.c | 16 +++-- net/mac80211/wep.c | 6 +- net/mac80211/wep.h | 3 + net/wireless/core.c | 11 ++- net/wireless/core.h | 32 +++++++-- net/wireless/ibss.c | 79 +++++++++++++++++---- net/wireless/mlme.c | 16 ++++- net/wireless/nl80211.c | 170 ++++++++++++++++++++++++++++++++++++++++----- net/wireless/sme.c | 97 +++++++++++++++++++------- net/wireless/util.c | 41 ++++++++++- net/wireless/wext-compat.c | 163 ++++++++++++++++++++++++++----------------- net/wireless/wext-sme.c | 30 ++++++-- 17 files changed, 554 insertions(+), 161 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 48e0913c2209..b043b78dd2c3 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -569,6 +569,9 @@ enum nl80211_commands { * * @NL80211_ATTR_KEY: key information in a nested attribute with * %NL80211_KEY_* sub-attributes + * @NL80211_ATTR_KEYS: array of keys for static WEP keys for connect() + * and join_ibss(), key information is in a nested attribute each + * with %NL80211_KEY_* sub-attributes * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -696,6 +699,7 @@ enum nl80211_attrs { NL80211_ATTR_PREV_BSSID, NL80211_ATTR_KEY, + NL80211_ATTR_KEYS, /* add attributes here, update the policy in nl80211.c */ @@ -726,6 +730,7 @@ enum nl80211_attrs { #define NL80211_ATTR_WPA_VERSIONS NL80211_ATTR_WPA_VERSIONS #define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES #define NL80211_ATTR_KEY NL80211_ATTR_KEY +#define NL80211_ATTR_KEYS NL80211_ATTR_KEYS #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 83c2c727d71e..65a5cbcb5d14 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -647,12 +647,17 @@ struct cfg80211_crypto_settings { * @auth_type: Authentication type (algorithm) * @ie: Extra IEs to add to Authentication frame or %NULL * @ie_len: Length of ie buffer in octets + * @key_len: length of WEP key for shared key authentication + * @key_idx: index of WEP key for shared key authentication + * @key: WEP key for shared key authentication */ struct cfg80211_auth_request { struct cfg80211_bss *bss; const u8 *ie; size_t ie_len; enum nl80211_auth_type auth_type; + const u8 *key; + u8 key_len, key_idx; }; /** @@ -727,6 +732,8 @@ struct cfg80211_disassoc_request { * @ie: information element(s) to include in the beacon * @ie_len: length of that * @beacon_interval: beacon interval to use + * @privacy: this is a protected network, keys will be configured + * after joining */ struct cfg80211_ibss_params { u8 *ssid; @@ -736,6 +743,7 @@ struct cfg80211_ibss_params { u8 ssid_len, ie_len; u16 beacon_interval; bool channel_fixed; + bool privacy; }; /** @@ -755,6 +763,9 @@ struct cfg80211_ibss_params { * @assoc_ie_len: Length of assoc_ie in octets * @privacy: indicates whether privacy-enabled APs should be used * @crypto: crypto settings + * @key_len: length of WEP key for shared key authentication + * @key_idx: index of WEP key for shared key authentication + * @key: WEP key for shared key authentication */ struct cfg80211_connect_params { struct ieee80211_channel *channel; @@ -766,6 +777,8 @@ struct cfg80211_connect_params { size_t ie_len; bool privacy; struct cfg80211_crypto_settings crypto; + const u8 *key; + u8 key_len, 
key_idx; }; /** @@ -1223,9 +1236,10 @@ extern void wiphy_unregister(struct wiphy *wiphy); */ extern void wiphy_free(struct wiphy *wiphy); -/* internal struct */ +/* internal structs */ struct cfg80211_conn; struct cfg80211_internal_bss; +struct cfg80211_cached_keys; #define MAX_AUTH_BSSES 4 @@ -1267,6 +1281,7 @@ struct wireless_dev { CFG80211_SME_CONNECTED, } sme_state; struct cfg80211_conn *conn; + struct cfg80211_cached_keys *connect_keys; struct list_head event_list; spinlock_t event_lock; @@ -1280,6 +1295,7 @@ struct wireless_dev { struct { struct cfg80211_ibss_params ibss; struct cfg80211_connect_params connect; + struct cfg80211_cached_keys *keys; u8 *ie; size_t ie_len; u8 bssid[ETH_ALEN]; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 15d5a53b59a8..8e2220000e5c 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -57,7 +57,7 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, */ if (auth_alg == WLAN_AUTH_OPEN && auth_transaction == 1) ieee80211_send_auth(sdata, 2, WLAN_AUTH_OPEN, NULL, 0, - sdata->u.ibss.bssid, 0); + sdata->u.ibss.bssid, NULL, 0, 0); } static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, @@ -494,7 +494,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) capability = WLAN_CAPABILITY_IBSS; - if (sdata->default_key) + if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; else sdata->drop_unencrypted = 0; @@ -524,9 +524,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) return; capability = WLAN_CAPABILITY_IBSS; - if (sdata->default_key) + if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; - if (ifibss->fixed_bssid) bssid = ifibss->bssid; if (ifibss->fixed_channel) @@ -872,6 +871,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, } else sdata->u.ibss.fixed_bssid = false; + sdata->u.ibss.privacy = params->privacy; + sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->u.ibss.channel = params->channel; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 327aabc07abe..06b3411530f2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -247,6 +247,9 @@ struct ieee80211_mgd_work { int tries; + u8 key[WLAN_KEY_LEN_WEP104]; + u8 key_len, key_idx; + /* must be last */ u8 ie[0]; /* for auth or assoc frame, not probe */ }; @@ -321,6 +324,7 @@ struct ieee80211_if_ibss { bool fixed_bssid; bool fixed_channel; + bool privacy; u8 bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; @@ -1093,8 +1097,8 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, - u8 *extra, size_t extra_len, - const u8 *bssid, int encrypt); + u8 *extra, size_t extra_len, const u8 *bssid, + const u8 *key, u8 key_len, u8 key_idx); int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, const u8 *ie, size_t ie_len); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c9db9646025b..8e4a60497bba 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -954,7 +954,7 @@ ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, sdata->dev->name, wk->bss->cbss.bssid, wk->tries); ieee80211_send_auth(sdata, 1, wk->auth_alg, wk->ie, wk->ie_len, - wk->bss->cbss.bssid, 0); + wk->bss->cbss.bssid, NULL, 0, 0); wk->auth_transaction = 2; wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; @@ -1176,7 +1176,8 @@ 
static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, return; ieee80211_send_auth(sdata, 3, wk->auth_alg, elems.challenge - 2, elems.challenge_len + 2, - wk->bss->cbss.bssid, 1); + wk->bss->cbss.bssid, + wk->key, wk->key_len, wk->key_idx); wk->auth_transaction = 4; } @@ -2175,6 +2176,12 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, wk->ie_len = req->ie_len; } + if (req->key && req->key_len) { + wk->key_len = req->key_len; + wk->key_idx = req->key_idx; + memcpy(wk->key, req->key, req->key_len); + } + ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); memcpy(wk->ssid, ssid + 2, ssid[1]); wk->ssid_len = ssid[1]; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 915e77769312..dbf66b52d38c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -31,6 +31,7 @@ #include "mesh.h" #include "wme.h" #include "led.h" +#include "wep.h" /* privid for wiphys to determine whether they belong to us or not */ void *mac80211_wiphy_privid = &mac80211_wiphy_privid; @@ -804,12 +805,13 @@ u32 ieee80211_mandatory_rates(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, - u8 *extra, size_t extra_len, - const u8 *bssid, int encrypt) + u8 *extra, size_t extra_len, const u8 *bssid, + const u8 *key, u8 key_len, u8 key_idx) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; + int err; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 6 + extra_len); @@ -824,8 +826,6 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, memset(mgmt, 0, 24 + 6); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH); - if (encrypt) - mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); memcpy(mgmt->da, bssid, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, bssid, ETH_ALEN); @@ -835,7 +835,13 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, if (extra) memcpy(skb_put(skb, extra_len), extra, extra_len); - ieee80211_tx_skb(sdata, skb, encrypt); + if (auth_alg == WLAN_AUTH_SHARED_KEY && transaction == 3) { + mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + err = ieee80211_wep_encrypt(local, skb, key, key_len, key_idx); + WARN_ON(err); + } + + ieee80211_tx_skb(sdata, skb, 0); } int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 4fafb2d27c84..8a980f136941 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -144,9 +144,9 @@ void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, * * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data)) */ -static int ieee80211_wep_encrypt(struct ieee80211_local *local, - struct sk_buff *skb, - const u8 *key, int keylen, int keyidx) +int ieee80211_wep_encrypt(struct ieee80211_local *local, + struct sk_buff *skb, + const u8 *key, int keylen, int keyidx) { u8 *iv; size_t len; diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h index 85219ded8703..fe29d7e5759f 100644 --- a/net/mac80211/wep.h +++ b/net/mac80211/wep.h @@ -20,6 +20,9 @@ int ieee80211_wep_init(struct ieee80211_local *local); void ieee80211_wep_free(struct ieee80211_local *local); void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, size_t klen, u8 *data, size_t data_len); +int ieee80211_wep_encrypt(struct ieee80211_local *local, + struct sk_buff *skb, + const u8 *key, int keylen, int keyidx); int 
ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, size_t klen, u8 *data, size_t data_len); bool ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key); diff --git a/net/wireless/core.c b/net/wireless/core.c index 1a78b3c70cf2..97cc5968b7d6 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -666,14 +666,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - if (wdev->wext.ibss.ssid_len) - __cfg80211_join_ibss(rdev, dev, - &wdev->wext.ibss); + cfg80211_ibss_wext_join(rdev, wdev); break; case NL80211_IFTYPE_STATION: - if (wdev->wext.connect.ssid_len) - __cfg80211_connect(rdev, dev, - &wdev->wext.connect); + cfg80211_mgd_wext_connect(rdev, wdev); break; default: break; @@ -690,6 +686,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, } mutex_unlock(&rdev->devlist_mtx); mutex_destroy(&wdev->mtx); +#ifdef CONFIG_WIRELESS_EXT + kfree(wdev->wext.keys); +#endif break; case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) diff --git a/net/wireless/core.h b/net/wireless/core.h index e46cd6eb61d7..2ec8ddbe57de 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -238,6 +238,12 @@ struct cfg80211_event { }; }; +struct cfg80211_cached_keys { + struct key_params params[6]; + u8 data[6][WLAN_MAX_KEY_LEN]; + int def, defmgmt; +}; + /* free object */ extern void cfg80211_dev_free(struct cfg80211_registered_device *rdev); @@ -256,14 +262,18 @@ void cfg80211_bss_age(struct cfg80211_registered_device *dev, /* IBSS */ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_ibss_params *params); + struct cfg80211_ibss_params *params, + struct cfg80211_cached_keys *connkeys); int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_ibss_params *params); + struct cfg80211_ibss_params *params, + struct cfg80211_cached_keys *connkeys); void cfg80211_clear_ibss(struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid); +int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); /* MLME */ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, @@ -272,12 +282,14 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len); + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx); int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len); + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx); int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, @@ -310,10 +322,12 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_connect_params *connect); + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys); int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, - 
struct cfg80211_connect_params *connect); + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys); int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); @@ -323,11 +337,14 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len); +int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); void cfg80211_conn_work(struct work_struct *work); /* internal helpers */ -int cfg80211_validate_key_settings(struct key_params *params, int key_idx, +int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, + struct key_params *params, int key_idx, const u8 *mac_addr); void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap); @@ -335,5 +352,6 @@ void cfg80211_sme_scan_done(struct net_device *dev); void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len); void cfg80211_sme_disassoc(struct net_device *dev, int idx); void __cfg80211_scan_done(struct work_struct *wk); +void cfg80211_upload_connect_keys(struct wireless_dev *wdev); #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 99ef9364b7e8..9394e78cd11f 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -39,6 +39,8 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); + cfg80211_upload_connect_keys(wdev); + nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, GFP_KERNEL); #ifdef CONFIG_WIRELESS_EXT @@ -71,7 +73,8 @@ EXPORT_SYMBOL(cfg80211_ibss_joined); int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_ibss_params *params) + struct cfg80211_ibss_params *params, + struct cfg80211_cached_keys *connkeys) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -81,13 +84,18 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, if (wdev->ssid_len) return -EALREADY; + if (WARN_ON(wdev->connect_keys)) + kfree(wdev->connect_keys); + wdev->connect_keys = connkeys; + #ifdef CONFIG_WIRELESS_EXT wdev->wext.ibss.channel = params->channel; #endif err = rdev->ops->join_ibss(&rdev->wiphy, dev, params); - - if (err) + if (err) { + wdev->connect_keys = NULL; return err; + } memcpy(wdev->ssid, params->ssid, params->ssid_len); wdev->ssid_len = params->ssid_len; @@ -97,13 +105,14 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_ibss_params *params) + struct cfg80211_ibss_params *params, + struct cfg80211_cached_keys *connkeys) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; wdev_lock(wdev); - err = __cfg80211_join_ibss(rdev, dev, params); + err = __cfg80211_join_ibss(rdev, dev, params, connkeys); wdev_unlock(wdev); return err; @@ -112,9 +121,22 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + int i; ASSERT_WDEV_LOCK(wdev); + kfree(wdev->connect_keys); + wdev->connect_keys = NULL; + + /* + * Delete all the keys ... 
pairwise keys can't really + * exist any more anyway, but default keys might. + */ + if (rdev->ops->del_key) + for (i = 0; i < 6; i++) + rdev->ops->del_key(wdev->wiphy, dev, i, NULL); + if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(&wdev->current_bss->pub); @@ -172,11 +194,14 @@ int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, } #ifdef CONFIG_WIRELESS_EXT -static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) { + struct cfg80211_cached_keys *ck = NULL; enum ieee80211_band band; - int i; + int i, err; + + ASSERT_WDEV_LOCK(wdev); if (!wdev->wext.ibss.beacon_interval) wdev->wext.ibss.beacon_interval = 100; @@ -216,8 +241,24 @@ static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return 0; - return cfg80211_join_ibss(wiphy_to_dev(wdev->wiphy), - wdev->netdev, &wdev->wext.ibss); + if (wdev->wext.keys) + wdev->wext.keys->def = wdev->wext.default_key; + + wdev->wext.ibss.privacy = wdev->wext.default_key != -1; + + if (wdev->wext.keys) { + ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); + if (!ck) + return -ENOMEM; + for (i = 0; i < 6; i++) + ck->params[i].key = ck->data[i]; + } + err = __cfg80211_join_ibss(rdev, wdev->netdev, + &wdev->wext.ibss, ck); + if (err) + kfree(ck); + + return err; } int cfg80211_ibss_wext_siwfreq(struct net_device *dev, @@ -265,7 +306,11 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, wdev->wext.ibss.channel_fixed = false; } - return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_lock(wdev); + err = cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_unlock(wdev); + + return err; } /* temporary symbol - mark GPL - in the future the handler won't be */ EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwfreq); @@ -333,7 +378,11 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, memcpy(wdev->wext.ibss.ssid, ssid, len); wdev->wext.ibss.ssid_len = len; - return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_lock(wdev); + err = cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_unlock(wdev); + + return err; } /* temporary symbol - mark GPL - in the future the handler won't be */ EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwessid); @@ -414,7 +463,11 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, } else wdev->wext.ibss.bssid = NULL; - return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_lock(wdev); + err = cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_unlock(wdev); + + return err; } /* temporary symbol - mark GPL - in the future the handler won't be */ EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwap); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 1b2ca1fea7a1..8e4ce2fdf862 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -328,7 +328,8 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len) + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_auth_request req; @@ -337,6 +338,10 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); + if (auth_type == NL80211_AUTHTYPE_SHARED_KEY) + if (!key || !key_len || key_idx < 0 || key_idx > 4) + return 
-EINVAL; + if (wdev->current_bss && memcmp(bssid, wdev->current_bss->pub.bssid, ETH_ALEN) == 0) return -EALREADY; @@ -359,6 +364,9 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, req.auth_type = auth_type; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); + req.key = key; + req.key_len = key_len; + req.key_idx = key_idx; if (!req.bss) return -ENOENT; @@ -396,13 +404,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len) + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx) { int err; wdev_lock(dev->ieee80211_ptr); err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len); + ssid, ssid_len, ie, ie_len, + key, key_len, key_idx); wdev_unlock(dev->ieee80211_ptr); return err; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 50cf59316292..45c5f9c8e51b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -138,8 +138,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { /* policy for the attributes */ static struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] __read_mostly = { - [NL80211_KEY_DATA] = { .type = NLA_BINARY, - .len = WLAN_MAX_KEY_LEN }, + [NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, [NL80211_KEY_IDX] = { .type = NLA_U8 }, [NL80211_KEY_CIPHER] = { .type = NLA_U32 }, [NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, @@ -305,6 +304,83 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) return 0; } +static struct cfg80211_cached_keys * +nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, + struct nlattr *keys) +{ + struct key_parse parse; + struct nlattr *key; + struct cfg80211_cached_keys *result; + int rem, err, def = 0; + + result = kzalloc(sizeof(*result), GFP_KERNEL); + if (!result) + return ERR_PTR(-ENOMEM); + + result->def = -1; + result->defmgmt = -1; + + nla_for_each_nested(key, keys, rem) { + memset(&parse, 0, sizeof(parse)); + parse.idx = -1; + + err = nl80211_parse_key_new(key, &parse); + if (err) + goto error; + err = -EINVAL; + if (!parse.p.key) + goto error; + if (parse.idx < 0 || parse.idx > 4) + goto error; + if (parse.def) { + if (def) + goto error; + def = 1; + result->def = parse.idx; + } else if (parse.defmgmt) + goto error; + err = cfg80211_validate_key_settings(rdev, &parse.p, + parse.idx, NULL); + if (err) + goto error; + result->params[parse.idx].cipher = parse.p.cipher; + result->params[parse.idx].key_len = parse.p.key_len; + result->params[parse.idx].key = result->data[parse.idx]; + memcpy(result->data[parse.idx], parse.p.key, parse.p.key_len); + } + + return result; + error: + kfree(result); + return ERR_PTR(err); +} + +static int nl80211_key_allowed(struct wireless_dev *wdev) +{ + ASSERT_WDEV_LOCK(wdev); + + if (!netif_running(wdev->netdev)) + return -ENETDOWN; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + break; + case NL80211_IFTYPE_ADHOC: + if (!wdev->current_bss) + return -ENOLINK; + break; + case NL80211_IFTYPE_STATION: + if (wdev->sme_state != CFG80211_SME_CONNECTED) + return -ENOLINK; + break; + default: + return -EINVAL; + } + + return 0; +} + static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *dev) { @@ 
-1212,7 +1288,11 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = func(&rdev->wiphy, dev, key.idx); + wdev_lock(dev->ieee80211_ptr); + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (!err) + err = func(&rdev->wiphy, dev, key.idx); + #ifdef CONFIG_WIRELESS_EXT if (!err) { if (func == rdev->ops->set_default_key) @@ -1221,6 +1301,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; } #endif + wdev_unlock(dev->ieee80211_ptr); out: cfg80211_unlock_rdev(rdev); @@ -1235,7 +1316,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; - int err, i; + int err; struct net_device *dev; struct key_parse key; u8 *mac_addr = NULL; @@ -1250,29 +1331,28 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (cfg80211_validate_key_settings(&key.p, key.idx, mac_addr)) - return -EINVAL; - rtnl_lock(); err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); if (err) goto unlock_rtnl; - for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) - if (key.p.cipher == rdev->wiphy.cipher_suites[i]) - break; - if (i == rdev->wiphy.n_cipher_suites) { - err = -EINVAL; + if (!rdev->ops->add_key) { + err = -EOPNOTSUPP; goto out; } - if (!rdev->ops->add_key) { - err = -EOPNOTSUPP; + if (cfg80211_validate_key_settings(rdev, &key.p, key.idx, mac_addr)) { + err = -EINVAL; goto out; } - err = rdev->ops->add_key(&rdev->wiphy, dev, key.idx, mac_addr, &key.p); + wdev_lock(dev->ieee80211_ptr); + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (!err) + err = rdev->ops->add_key(&rdev->wiphy, dev, key.idx, + mac_addr, &key.p); + wdev_unlock(dev->ieee80211_ptr); out: cfg80211_unlock_rdev(rdev); @@ -1309,7 +1389,10 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, mac_addr); + wdev_lock(dev->ieee80211_ptr); + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (!err) + err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, mac_addr); #ifdef CONFIG_WIRELESS_EXT if (!err) { @@ -1319,6 +1402,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->wext.default_mgmt_key = -1; } #endif + wdev_unlock(dev->ieee80211_ptr); out: cfg80211_unlock_rdev(rdev); @@ -3159,6 +3243,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) const u8 *bssid, *ssid, *ie = NULL; int err, ssid_len, ie_len = 0; enum nl80211_auth_type auth_type; + struct key_parse key; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -3175,6 +3260,25 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) return -EINVAL; + err = nl80211_parse_key(info, &key); + if (err) + return err; + + if (key.idx >= 0) { + if (!key.p.key || !key.p.key_len) + return -EINVAL; + if ((key.p.cipher != WLAN_CIPHER_SUITE_WEP40 || + key.p.key_len != WLAN_KEY_LEN_WEP40) && + (key.p.cipher != WLAN_CIPHER_SUITE_WEP104 || + key.p.key_len != WLAN_KEY_LEN_WEP104)) + return -EINVAL; + if (key.idx > 4) + return -EINVAL; + } else { + key.p.key_len = 0; + key.p.key = NULL; + } + rtnl_lock(); err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); @@ -3219,7 +3323,8 @@ static int 
nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) } err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len); + ssid, ssid_len, ie, ie_len, + key.p.key, key.p.key_len, key.idx); out: cfg80211_unlock_rdev(rdev); @@ -3506,6 +3611,7 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) struct net_device *dev; struct cfg80211_ibss_params ibss; struct wiphy *wiphy; + struct cfg80211_cached_keys *connkeys = NULL; int err; memset(&ibss, 0, sizeof(ibss)); @@ -3570,13 +3676,26 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) } ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; + ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; + + if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) { + connkeys = nl80211_parse_connkeys(rdev, + info->attrs[NL80211_ATTR_KEYS]); + if (IS_ERR(connkeys)) { + err = PTR_ERR(connkeys); + connkeys = NULL; + goto out; + } + } - err = cfg80211_join_ibss(rdev, dev, &ibss); + err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys); out: cfg80211_unlock_rdev(rdev); dev_put(dev); unlock_rtnl: + if (err) + kfree(connkeys); rtnl_unlock(); return err; } @@ -3746,6 +3865,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) struct net_device *dev; struct cfg80211_connect_params connect; struct wiphy *wiphy; + struct cfg80211_cached_keys *connkeys = NULL; int err; memset(&connect, 0, sizeof(connect)); @@ -3810,12 +3930,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) } } - err = cfg80211_connect(rdev, dev, &connect); + if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) { + connkeys = nl80211_parse_connkeys(rdev, + info->attrs[NL80211_ATTR_KEYS]); + if (IS_ERR(connkeys)) { + err = PTR_ERR(connkeys); + connkeys = NULL; + goto out; + } + } + + err = cfg80211_connect(rdev, dev, &connect, connkeys); out: cfg80211_unlock_rdev(rdev); dev_put(dev); unlock_rtnl: + if (err) + kfree(connkeys); rtnl_unlock(); return err; } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 79ca56cbfd36..d635a99dba51 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -125,7 +125,9 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) params->channel, params->auth_type, params->bssid, params->ssid, params->ssid_len, - NULL, 0); + NULL, 0, + params->key, params->key_len, + params->key_idx); case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; @@ -279,8 +281,12 @@ void cfg80211_sme_rx_auth(struct net_device *dev, /* select automatically between only open, shared, leap */ switch (wdev->conn->params.auth_type) { case NL80211_AUTHTYPE_OPEN_SYSTEM: - wdev->conn->params.auth_type = - NL80211_AUTHTYPE_SHARED_KEY; + if (wdev->connect_keys) + wdev->conn->params.auth_type = + NL80211_AUTHTYPE_SHARED_KEY; + else + wdev->conn->params.auth_type = + NL80211_AUTHTYPE_NETWORK_EAP; break; case NL80211_AUTHTYPE_SHARED_KEY: wdev->conn->params.auth_type = @@ -353,10 +359,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, #endif if (status == WLAN_STATUS_SUCCESS && - wdev->sme_state == CFG80211_SME_IDLE) { - wdev->sme_state = CFG80211_SME_CONNECTED; - return; - } + wdev->sme_state == CFG80211_SME_IDLE) + goto success; if (wdev->sme_state != CFG80211_SME_CONNECTING) return; @@ -370,24 +374,29 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, if (wdev->conn) wdev->conn->state = CFG80211_CONN_IDLE; - if (status == WLAN_STATUS_SUCCESS) { - bss = 
cfg80211_get_bss(wdev->wiphy, NULL, bssid, - wdev->ssid, wdev->ssid_len, - WLAN_CAPABILITY_ESS, - WLAN_CAPABILITY_ESS); - - if (WARN_ON(!bss)) - return; - - cfg80211_hold_bss(bss_from_pub(bss)); - wdev->current_bss = bss_from_pub(bss); - - wdev->sme_state = CFG80211_SME_CONNECTED; - } else { + if (status != WLAN_STATUS_SUCCESS) { wdev->sme_state = CFG80211_SME_IDLE; kfree(wdev->conn); wdev->conn = NULL; + kfree(wdev->connect_keys); + wdev->connect_keys = NULL; + return; } + + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, + wdev->ssid, wdev->ssid_len, + WLAN_CAPABILITY_ESS, + WLAN_CAPABILITY_ESS); + + if (WARN_ON(!bss)) + return; + + cfg80211_hold_bss(bss_from_pub(bss)); + wdev->current_bss = bss_from_pub(bss); + + success: + wdev->sme_state = CFG80211_SME_CONNECTED; + cfg80211_upload_connect_keys(wdev); } void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, @@ -516,6 +525,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + int i; #ifdef CONFIG_WIRELESS_EXT union iwreq_data wrqu; #endif @@ -543,8 +554,15 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->conn = NULL; } - nl80211_send_disconnected(wiphy_to_dev(wdev->wiphy), dev, - reason, ie, ie_len, from_ap); + nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); + + /* + * Delete all the keys ... pairwise keys can't really + * exist any more anyway, but default keys might. + */ + if (rdev->ops->del_key) + for (i = 0; i < 6; i++) + rdev->ops->del_key(wdev->wiphy, dev, i, NULL); #ifdef CONFIG_WIRELESS_EXT memset(&wrqu, 0, sizeof(wrqu)); @@ -580,7 +598,8 @@ EXPORT_SYMBOL(cfg80211_disconnected); int __cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_connect_params *connect) + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -590,6 +609,24 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, if (wdev->sme_state != CFG80211_SME_IDLE) return -EALREADY; + if (WARN_ON(wdev->connect_keys)) { + kfree(wdev->connect_keys); + wdev->connect_keys = NULL; + } + + if (connkeys && connkeys->def >= 0) { + int idx; + + idx = connkeys->def; + /* If given a WEP key we may need it for shared key auth */ + if (connkeys->params[idx].cipher == WLAN_CIPHER_SUITE_WEP40 || + connkeys->params[idx].cipher == WLAN_CIPHER_SUITE_WEP104) { + connect->key_idx = idx; + connect->key = connkeys->params[idx].key; + connect->key_len = connkeys->params[idx].key_len; + } + } + if (!rdev->ops->connect) { if (!rdev->ops->auth || !rdev->ops->assoc) return -EOPNOTSUPP; @@ -640,6 +677,7 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, cfg80211_get_conn_bss(wdev); wdev->sme_state = CFG80211_SME_CONNECTING; + wdev->connect_keys = connkeys; /* we're good if we have both BSSID and channel */ if (wdev->conn->params.bssid && wdev->conn->params.channel) { @@ -662,13 +700,16 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, kfree(wdev->conn); wdev->conn = NULL; wdev->sme_state = CFG80211_SME_IDLE; + wdev->connect_keys = NULL; } return err; } else { wdev->sme_state = CFG80211_SME_CONNECTING; + wdev->connect_keys = connkeys; err = rdev->ops->connect(&rdev->wiphy, dev, connect); if (err) { + wdev->connect_keys = NULL; wdev->sme_state = CFG80211_SME_IDLE; return err; } 
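/*
 * Caller-side sketch of the ownership rule the hunks above establish
 * (illustrative only, error paths trimmed, names mirror nl80211_connect()):
 * connkeys is handed off to the wireless_dev once the connect attempt is
 * under way and cfg80211 frees it on its own failure/disconnect paths, so
 * the caller frees it only when the call itself returns an error.
 */
err = cfg80211_connect(rdev, dev, &connect, connkeys);
if (err)
	kfree(connkeys);	/* ownership was never transferred */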
@@ -682,12 +723,13 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_connect_params *connect) + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys) { int err; wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_connect(rdev, dev, connect); + err = __cfg80211_connect(rdev, dev, connect, connkeys); wdev_unlock(dev->ieee80211_ptr); return err; @@ -704,6 +746,9 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, if (wdev->sme_state == CFG80211_SME_IDLE) return -EINVAL; + kfree(wdev->connect_keys); + wdev->connect_keys = NULL; + if (!rdev->ops->disconnect) { if (!rdev->ops->deauth) return -EOPNOTSUPP; diff --git a/net/wireless/util.c b/net/wireless/util.c index 28f8f96801d4..4bab380a1204 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -141,9 +141,12 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy) set_mandatory_flags_band(wiphy->bands[band], band); } -int cfg80211_validate_key_settings(struct key_params *params, int key_idx, +int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, + struct key_params *params, int key_idx, const u8 *mac_addr) { + int i; + if (key_idx > 5) return -EINVAL; @@ -197,6 +200,12 @@ int cfg80211_validate_key_settings(struct key_params *params, int key_idx, } } + for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) + if (params->cipher == rdev->wiphy.cipher_suites[i]) + break; + if (i == rdev->wiphy.n_cipher_suites) + return -EINVAL; + return 0; } @@ -523,3 +532,33 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie) return NULL; } EXPORT_SYMBOL(ieee80211_bss_get_ie); + +void cfg80211_upload_connect_keys(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct net_device *dev = wdev->netdev; + int i; + + if (!wdev->connect_keys) + return; + + for (i = 0; i < 6; i++) { + if (!wdev->connect_keys->params[i].cipher) + continue; + if (rdev->ops->add_key(wdev->wiphy, dev, i, NULL, + &wdev->connect_keys->params[i])) + printk(KERN_ERR "%s: failed to set key %d\n", + dev->name, i); + if (wdev->connect_keys->def == i) + if (rdev->ops->set_default_key(wdev->wiphy, dev, i)) + printk(KERN_ERR "%s: failed to set defkey %d\n", + dev->name, i); + if (wdev->connect_keys->defmgmt == i) + if (rdev->ops->set_default_mgmt_key(wdev->wiphy, dev, i)) + printk(KERN_ERR "%s: failed to set mgtdef %d\n", + dev->name, i); + } + + kfree(wdev->connect_keys); + wdev->connect_keys = NULL; +} diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 5088d89a30fc..5d0176338539 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -453,15 +453,32 @@ int cfg80211_wext_giwretry(struct net_device *dev, } EXPORT_SYMBOL_GPL(cfg80211_wext_giwretry); -static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *addr, - bool remove, bool tx_key, int idx, - struct key_params *params) +static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *addr, + bool remove, bool tx_key, int idx, + struct key_params *params) { struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; + int err, i; + + if (!wdev->wext.keys) { + wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), + GFP_KERNEL); + if (!wdev->wext.keys) + return -ENOMEM; + for (i = 0; i < 6; i++) + wdev->wext.keys->params[i].key = + 
wdev->wext.keys->data[i]; + } + + if (wdev->iftype != NL80211_IFTYPE_ADHOC && + wdev->iftype != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { + if (!wdev->current_bss) + return -ENOLINK; + if (!rdev->ops->set_default_mgmt_key) return -EOPNOTSUPP; @@ -471,8 +488,14 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, return -EINVAL; if (remove) { - err = rdev->ops->del_key(&rdev->wiphy, dev, idx, addr); + err = 0; + if (wdev->current_bss) + err = rdev->ops->del_key(&rdev->wiphy, dev, idx, addr); if (!err) { + if (!addr) { + wdev->wext.keys->params[idx].key_len = 0; + wdev->wext.keys->params[idx].cipher = 0; + } if (idx == wdev->wext.default_key) wdev->wext.default_key = -1; else if (idx == wdev->wext.default_mgmt_key) @@ -486,36 +509,64 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, return 0; return err; - } else { - if (addr) - tx_key = false; + } - if (cfg80211_validate_key_settings(params, idx, addr)) - return -EINVAL; + if (addr) + tx_key = false; + if (cfg80211_validate_key_settings(rdev, params, idx, addr)) + return -EINVAL; + + err = 0; + if (wdev->current_bss) err = rdev->ops->add_key(&rdev->wiphy, dev, idx, addr, params); - if (err) - return err; + if (err) + return err; + + if (!addr) { + wdev->wext.keys->params[idx] = *params; + memcpy(wdev->wext.keys->data[idx], + params->key, params->key_len); + wdev->wext.keys->params[idx].key = + wdev->wext.keys->data[idx]; + } - if (tx_key || (!addr && wdev->wext.default_key == -1)) { + if (params->cipher != WLAN_CIPHER_SUITE_AES_CMAC && + (tx_key || (!addr && wdev->wext.default_key == -1))) { + if (wdev->current_bss) err = rdev->ops->set_default_key(&rdev->wiphy, dev, idx); - if (!err) - wdev->wext.default_key = idx; - return err; - } + if (!err) + wdev->wext.default_key = idx; + return err; + } - if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC && - (tx_key || (!addr && wdev->wext.default_mgmt_key == -1))) { + if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC && + (tx_key || (!addr && wdev->wext.default_mgmt_key == -1))) { + if (wdev->current_bss) err = rdev->ops->set_default_mgmt_key(&rdev->wiphy, dev, idx); - if (!err) - wdev->wext.default_mgmt_key = idx; - return err; - } - - return 0; + if (!err) + wdev->wext.default_mgmt_key = idx; + return err; } + + return 0; +} + +static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *addr, + bool remove, bool tx_key, int idx, + struct key_params *params) +{ + int err; + + wdev_lock(dev->ieee80211_ptr); + err = __cfg80211_set_encryption(rdev, dev, addr, remove, + tx_key, idx, params); + wdev_unlock(dev->ieee80211_ptr); + + return err; } int cfg80211_wext_siwencode(struct net_device *dev, @@ -528,6 +579,10 @@ int cfg80211_wext_siwencode(struct net_device *dev, bool remove = false; struct key_params params; + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_ADHOC) + return -EOPNOTSUPP; + /* no use -- only MFP (set_default_mgmt_key) is optional */ if (!rdev->ops->del_key || !rdev->ops->add_key || @@ -548,9 +603,14 @@ int cfg80211_wext_siwencode(struct net_device *dev, remove = true; else if (erq->length == 0) { /* No key data - just set the default TX key index */ - err = rdev->ops->set_default_key(&rdev->wiphy, dev, idx); + err = 0; + wdev_lock(wdev); + if (wdev->current_bss) + err = rdev->ops->set_default_key(&rdev->wiphy, + dev, idx); if (!err) wdev->wext.default_key = idx; + wdev_unlock(wdev); return err; 
} @@ -583,6 +643,10 @@ int cfg80211_wext_siwencodeext(struct net_device *dev, struct key_params params; u32 cipher; + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_ADHOC) + return -EOPNOTSUPP; + /* no use -- only MFP (set_default_mgmt_key) is optional */ if (!rdev->ops->del_key || !rdev->ops->add_key || @@ -656,37 +720,15 @@ int cfg80211_wext_siwencodeext(struct net_device *dev, } EXPORT_SYMBOL_GPL(cfg80211_wext_siwencodeext); -struct giwencode_cookie { - size_t buflen; - char *keybuf; -}; - -static void giwencode_get_key_cb(void *cookie, struct key_params *params) -{ - struct giwencode_cookie *data = cookie; - - if (!params->key) { - data->buflen = 0; - return; - } - - data->buflen = min_t(size_t, data->buflen, params->key_len); - memcpy(data->keybuf, params->key, data->buflen); -} - int cfg80211_wext_giwencode(struct net_device *dev, struct iw_request_info *info, struct iw_point *erq, char *keybuf) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - int idx, err; - struct giwencode_cookie data = { - .keybuf = keybuf, - .buflen = erq->length, - }; + int idx; - if (!rdev->ops->get_key) + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; idx = erq->flags & IW_ENCODE_INDEX; @@ -701,21 +743,18 @@ int cfg80211_wext_giwencode(struct net_device *dev, erq->flags = idx + 1; - err = rdev->ops->get_key(&rdev->wiphy, dev, idx, NULL, &data, - giwencode_get_key_cb); - if (!err) { - erq->length = data.buflen; - erq->flags |= IW_ENCODE_ENABLED; - return 0; - } - - if (err == -ENOENT) { + if (!wdev->wext.keys || !wdev->wext.keys->params[idx].cipher) { erq->flags |= IW_ENCODE_DISABLED; erq->length = 0; return 0; } - return err; + erq->length = min_t(size_t, erq->length, + wdev->wext.keys->params[idx].key_len); + memcpy(keybuf, wdev->wext.keys->params[idx].key, erq->length); + erq->flags |= IW_ENCODE_ENABLED; + + return 0; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwencode); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 6f75aaa7f795..c33ea9a5de78 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -10,10 +10,11 @@ #include #include "nl80211.h" -static int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) { - int err; + struct cfg80211_cached_keys *ck = NULL; + int err, i; ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); @@ -25,10 +26,25 @@ static int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, wdev->wext.connect.ie_len = wdev->wext.ie_len; wdev->wext.connect.privacy = wdev->wext.default_key != -1; - err = 0; - if (wdev->wext.connect.ssid_len != 0) - err = __cfg80211_connect(rdev, wdev->netdev, - &wdev->wext.connect); + if (wdev->wext.keys) { + wdev->wext.keys->def = wdev->wext.default_key; + wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key; + } + + if (!wdev->wext.connect.ssid_len) + return 0; + + if (wdev->wext.keys) { + ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); + if (!ck) + return -ENOMEM; + for (i = 0; i < 6; i++) + ck->params[i].key = ck->data[i]; + } + err = __cfg80211_connect(rdev, wdev->netdev, + &wdev->wext.connect, ck); + if (err) + kfree(ck); return err; } -- cgit v1.2.3 From 48ab905d1a81b7df33a33def04a890e4e0c51460 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 Jul 2009 18:42:31 +0200 Subject: nl80211: report 
BSS status When connected to a BSS, or joined to an IBSS, we'll want to know in userspace without using wireless extensions, so report the BSS status in the BSS list. Userspace can query the BSS list, display all the information and retrieve the station information as well. For example (from hwsim): $ iw dev wlan1 scan dump BSS 02:00:00:00:00:00 (on wlan1) -- associated freq: 2462 beacon interval: 100 capability: ESS ShortSlotTime (0x0401) signal: -50.00 dBm SSID: j Supported rates: 1.0* 2.0* 5.5* 11.0* 6.0 9.0 12.0 18.0 DS Paramater set: channel 11 ERP: Extended supported rates: 24.0 36.0 48.0 54.0 Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 11 +++++++++ net/wireless/nl80211.c | 65 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 59 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index b043b78dd2c3..962e2232a074 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1260,6 +1260,7 @@ enum nl80211_channel_type { * in mBm (100 * dBm) (s32) * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon * in unspecified units, scaled to 0..100 (u8) + * @NL80211_BSS_STATUS: status, if this BSS is "used" * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -1273,12 +1274,22 @@ enum nl80211_bss { NL80211_BSS_INFORMATION_ELEMENTS, NL80211_BSS_SIGNAL_MBM, NL80211_BSS_SIGNAL_UNSPEC, + NL80211_BSS_STATUS, /* keep last */ __NL80211_BSS_AFTER_LAST, NL80211_BSS_MAX = __NL80211_BSS_AFTER_LAST - 1 }; +/** + * enum nl80211_bss_status - BSS "status" + */ +enum nl80211_bss_status { + NL80211_BSS_STATUS_AUTHENTICATED, + NL80211_BSS_STATUS_ASSOCIATED, + NL80211_BSS_STATUS_IBSS_JOINED, +}; + /** * enum nl80211_auth_type - AuthenticationType * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 45c5f9c8e51b..da450ef1fc7e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3094,11 +3094,15 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_bss *res) + struct wireless_dev *wdev, + struct cfg80211_internal_bss *intbss) { + struct cfg80211_bss *res = &intbss->pub; void *hdr; struct nlattr *bss; + int i; + + ASSERT_WDEV_LOCK(wdev); hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_SCAN_RESULTS); @@ -3107,7 +3111,7 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_SCAN_GENERATION, rdev->bss_generation); - NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex); bss = nla_nest_start(msg, NL80211_ATTR_BSS); if (!bss) @@ -3136,6 +3140,28 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, break; } + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + if (intbss == wdev->current_bss) + NLA_PUT_U32(msg, NL80211_BSS_STATUS, + NL80211_BSS_STATUS_ASSOCIATED); + else for (i = 0; i < MAX_AUTH_BSSES; i++) { + if (intbss != wdev->auth_bsses[i]) + continue; + NLA_PUT_U32(msg, NL80211_BSS_STATUS, + NL80211_BSS_STATUS_AUTHENTICATED); + break; + } + break; + case NL80211_IFTYPE_ADHOC: + if (intbss == wdev->current_bss) + NLA_PUT_U32(msg, NL80211_BSS_STATUS, + NL80211_BSS_STATUS_IBSS_JOINED); + break; + default: + break; + } + nla_nest_end(msg, bss); 
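/*
 * Aside, not part of the patch: userspace can pick the new attribute out of
 * a scan-dump reply with the stock libnl nested-parse pattern. Sketch only;
 * it assumes tb[] already holds the outer NL80211_ATTR_* attributes, and the
 * printf mirrors the " -- associated" marker in the iw output above.
 *
 *	struct nlattr *bss[NL80211_BSS_MAX + 1];
 *
 *	if (tb[NL80211_ATTR_BSS] &&
 *	    !nla_parse_nested(bss, NL80211_BSS_MAX,
 *			      tb[NL80211_ATTR_BSS], NULL) &&
 *	    bss[NL80211_BSS_STATUS] &&
 *	    nla_get_u32(bss[NL80211_BSS_STATUS]) ==
 *					NL80211_BSS_STATUS_ASSOCIATED)
 *		printf(" -- associated\n");
 */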
return genlmsg_end(msg, hdr); @@ -3148,9 +3174,10 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) { - struct cfg80211_registered_device *dev; - struct net_device *netdev; + struct cfg80211_registered_device *rdev; + struct net_device *dev; struct cfg80211_internal_bss *scan; + struct wireless_dev *wdev; int ifidx = cb->args[0]; int start = cb->args[1], idx = 0; int err; @@ -3171,39 +3198,43 @@ static int nl80211_dump_scan(struct sk_buff *skb, cb->args[0] = ifidx; } - netdev = dev_get_by_index(&init_net, ifidx); - if (!netdev) + dev = dev_get_by_index(&init_net, ifidx); + if (!dev) return -ENODEV; - dev = cfg80211_get_dev_from_ifindex(ifidx); - if (IS_ERR(dev)) { - err = PTR_ERR(dev); + rdev = cfg80211_get_dev_from_ifindex(ifidx); + if (IS_ERR(rdev)) { + err = PTR_ERR(rdev); goto out_put_netdev; } - spin_lock_bh(&dev->bss_lock); - cfg80211_bss_expire(dev); + wdev = dev->ieee80211_ptr; - list_for_each_entry(scan, &dev->bss_list, list) { + wdev_lock(wdev); + spin_lock_bh(&rdev->bss_lock); + cfg80211_bss_expire(rdev); + + list_for_each_entry(scan, &rdev->bss_list, list) { if (++idx <= start) continue; if (nl80211_send_bss(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev, netdev, &scan->pub) < 0) { + rdev, wdev, scan) < 0) { idx--; goto out; } } out: - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); + wdev_unlock(wdev); cb->args[1] = idx; err = skb->len; - cfg80211_unlock_rdev(dev); + cfg80211_unlock_rdev(rdev); out_put_netdev: - dev_put(netdev); + dev_put(dev); return err; } -- cgit v1.2.3 From 72bce62775db0315511474e8d8f8e25d25b48366 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 17 Jun 2009 17:45:28 +0200 Subject: net: remove unused skb->do_not_encrypt mac80211 required this due to the master netdev, but now it can put all information into skb->cb and this can go. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/linux/skbuff.h | 6 +----- net/core/skbuff.c | 3 --- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f2c69a2cca17..df7b23ac66e6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -304,7 +304,6 @@ typedef unsigned char *sk_buff_data_t; * @tc_index: Traffic control index * @tc_verd: traffic control verdict * @ndisc_nodetype: router type (from link layer) - * @do_not_encrypt: set to prevent encryption of this frame * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking @@ -379,13 +378,10 @@ struct sk_buff { kmemcheck_bitfield_begin(flags2); #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; -#endif -#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) - __u8 do_not_encrypt:1; #endif kmemcheck_bitfield_end(flags2); - /* 0/13/14 bit hole */ + /* 0/14 bit hole */ #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9e0597d189b0..80a96166df39 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -559,9 +559,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif #endif new->vlan_tci = old->vlan_tci; -#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) - new->do_not_encrypt = old->do_not_encrypt; -#endif skb_copy_secmark(new, old); } -- cgit v1.2.3 From 8150f32b90f630ad3e460f026ce338cb81685bc9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 24 Jul 2009 22:11:32 -0700 Subject: Driver Core: Make PM operations a const pointer They are not supposed to be modified by drivers, so make them const. Signed-off-by: Dmitry Torokhov Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- drivers/base/platform.c | 2 +- drivers/base/power/main.c | 8 +++++--- drivers/pci/pci-driver.c | 16 ++++++++-------- include/linux/device.h | 9 +++++---- 4 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 81cb01bfc356..c4b3fcee17b2 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -925,7 +925,7 @@ static int platform_pm_restore_noirq(struct device *dev) #endif /* !CONFIG_HIBERNATION */ -static struct dev_pm_ops platform_dev_pm_ops = { +static const struct dev_pm_ops platform_dev_pm_ops = { .prepare = platform_pm_prepare, .complete = platform_pm_complete, .suspend = platform_pm_suspend, diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 58a3e572f2c9..1b1a786b7dec 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -157,8 +157,9 @@ void device_pm_move_last(struct device *dev) * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. */ -static int pm_op(struct device *dev, struct dev_pm_ops *ops, - pm_message_t state) +static int pm_op(struct device *dev, + const struct dev_pm_ops *ops, + pm_message_t state) { int error = 0; @@ -220,7 +221,8 @@ static int pm_op(struct device *dev, struct dev_pm_ops *ops, * The operation is executed with interrupts disabled by the only remaining * functional CPU in the system. 
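 *
 * Aside, not part of the patch: nothing changes for drivers beyond the
 * added qualifier; a sketch with hypothetical foo_* callbacks:
 *
 *	static const struct dev_pm_ops foo_pm_ops = {
 *		.suspend = foo_suspend,
 *		.resume  = foo_resume,
 *	};
 *
 *	static struct platform_driver foo_driver = {
 *		.driver = { .name = "foo", .pm = &foo_pm_ops },
 *	};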
*/ -static int pm_noirq_op(struct device *dev, struct dev_pm_ops *ops, +static int pm_noirq_op(struct device *dev, + const struct dev_pm_ops *ops, pm_message_t state) { int error = 0; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d76c4c85367e..0c2ea44ae5e1 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -575,7 +575,7 @@ static void pci_pm_complete(struct device *dev) static int pci_pm_suspend(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_SUSPEND); @@ -613,7 +613,7 @@ static int pci_pm_suspend(struct device *dev) static int pci_pm_suspend_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_SUSPEND); @@ -672,7 +672,7 @@ static int pci_pm_resume_noirq(struct device *dev) static int pci_pm_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; /* @@ -711,7 +711,7 @@ static int pci_pm_resume(struct device *dev) static int pci_pm_freeze(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_FREEZE); @@ -780,7 +780,7 @@ static int pci_pm_thaw_noirq(struct device *dev) static int pci_pm_thaw(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) @@ -799,7 +799,7 @@ static int pci_pm_thaw(struct device *dev) static int pci_pm_poweroff(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); @@ -872,7 +872,7 @@ static int pci_pm_restore_noirq(struct device *dev) static int pci_pm_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? 
dev->driver->pm : NULL; int error = 0; /* @@ -910,7 +910,7 @@ static int pci_pm_restore(struct device *dev) #endif /* !CONFIG_HIBERNATION */ -struct dev_pm_ops pci_dev_pm_ops = { +const struct dev_pm_ops pci_dev_pm_ops = { .prepare = pci_pm_prepare, .complete = pci_pm_complete, .suspend = pci_pm_suspend, diff --git a/include/linux/device.h b/include/linux/device.h index aebb81036db2..a28642975053 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -62,7 +62,7 @@ struct bus_type { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; struct bus_type_private *p; }; @@ -132,7 +132,7 @@ struct device_driver { int (*resume) (struct device *dev); struct attribute_group **groups; - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; struct driver_private *p; }; @@ -200,7 +200,8 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; + struct class_private *p; }; @@ -291,7 +292,7 @@ struct device_type { char *(*nodename)(struct device *dev); void (*release)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; }; /* interface for exporting device attributes */ -- cgit v1.2.3 From cb3824bade2549d7ad059d5802da43312540fdee Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 8 Jul 2009 14:21:12 +0200 Subject: ISDN: Make isdnhdlc usable for other ISDN drivers isdnhdlc is useful for other ISDN drivers as well. Move the include file to a central location and the source to the central isdn location. Signed-off-by: Karsten Keil --- drivers/isdn/Kconfig | 6 +- drivers/isdn/hisax/Kconfig | 6 +- drivers/isdn/hisax/Makefile | 4 - drivers/isdn/hisax/isdnhdlc.c | 603 ------------------------------------------ drivers/isdn/hisax/isdnhdlc.h | 70 ----- drivers/isdn/hisax/st5481.h | 2 +- drivers/isdn/i4l/Kconfig | 11 + drivers/isdn/i4l/Makefile | 1 + drivers/isdn/i4l/isdnhdlc.c | 603 ++++++++++++++++++++++++++++++++++++++++++ include/linux/isdn/hdlc.h | 70 +++++ 10 files changed, 689 insertions(+), 687 deletions(-) delete mode 100644 drivers/isdn/hisax/isdnhdlc.c delete mode 100644 drivers/isdn/hisax/isdnhdlc.h create mode 100644 drivers/isdn/i4l/isdnhdlc.c create mode 100644 include/linux/isdn/hdlc.h (limited to 'include/linux') diff --git a/drivers/isdn/Kconfig b/drivers/isdn/Kconfig index 02bdca6f95c3..022a19452953 100644 --- a/drivers/isdn/Kconfig +++ b/drivers/isdn/Kconfig @@ -21,8 +21,6 @@ menuconfig ISDN if ISDN -source "drivers/isdn/mISDN/Kconfig" - menuconfig ISDN_I4L tristate "Old ISDN4Linux (deprecated)" ---help--- @@ -41,9 +39,9 @@ menuconfig ISDN_I4L It is still available, though, for use with adapters that are not supported by the new CAPI subsystem yet. 
-if ISDN_I4L +source "drivers/isdn/mISDN/Kconfig" + source "drivers/isdn/i4l/Kconfig" -endif menuconfig ISDN_CAPI tristate "CAPI 2.0 subsystem" diff --git a/drivers/isdn/hisax/Kconfig b/drivers/isdn/hisax/Kconfig index 7832d8ba8e44..3464ebc4cdbc 100644 --- a/drivers/isdn/hisax/Kconfig +++ b/drivers/isdn/hisax/Kconfig @@ -391,6 +391,7 @@ comment "HiSax sub driver modules" config HISAX_ST5481 tristate "ST5481 USB ISDN modem (EXPERIMENTAL)" depends on USB && EXPERIMENTAL + select ISDN_HDLC select CRC_CCITT select BITREVERSE help @@ -418,11 +419,6 @@ config HISAX_FRITZ_PCIPNP (the latter also needs you to select "ISA Plug and Play support" from the menu "Plug and Play configuration") -config HISAX_HDLC - bool - depends on HISAX_ST5481 - default y - config HISAX_AVM_A1_PCMCIA bool depends on HISAX_AVM_A1_CS diff --git a/drivers/isdn/hisax/Makefile b/drivers/isdn/hisax/Makefile index c7a3794bdae4..ab638b083df9 100644 --- a/drivers/isdn/hisax/Makefile +++ b/drivers/isdn/hisax/Makefile @@ -16,10 +16,6 @@ obj-$(CONFIG_HISAX_HFCUSB) += hfc_usb.o obj-$(CONFIG_HISAX_HFC4S8S) += hfc4s8s_l1.o obj-$(CONFIG_HISAX_FRITZ_PCIPNP) += hisax_isac.o hisax_fcpcipnp.o -ifdef CONFIG_HISAX_HDLC -obj-$(CONFIG_ISDN_DRV_HISAX) += isdnhdlc.o -endif - # Multipart objects. hisax_st5481-y := st5481_init.o st5481_usb.o st5481_d.o \ diff --git a/drivers/isdn/hisax/isdnhdlc.c b/drivers/isdn/hisax/isdnhdlc.c deleted file mode 100644 index c69a77a80062..000000000000 --- a/drivers/isdn/hisax/isdnhdlc.c +++ /dev/null @@ -1,603 +0,0 @@ -/* - * isdnhdlc.c -- General purpose ISDN HDLC decoder. - * - *Copyright (C) 2002 Wolfgang Mües - * 2001 Frode Isaksen - * 2001 Kai Germaschewski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include -#include -#include -#include "isdnhdlc.h" - -/*-------------------------------------------------------------------*/ - -MODULE_AUTHOR("Wolfgang Mües , " - "Frode Isaksen , " - "Kai Germaschewski "); -MODULE_DESCRIPTION("General purpose ISDN HDLC decoder"); -MODULE_LICENSE("GPL"); - -/*-------------------------------------------------------------------*/ - -enum { - HDLC_FAST_IDLE,HDLC_GET_FLAG_B0,HDLC_GETFLAG_B1A6,HDLC_GETFLAG_B7, - HDLC_GET_DATA,HDLC_FAST_FLAG -}; - -enum { - HDLC_SEND_DATA,HDLC_SEND_CRC1,HDLC_SEND_FAST_FLAG, - HDLC_SEND_FIRST_FLAG,HDLC_SEND_CRC2,HDLC_SEND_CLOSING_FLAG, - HDLC_SEND_IDLE1,HDLC_SEND_FAST_IDLE,HDLC_SENDFLAG_B0, - HDLC_SENDFLAG_B1A6,HDLC_SENDFLAG_B7,STOPPED -}; - -void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56) -{ - hdlc->bit_shift = 0; - hdlc->hdlc_bits1 = 0; - hdlc->data_bits = 0; - hdlc->ffbit_shift = 0; - hdlc->data_received = 0; - hdlc->state = HDLC_GET_DATA; - hdlc->do_adapt56 = do_adapt56; - hdlc->dchannel = 0; - hdlc->crc = 0; - hdlc->cbin = 0; - hdlc->shift_reg = 0; - hdlc->ffvalue = 0; - hdlc->dstpos = 0; -} - -void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_adapt56) -{ - hdlc->bit_shift = 0; - hdlc->hdlc_bits1 = 0; - hdlc->data_bits = 0; - hdlc->ffbit_shift = 0; - hdlc->data_received = 0; - hdlc->do_closing = 0; - hdlc->ffvalue = 0; - if (is_d_channel) { - hdlc->dchannel = 1; - hdlc->state = HDLC_SEND_FIRST_FLAG; - } else { - hdlc->dchannel = 0; - hdlc->state = HDLC_SEND_FAST_FLAG; - hdlc->ffvalue = 0x7e; - } - hdlc->cbin = 0x7e; - hdlc->bit_shift = 0; - if(do_adapt56){ - hdlc->do_adapt56 = 1; - hdlc->data_bits = 0; - hdlc->state = HDLC_SENDFLAG_B0; - } else { - hdlc->do_adapt56 = 0; - hdlc->data_bits = 8; - } - hdlc->shift_reg = 0; -} - -/* - isdnhdlc_decode - decodes HDLC frames from a transparent bit stream. - - The source buffer is scanned for valid HDLC frames looking for - flags (01111110) to indicate the start of a frame. If the start of - the frame is found, the bit stuffing is removed (0 after 5 1's). - When a new flag is found, the complete frame has been received - and the CRC is checked. - If a valid frame is found, the function returns the frame length - excluding the CRC with the bit HDLC_END_OF_FRAME set. - If the beginning of a valid frame is found, the function returns - the length. - If a framing error is found (too many 1s and not a flag) the function - returns the length with the bit HDLC_FRAMING_ERROR set. - If a CRC error is found the function returns the length with the - bit HDLC_CRC_ERROR set. - If the frame length exceeds the destination buffer size, the function - returns the length with the bit HDLC_LENGTH_ERROR set. - - src - source buffer - slen - source buffer length - count - number of bytes removed (decoded) from the source buffer - dst _ destination buffer - dsize - destination buffer size - returns - number of decoded bytes in the destination buffer and status - flag. 
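Aside, not from the patch: the "good frame" test in the decoder below compares the running CRC against 0xf0b8 rather than 0, because CRC-CCITT computed over a frame that includes its own FCS leaves that fixed residual. A standalone check over a received frame buf[0..len) using the same crc_ccitt_byte() helper:

	u16 crc = 0xffff;
	int i, frame_ok;

	for (i = 0; i < len; i++)	/* len covers the payload plus both FCS bytes */
		crc = crc_ccitt_byte(crc, buf[i]);
	frame_ok = (crc == 0xf0b8);	/* X.25 residual for an intact frame */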
- */ -int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, - int slen, int *count, unsigned char *dst, int dsize) -{ - int status=0; - - static const unsigned char fast_flag[]={ - 0x00,0x00,0x00,0x20,0x30,0x38,0x3c,0x3e,0x3f - }; - - static const unsigned char fast_flag_value[]={ - 0x00,0x7e,0xfc,0xf9,0xf3,0xe7,0xcf,0x9f,0x3f - }; - - static const unsigned char fast_abort[]={ - 0x00,0x00,0x80,0xc0,0xe0,0xf0,0xf8,0xfc,0xfe,0xff - }; - - *count = slen; - - while(slen > 0){ - if(hdlc->bit_shift==0){ - hdlc->cbin = *src++; - slen--; - hdlc->bit_shift = 8; - if(hdlc->do_adapt56){ - hdlc->bit_shift --; - } - } - - switch(hdlc->state){ - case STOPPED: - return 0; - case HDLC_FAST_IDLE: - if(hdlc->cbin == 0xff){ - hdlc->bit_shift = 0; - break; - } - hdlc->state = HDLC_GET_FLAG_B0; - hdlc->hdlc_bits1 = 0; - hdlc->bit_shift = 8; - break; - case HDLC_GET_FLAG_B0: - if(!(hdlc->cbin & 0x80)) { - hdlc->state = HDLC_GETFLAG_B1A6; - hdlc->hdlc_bits1 = 0; - } else { - if(!hdlc->do_adapt56){ - if(++hdlc->hdlc_bits1 >=8 ) if(hdlc->bit_shift==1) - hdlc->state = HDLC_FAST_IDLE; - } - } - hdlc->cbin<<=1; - hdlc->bit_shift --; - break; - case HDLC_GETFLAG_B1A6: - if(hdlc->cbin & 0x80){ - hdlc->hdlc_bits1++; - if(hdlc->hdlc_bits1==6){ - hdlc->state = HDLC_GETFLAG_B7; - } - } else { - hdlc->hdlc_bits1 = 0; - } - hdlc->cbin<<=1; - hdlc->bit_shift --; - break; - case HDLC_GETFLAG_B7: - if(hdlc->cbin & 0x80) { - hdlc->state = HDLC_GET_FLAG_B0; - } else { - hdlc->state = HDLC_GET_DATA; - hdlc->crc = 0xffff; - hdlc->shift_reg = 0; - hdlc->hdlc_bits1 = 0; - hdlc->data_bits = 0; - hdlc->data_received = 0; - } - hdlc->cbin<<=1; - hdlc->bit_shift --; - break; - case HDLC_GET_DATA: - if(hdlc->cbin & 0x80){ - hdlc->hdlc_bits1++; - switch(hdlc->hdlc_bits1){ - case 6: - break; - case 7: - if(hdlc->data_received) { - // bad frame - status = -HDLC_FRAMING_ERROR; - } - if(!hdlc->do_adapt56){ - if(hdlc->cbin==fast_abort[hdlc->bit_shift+1]){ - hdlc->state = HDLC_FAST_IDLE; - hdlc->bit_shift=1; - break; - } - } else { - hdlc->state = HDLC_GET_FLAG_B0; - } - break; - default: - hdlc->shift_reg>>=1; - hdlc->shift_reg |= 0x80; - hdlc->data_bits++; - break; - } - } else { - switch(hdlc->hdlc_bits1){ - case 5: - break; - case 6: - if(hdlc->data_received){ - if (hdlc->dstpos < 2) { - status = -HDLC_FRAMING_ERROR; - } else if (hdlc->crc != 0xf0b8){ - // crc error - status = -HDLC_CRC_ERROR; - } else { - // remove CRC - hdlc->dstpos -= 2; - // good frame - status = hdlc->dstpos; - } - } - hdlc->crc = 0xffff; - hdlc->shift_reg = 0; - hdlc->data_bits = 0; - if(!hdlc->do_adapt56){ - if(hdlc->cbin==fast_flag[hdlc->bit_shift]){ - hdlc->ffvalue = fast_flag_value[hdlc->bit_shift]; - hdlc->state = HDLC_FAST_FLAG; - hdlc->ffbit_shift = hdlc->bit_shift; - hdlc->bit_shift = 1; - } else { - hdlc->state = HDLC_GET_DATA; - hdlc->data_received = 0; - } - } else { - hdlc->state = HDLC_GET_DATA; - hdlc->data_received = 0; - } - break; - default: - hdlc->shift_reg>>=1; - hdlc->data_bits++; - break; - } - hdlc->hdlc_bits1 = 0; - } - if (status) { - hdlc->dstpos = 0; - *count -= slen; - hdlc->cbin <<= 1; - hdlc->bit_shift--; - return status; - } - if(hdlc->data_bits==8){ - hdlc->data_bits = 0; - hdlc->data_received = 1; - hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); - - // good byte received - if (hdlc->dstpos < dsize) { - dst[hdlc->dstpos++] = hdlc->shift_reg; - } else { - // frame too long - status = -HDLC_LENGTH_ERROR; - hdlc->dstpos = 0; - } - } - hdlc->cbin <<= 1; - hdlc->bit_shift--; - break; - case HDLC_FAST_FLAG: - 
if(hdlc->cbin==hdlc->ffvalue){ - hdlc->bit_shift = 0; - break; - } else { - if(hdlc->cbin == 0xff){ - hdlc->state = HDLC_FAST_IDLE; - hdlc->bit_shift=0; - } else if(hdlc->ffbit_shift==8){ - hdlc->state = HDLC_GETFLAG_B7; - break; - } else { - hdlc->shift_reg = fast_abort[hdlc->ffbit_shift-1]; - hdlc->hdlc_bits1 = hdlc->ffbit_shift-2; - if(hdlc->hdlc_bits1<0)hdlc->hdlc_bits1 = 0; - hdlc->data_bits = hdlc->ffbit_shift-1; - hdlc->state = HDLC_GET_DATA; - hdlc->data_received = 0; - } - } - break; - default: - break; - } - } - *count -= slen; - return 0; -} - -/* - isdnhdlc_encode - encodes HDLC frames to a transparent bit stream. - - The bit stream starts with a beginning flag (01111110). After - that each byte is added to the bit stream with bit stuffing added - (0 after 5 1's). - When the last byte has been removed from the source buffer, the - CRC (2 bytes is added) and the frame terminates with the ending flag. - For the dchannel, the idle character (all 1's) is also added at the end. - If this function is called with empty source buffer (slen=0), flags or - idle character will be generated. - - src - source buffer - slen - source buffer length - count - number of bytes removed (encoded) from source buffer - dst _ destination buffer - dsize - destination buffer size - returns - number of encoded bytes in the destination buffer -*/ -int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, - unsigned short slen, int *count, - unsigned char *dst, int dsize) -{ - static const unsigned char xfast_flag_value[] = { - 0x7e,0x3f,0x9f,0xcf,0xe7,0xf3,0xf9,0xfc,0x7e - }; - - int len = 0; - - *count = slen; - - while (dsize > 0) { - if(hdlc->bit_shift==0){ - if(slen && !hdlc->do_closing){ - hdlc->shift_reg = *src++; - slen--; - if (slen == 0) - hdlc->do_closing = 1; /* closing sequence, CRC + flag(s) */ - hdlc->bit_shift = 8; - } else { - if(hdlc->state == HDLC_SEND_DATA){ - if(hdlc->data_received){ - hdlc->state = HDLC_SEND_CRC1; - hdlc->crc ^= 0xffff; - hdlc->bit_shift = 8; - hdlc->shift_reg = hdlc->crc & 0xff; - } else if(!hdlc->do_adapt56){ - hdlc->state = HDLC_SEND_FAST_FLAG; - } else { - hdlc->state = HDLC_SENDFLAG_B0; - } - } - - } - } - - switch(hdlc->state){ - case STOPPED: - while (dsize--) - *dst++ = 0xff; - - return dsize; - case HDLC_SEND_FAST_FLAG: - hdlc->do_closing = 0; - if(slen == 0){ - *dst++ = hdlc->ffvalue; - len++; - dsize--; - break; - } - if(hdlc->bit_shift==8){ - hdlc->cbin = hdlc->ffvalue>>(8-hdlc->data_bits); - hdlc->state = HDLC_SEND_DATA; - hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - hdlc->data_received = 1; - } - break; - case HDLC_SENDFLAG_B0: - hdlc->do_closing = 0; - hdlc->cbin <<= 1; - hdlc->data_bits++; - hdlc->hdlc_bits1 = 0; - hdlc->state = HDLC_SENDFLAG_B1A6; - break; - case HDLC_SENDFLAG_B1A6: - hdlc->cbin <<= 1; - hdlc->data_bits++; - hdlc->cbin++; - if(++hdlc->hdlc_bits1 == 6) - hdlc->state = HDLC_SENDFLAG_B7; - break; - case HDLC_SENDFLAG_B7: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(slen == 0){ - hdlc->state = HDLC_SENDFLAG_B0; - break; - } - if(hdlc->bit_shift==8){ - hdlc->state = HDLC_SEND_DATA; - hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - hdlc->data_received = 1; - } - break; - case HDLC_SEND_FIRST_FLAG: - hdlc->data_received = 1; - if(hdlc->data_bits==8){ - hdlc->state = HDLC_SEND_DATA; - hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - break; - } - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(hdlc->shift_reg & 0x01) - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - if(hdlc->bit_shift==0){ - hdlc->state = HDLC_SEND_DATA; 
- hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - } - break; - case HDLC_SEND_DATA: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ - hdlc->hdlc_bits1 = 0; - break; - } - if(hdlc->bit_shift==8){ - hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); - } - if(hdlc->shift_reg & 0x01){ - hdlc->hdlc_bits1++; - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } else { - hdlc->hdlc_bits1 = 0; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } - break; - case HDLC_SEND_CRC1: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ - hdlc->hdlc_bits1 = 0; - break; - } - if(hdlc->shift_reg & 0x01){ - hdlc->hdlc_bits1++; - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } else { - hdlc->hdlc_bits1 = 0; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } - if(hdlc->bit_shift==0){ - hdlc->shift_reg = (hdlc->crc >> 8); - hdlc->state = HDLC_SEND_CRC2; - hdlc->bit_shift = 8; - } - break; - case HDLC_SEND_CRC2: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ - hdlc->hdlc_bits1 = 0; - break; - } - if(hdlc->shift_reg & 0x01){ - hdlc->hdlc_bits1++; - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } else { - hdlc->hdlc_bits1 = 0; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } - if(hdlc->bit_shift==0){ - hdlc->shift_reg = 0x7e; - hdlc->state = HDLC_SEND_CLOSING_FLAG; - hdlc->bit_shift = 8; - } - break; - case HDLC_SEND_CLOSING_FLAG: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ - hdlc->hdlc_bits1 = 0; - break; - } - if(hdlc->shift_reg & 0x01){ - hdlc->cbin++; - } - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - if(hdlc->bit_shift==0){ - hdlc->ffvalue = xfast_flag_value[hdlc->data_bits]; - if(hdlc->dchannel){ - hdlc->ffvalue = 0x7e; - hdlc->state = HDLC_SEND_IDLE1; - hdlc->bit_shift = 8-hdlc->data_bits; - if(hdlc->bit_shift==0) - hdlc->state = HDLC_SEND_FAST_IDLE; - } else { - if(!hdlc->do_adapt56){ - hdlc->state = HDLC_SEND_FAST_FLAG; - hdlc->data_received = 0; - } else { - hdlc->state = HDLC_SENDFLAG_B0; - hdlc->data_received = 0; - } - // Finished with this frame, send flags - if (dsize > 1) dsize = 1; - } - } - break; - case HDLC_SEND_IDLE1: - hdlc->do_closing = 0; - hdlc->cbin <<= 1; - hdlc->cbin++; - hdlc->data_bits++; - hdlc->bit_shift--; - if(hdlc->bit_shift==0){ - hdlc->state = HDLC_SEND_FAST_IDLE; - hdlc->bit_shift = 0; - } - break; - case HDLC_SEND_FAST_IDLE: - hdlc->do_closing = 0; - hdlc->cbin = 0xff; - hdlc->data_bits = 8; - if(hdlc->bit_shift == 8){ - hdlc->cbin = 0x7e; - hdlc->state = HDLC_SEND_FIRST_FLAG; - } else { - *dst++ = hdlc->cbin; - hdlc->bit_shift = hdlc->data_bits = 0; - len++; - dsize = 0; - } - break; - default: - break; - } - if(hdlc->do_adapt56){ - if(hdlc->data_bits==7){ - hdlc->cbin <<= 1; - hdlc->cbin++; - hdlc->data_bits++; - } - } - if(hdlc->data_bits==8){ - *dst++ = hdlc->cbin; - hdlc->data_bits = 0; - len++; - dsize--; - } - } - *count -= slen; - - return len; -} - -EXPORT_SYMBOL(isdnhdlc_rcv_init); -EXPORT_SYMBOL(isdnhdlc_decode); -EXPORT_SYMBOL(isdnhdlc_out_init); -EXPORT_SYMBOL(isdnhdlc_encode); diff --git a/drivers/isdn/hisax/isdnhdlc.h b/drivers/isdn/hisax/isdnhdlc.h deleted file mode 100644 index cf0a95a24015..000000000000 --- a/drivers/isdn/hisax/isdnhdlc.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * isdnhdlc.h -- General purpose ISDN HDLC decoder. - * - * Implementation of a HDLC decoder/encoder in software. - * Neccessary because some ISDN devices don't have HDLC - * controllers. Also included: a bit reversal table. 
- * - *Copyright (C) 2002 Wolfgang Mües - * 2001 Frode Isaksen - * 2001 Kai Germaschewski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __ISDNHDLC_H__ -#define __ISDNHDLC_H__ - -struct isdnhdlc_vars { - int bit_shift; - int hdlc_bits1; - int data_bits; - int ffbit_shift; // encoding only - int state; - int dstpos; - - unsigned short crc; - - unsigned char cbin; - unsigned char shift_reg; - unsigned char ffvalue; - - unsigned int data_received:1; // set if transferring data - unsigned int dchannel:1; // set if D channel (send idle instead of flags) - unsigned int do_adapt56:1; // set if 56K adaptation - unsigned int do_closing:1; // set if in closing phase (need to send CRC + flag -}; - - -/* - The return value from isdnhdlc_decode is - the frame length, 0 if no complete frame was decoded, - or a negative error number -*/ -#define HDLC_FRAMING_ERROR 1 -#define HDLC_CRC_ERROR 2 -#define HDLC_LENGTH_ERROR 3 - -extern void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56); - -extern int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, int slen,int *count, - unsigned char *dst, int dsize); - -extern void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc,int is_d_channel,int do_adapt56); - -extern int isdnhdlc_encode (struct isdnhdlc_vars *hdlc,const unsigned char *src,unsigned short slen,int *count, - unsigned char *dst,int dsize); - -#endif /* __ISDNHDLC_H__ */ diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h index cff7a6354334..64f78a8c28c5 100644 --- a/drivers/isdn/hisax/st5481.h +++ b/drivers/isdn/hisax/st5481.h @@ -226,7 +226,7 @@ printk(KERN_WARNING "%s:%s: " format "\n" , __FILE__, __func__ , ## arg) #define INFO(format, arg...) \ printk(KERN_INFO "%s:%s: " format "\n" , __FILE__, __func__ , ## arg) -#include "isdnhdlc.h" +#include #include "fsm.h" #include "hisax_if.h" #include diff --git a/drivers/isdn/i4l/Kconfig b/drivers/isdn/i4l/Kconfig index ed3510f273d8..dd744ffd240b 100644 --- a/drivers/isdn/i4l/Kconfig +++ b/drivers/isdn/i4l/Kconfig @@ -2,6 +2,8 @@ # Old ISDN4Linux config # +if ISDN_I4L + config ISDN_PPP bool "Support synchronous PPP" depends on INET @@ -135,3 +137,12 @@ source "drivers/isdn/act2000/Kconfig" source "drivers/isdn/hysdn/Kconfig" endmenu +# end ISDN_I4L +endif + +config ISDN_HDLC + tristate + depends on HISAX_ST5481 + select CRC_CCITT + select BITREVERSE + diff --git a/drivers/isdn/i4l/Makefile b/drivers/isdn/i4l/Makefile index 49a06c0005dd..cb9d3bb9fae0 100644 --- a/drivers/isdn/i4l/Makefile +++ b/drivers/isdn/i4l/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_ISDN_I4L) += isdn.o obj-$(CONFIG_ISDN_PPP_BSDCOMP) += isdn_bsdcomp.o +obj-$(CONFIG_ISDN_HDLC) += isdnhdlc.o # Multipart objects. 
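Before the relocated source itself, a usage illustration (a minimal sketch, not from the patch; foo_rx(), foo_deliver(), foo_error() and MAX_FRAME are hypothetical): a receive path feeds raw B-channel bytes through isdnhdlc_decode() and hands completed frames up, using the count out-parameter to advance through the source buffer:

	#include <linux/isdn/hdlc.h>

	static void foo_rx(struct isdnhdlc_vars *rx, const u8 *buf, int len)
	{
		u8 frame[MAX_FRAME];
		int used, flen;

		while (len > 0) {
			flen = isdnhdlc_decode(rx, buf, len, &used,
					       frame, sizeof(frame));
			if (flen > 0)
				foo_deliver(frame, flen);	/* complete frame, CRC already stripped */
			else if (flen < 0)
				foo_error(-flen);	/* FRAMING/CRC/LENGTH error code */
			buf += used;	/* bytes the decoder consumed */
			len -= used;
		}
	}

rx must first have been set up with isdnhdlc_rcv_init(); the transmit direction is symmetric via isdnhdlc_out_init() and isdnhdlc_encode().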
diff --git a/drivers/isdn/i4l/isdnhdlc.c b/drivers/isdn/i4l/isdnhdlc.c new file mode 100644 index 000000000000..44ec7418496b --- /dev/null +++ b/drivers/isdn/i4l/isdnhdlc.c @@ -0,0 +1,603 @@ +/* + * isdnhdlc.c -- General purpose ISDN HDLC decoder. + * + *Copyright (C) 2002 Wolfgang Mües + * 2001 Frode Isaksen + * 2001 Kai Germaschewski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include + +/*-------------------------------------------------------------------*/ + +MODULE_AUTHOR("Wolfgang Mües , " + "Frode Isaksen , " + "Kai Germaschewski "); +MODULE_DESCRIPTION("General purpose ISDN HDLC decoder"); +MODULE_LICENSE("GPL"); + +/*-------------------------------------------------------------------*/ + +enum { + HDLC_FAST_IDLE,HDLC_GET_FLAG_B0,HDLC_GETFLAG_B1A6,HDLC_GETFLAG_B7, + HDLC_GET_DATA,HDLC_FAST_FLAG +}; + +enum { + HDLC_SEND_DATA,HDLC_SEND_CRC1,HDLC_SEND_FAST_FLAG, + HDLC_SEND_FIRST_FLAG,HDLC_SEND_CRC2,HDLC_SEND_CLOSING_FLAG, + HDLC_SEND_IDLE1,HDLC_SEND_FAST_IDLE,HDLC_SENDFLAG_B0, + HDLC_SENDFLAG_B1A6,HDLC_SENDFLAG_B7,STOPPED +}; + +void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56) +{ + hdlc->bit_shift = 0; + hdlc->hdlc_bits1 = 0; + hdlc->data_bits = 0; + hdlc->ffbit_shift = 0; + hdlc->data_received = 0; + hdlc->state = HDLC_GET_DATA; + hdlc->do_adapt56 = do_adapt56; + hdlc->dchannel = 0; + hdlc->crc = 0; + hdlc->cbin = 0; + hdlc->shift_reg = 0; + hdlc->ffvalue = 0; + hdlc->dstpos = 0; +} + +void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_adapt56) +{ + hdlc->bit_shift = 0; + hdlc->hdlc_bits1 = 0; + hdlc->data_bits = 0; + hdlc->ffbit_shift = 0; + hdlc->data_received = 0; + hdlc->do_closing = 0; + hdlc->ffvalue = 0; + if (is_d_channel) { + hdlc->dchannel = 1; + hdlc->state = HDLC_SEND_FIRST_FLAG; + } else { + hdlc->dchannel = 0; + hdlc->state = HDLC_SEND_FAST_FLAG; + hdlc->ffvalue = 0x7e; + } + hdlc->cbin = 0x7e; + hdlc->bit_shift = 0; + if(do_adapt56){ + hdlc->do_adapt56 = 1; + hdlc->data_bits = 0; + hdlc->state = HDLC_SENDFLAG_B0; + } else { + hdlc->do_adapt56 = 0; + hdlc->data_bits = 8; + } + hdlc->shift_reg = 0; +} + +/* + isdnhdlc_decode - decodes HDLC frames from a transparent bit stream. + + The source buffer is scanned for valid HDLC frames looking for + flags (01111110) to indicate the start of a frame. If the start of + the frame is found, the bit stuffing is removed (0 after 5 1's). + When a new flag is found, the complete frame has been received + and the CRC is checked. + If a valid frame is found, the function returns the frame length + excluding the CRC with the bit HDLC_END_OF_FRAME set. + If the beginning of a valid frame is found, the function returns + the length. + If a framing error is found (too many 1s and not a flag) the function + returns the length with the bit HDLC_FRAMING_ERROR set. 
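Aside, not from the patch: the zero-insertion this decoder strips out fits in a few lines on the transmit side (standalone sketch, one array element per bit for clarity):

	/* HDLC bit stuffing: insert a 0 after five consecutive 1 bits. */
	static int stuff_bits(const unsigned char *in, int n, unsigned char *out)
	{
		int i, ones = 0, o = 0;

		for (i = 0; i < n; i++) {
			out[o++] = in[i];
			if (in[i]) {
				if (++ones == 5) {
					out[o++] = 0;	/* stuffed zero */
					ones = 0;
				}
			} else
				ones = 0;
		}
		return o;	/* stuffed length, in bits */
	}

This guarantees the flag pattern 01111110 can only ever appear on the wire as a real flag, never inside data.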
+ If a CRC error is found the function returns the length with the + bit HDLC_CRC_ERROR set. + If the frame length exceeds the destination buffer size, the function + returns the length with the bit HDLC_LENGTH_ERROR set. + + src - source buffer + slen - source buffer length + count - number of bytes removed (decoded) from the source buffer + dst _ destination buffer + dsize - destination buffer size + returns - number of decoded bytes in the destination buffer and status + flag. + */ +int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, + int slen, int *count, unsigned char *dst, int dsize) +{ + int status=0; + + static const unsigned char fast_flag[]={ + 0x00,0x00,0x00,0x20,0x30,0x38,0x3c,0x3e,0x3f + }; + + static const unsigned char fast_flag_value[]={ + 0x00,0x7e,0xfc,0xf9,0xf3,0xe7,0xcf,0x9f,0x3f + }; + + static const unsigned char fast_abort[]={ + 0x00,0x00,0x80,0xc0,0xe0,0xf0,0xf8,0xfc,0xfe,0xff + }; + + *count = slen; + + while(slen > 0){ + if(hdlc->bit_shift==0){ + hdlc->cbin = *src++; + slen--; + hdlc->bit_shift = 8; + if(hdlc->do_adapt56){ + hdlc->bit_shift --; + } + } + + switch(hdlc->state){ + case STOPPED: + return 0; + case HDLC_FAST_IDLE: + if(hdlc->cbin == 0xff){ + hdlc->bit_shift = 0; + break; + } + hdlc->state = HDLC_GET_FLAG_B0; + hdlc->hdlc_bits1 = 0; + hdlc->bit_shift = 8; + break; + case HDLC_GET_FLAG_B0: + if(!(hdlc->cbin & 0x80)) { + hdlc->state = HDLC_GETFLAG_B1A6; + hdlc->hdlc_bits1 = 0; + } else { + if(!hdlc->do_adapt56){ + if(++hdlc->hdlc_bits1 >=8 ) if(hdlc->bit_shift==1) + hdlc->state = HDLC_FAST_IDLE; + } + } + hdlc->cbin<<=1; + hdlc->bit_shift --; + break; + case HDLC_GETFLAG_B1A6: + if(hdlc->cbin & 0x80){ + hdlc->hdlc_bits1++; + if(hdlc->hdlc_bits1==6){ + hdlc->state = HDLC_GETFLAG_B7; + } + } else { + hdlc->hdlc_bits1 = 0; + } + hdlc->cbin<<=1; + hdlc->bit_shift --; + break; + case HDLC_GETFLAG_B7: + if(hdlc->cbin & 0x80) { + hdlc->state = HDLC_GET_FLAG_B0; + } else { + hdlc->state = HDLC_GET_DATA; + hdlc->crc = 0xffff; + hdlc->shift_reg = 0; + hdlc->hdlc_bits1 = 0; + hdlc->data_bits = 0; + hdlc->data_received = 0; + } + hdlc->cbin<<=1; + hdlc->bit_shift --; + break; + case HDLC_GET_DATA: + if(hdlc->cbin & 0x80){ + hdlc->hdlc_bits1++; + switch(hdlc->hdlc_bits1){ + case 6: + break; + case 7: + if(hdlc->data_received) { + // bad frame + status = -HDLC_FRAMING_ERROR; + } + if(!hdlc->do_adapt56){ + if(hdlc->cbin==fast_abort[hdlc->bit_shift+1]){ + hdlc->state = HDLC_FAST_IDLE; + hdlc->bit_shift=1; + break; + } + } else { + hdlc->state = HDLC_GET_FLAG_B0; + } + break; + default: + hdlc->shift_reg>>=1; + hdlc->shift_reg |= 0x80; + hdlc->data_bits++; + break; + } + } else { + switch(hdlc->hdlc_bits1){ + case 5: + break; + case 6: + if(hdlc->data_received){ + if (hdlc->dstpos < 2) { + status = -HDLC_FRAMING_ERROR; + } else if (hdlc->crc != 0xf0b8){ + // crc error + status = -HDLC_CRC_ERROR; + } else { + // remove CRC + hdlc->dstpos -= 2; + // good frame + status = hdlc->dstpos; + } + } + hdlc->crc = 0xffff; + hdlc->shift_reg = 0; + hdlc->data_bits = 0; + if(!hdlc->do_adapt56){ + if(hdlc->cbin==fast_flag[hdlc->bit_shift]){ + hdlc->ffvalue = fast_flag_value[hdlc->bit_shift]; + hdlc->state = HDLC_FAST_FLAG; + hdlc->ffbit_shift = hdlc->bit_shift; + hdlc->bit_shift = 1; + } else { + hdlc->state = HDLC_GET_DATA; + hdlc->data_received = 0; + } + } else { + hdlc->state = HDLC_GET_DATA; + hdlc->data_received = 0; + } + break; + default: + hdlc->shift_reg>>=1; + hdlc->data_bits++; + break; + } + hdlc->hdlc_bits1 = 0; + } + if (status) { + hdlc->dstpos 
= 0; + *count -= slen; + hdlc->cbin <<= 1; + hdlc->bit_shift--; + return status; + } + if(hdlc->data_bits==8){ + hdlc->data_bits = 0; + hdlc->data_received = 1; + hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); + + // good byte received + if (hdlc->dstpos < dsize) { + dst[hdlc->dstpos++] = hdlc->shift_reg; + } else { + // frame too long + status = -HDLC_LENGTH_ERROR; + hdlc->dstpos = 0; + } + } + hdlc->cbin <<= 1; + hdlc->bit_shift--; + break; + case HDLC_FAST_FLAG: + if(hdlc->cbin==hdlc->ffvalue){ + hdlc->bit_shift = 0; + break; + } else { + if(hdlc->cbin == 0xff){ + hdlc->state = HDLC_FAST_IDLE; + hdlc->bit_shift=0; + } else if(hdlc->ffbit_shift==8){ + hdlc->state = HDLC_GETFLAG_B7; + break; + } else { + hdlc->shift_reg = fast_abort[hdlc->ffbit_shift-1]; + hdlc->hdlc_bits1 = hdlc->ffbit_shift-2; + if(hdlc->hdlc_bits1<0)hdlc->hdlc_bits1 = 0; + hdlc->data_bits = hdlc->ffbit_shift-1; + hdlc->state = HDLC_GET_DATA; + hdlc->data_received = 0; + } + } + break; + default: + break; + } + } + *count -= slen; + return 0; +} + +/* + isdnhdlc_encode - encodes HDLC frames to a transparent bit stream. + + The bit stream starts with a beginning flag (01111110). After + that each byte is added to the bit stream with bit stuffing added + (0 after 5 1's). + When the last byte has been removed from the source buffer, the + CRC (2 bytes is added) and the frame terminates with the ending flag. + For the dchannel, the idle character (all 1's) is also added at the end. + If this function is called with empty source buffer (slen=0), flags or + idle character will be generated. + + src - source buffer + slen - source buffer length + count - number of bytes removed (encoded) from source buffer + dst _ destination buffer + dsize - destination buffer size + returns - number of encoded bytes in the destination buffer +*/ +int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, + unsigned short slen, int *count, + unsigned char *dst, int dsize) +{ + static const unsigned char xfast_flag_value[] = { + 0x7e,0x3f,0x9f,0xcf,0xe7,0xf3,0xf9,0xfc,0x7e + }; + + int len = 0; + + *count = slen; + + while (dsize > 0) { + if(hdlc->bit_shift==0){ + if(slen && !hdlc->do_closing){ + hdlc->shift_reg = *src++; + slen--; + if (slen == 0) + hdlc->do_closing = 1; /* closing sequence, CRC + flag(s) */ + hdlc->bit_shift = 8; + } else { + if(hdlc->state == HDLC_SEND_DATA){ + if(hdlc->data_received){ + hdlc->state = HDLC_SEND_CRC1; + hdlc->crc ^= 0xffff; + hdlc->bit_shift = 8; + hdlc->shift_reg = hdlc->crc & 0xff; + } else if(!hdlc->do_adapt56){ + hdlc->state = HDLC_SEND_FAST_FLAG; + } else { + hdlc->state = HDLC_SENDFLAG_B0; + } + } + + } + } + + switch(hdlc->state){ + case STOPPED: + while (dsize--) + *dst++ = 0xff; + + return dsize; + case HDLC_SEND_FAST_FLAG: + hdlc->do_closing = 0; + if(slen == 0){ + *dst++ = hdlc->ffvalue; + len++; + dsize--; + break; + } + if(hdlc->bit_shift==8){ + hdlc->cbin = hdlc->ffvalue>>(8-hdlc->data_bits); + hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + hdlc->data_received = 1; + } + break; + case HDLC_SENDFLAG_B0: + hdlc->do_closing = 0; + hdlc->cbin <<= 1; + hdlc->data_bits++; + hdlc->hdlc_bits1 = 0; + hdlc->state = HDLC_SENDFLAG_B1A6; + break; + case HDLC_SENDFLAG_B1A6: + hdlc->cbin <<= 1; + hdlc->data_bits++; + hdlc->cbin++; + if(++hdlc->hdlc_bits1 == 6) + hdlc->state = HDLC_SENDFLAG_B7; + break; + case HDLC_SENDFLAG_B7: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(slen == 0){ + hdlc->state = HDLC_SENDFLAG_B0; + break; + } + if(hdlc->bit_shift==8){ 
+ hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + hdlc->data_received = 1; + } + break; + case HDLC_SEND_FIRST_FLAG: + hdlc->data_received = 1; + if(hdlc->data_bits==8){ + hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + break; + } + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(hdlc->shift_reg & 0x01) + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + if(hdlc->bit_shift==0){ + hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + } + break; + case HDLC_SEND_DATA: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(hdlc->hdlc_bits1 == 5){ + hdlc->hdlc_bits1 = 0; + break; + } + if(hdlc->bit_shift==8){ + hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); + } + if(hdlc->shift_reg & 0x01){ + hdlc->hdlc_bits1++; + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } else { + hdlc->hdlc_bits1 = 0; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } + break; + case HDLC_SEND_CRC1: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(hdlc->hdlc_bits1 == 5){ + hdlc->hdlc_bits1 = 0; + break; + } + if(hdlc->shift_reg & 0x01){ + hdlc->hdlc_bits1++; + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } else { + hdlc->hdlc_bits1 = 0; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } + if(hdlc->bit_shift==0){ + hdlc->shift_reg = (hdlc->crc >> 8); + hdlc->state = HDLC_SEND_CRC2; + hdlc->bit_shift = 8; + } + break; + case HDLC_SEND_CRC2: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(hdlc->hdlc_bits1 == 5){ + hdlc->hdlc_bits1 = 0; + break; + } + if(hdlc->shift_reg & 0x01){ + hdlc->hdlc_bits1++; + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } else { + hdlc->hdlc_bits1 = 0; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } + if(hdlc->bit_shift==0){ + hdlc->shift_reg = 0x7e; + hdlc->state = HDLC_SEND_CLOSING_FLAG; + hdlc->bit_shift = 8; + } + break; + case HDLC_SEND_CLOSING_FLAG: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(hdlc->hdlc_bits1 == 5){ + hdlc->hdlc_bits1 = 0; + break; + } + if(hdlc->shift_reg & 0x01){ + hdlc->cbin++; + } + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + if(hdlc->bit_shift==0){ + hdlc->ffvalue = xfast_flag_value[hdlc->data_bits]; + if(hdlc->dchannel){ + hdlc->ffvalue = 0x7e; + hdlc->state = HDLC_SEND_IDLE1; + hdlc->bit_shift = 8-hdlc->data_bits; + if(hdlc->bit_shift==0) + hdlc->state = HDLC_SEND_FAST_IDLE; + } else { + if(!hdlc->do_adapt56){ + hdlc->state = HDLC_SEND_FAST_FLAG; + hdlc->data_received = 0; + } else { + hdlc->state = HDLC_SENDFLAG_B0; + hdlc->data_received = 0; + } + // Finished with this frame, send flags + if (dsize > 1) dsize = 1; + } + } + break; + case HDLC_SEND_IDLE1: + hdlc->do_closing = 0; + hdlc->cbin <<= 1; + hdlc->cbin++; + hdlc->data_bits++; + hdlc->bit_shift--; + if(hdlc->bit_shift==0){ + hdlc->state = HDLC_SEND_FAST_IDLE; + hdlc->bit_shift = 0; + } + break; + case HDLC_SEND_FAST_IDLE: + hdlc->do_closing = 0; + hdlc->cbin = 0xff; + hdlc->data_bits = 8; + if(hdlc->bit_shift == 8){ + hdlc->cbin = 0x7e; + hdlc->state = HDLC_SEND_FIRST_FLAG; + } else { + *dst++ = hdlc->cbin; + hdlc->bit_shift = hdlc->data_bits = 0; + len++; + dsize = 0; + } + break; + default: + break; + } + if(hdlc->do_adapt56){ + if(hdlc->data_bits==7){ + hdlc->cbin <<= 1; + hdlc->cbin++; + hdlc->data_bits++; + } + } + if(hdlc->data_bits==8){ + *dst++ = hdlc->cbin; + hdlc->data_bits = 0; + len++; + dsize--; + } + } + *count -= slen; + + return len; +} + +EXPORT_SYMBOL(isdnhdlc_rcv_init); +EXPORT_SYMBOL(isdnhdlc_decode); +EXPORT_SYMBOL(isdnhdlc_out_init); 
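Aside, not from the patch (foo_fill_fifo() is hypothetical): the transmit side mirrors the receive sketch earlier, pulling stuffed bytes into a controller FIFO:

	static int foo_fill_fifo(struct isdnhdlc_vars *tx, struct sk_buff *skb,
				 u8 *fifo, int room)
	{
		int used, n;

		n = isdnhdlc_encode(tx, skb->data, skb->len, &used, fifo, room);
		skb_pull(skb, used);	/* drop what the encoder consumed */
		return n;		/* bytes ready for the hardware */
	}

Called with slen == 0 once the frame is drained, isdnhdlc_encode() keeps generating flag or idle fill, as the function's own comment describes.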
+EXPORT_SYMBOL(isdnhdlc_encode); diff --git a/include/linux/isdn/hdlc.h b/include/linux/isdn/hdlc.h new file mode 100644 index 000000000000..cf0a95a24015 --- /dev/null +++ b/include/linux/isdn/hdlc.h @@ -0,0 +1,70 @@ +/* + * isdnhdlc.h -- General purpose ISDN HDLC decoder. + * + * Implementation of a HDLC decoder/encoder in software. + * Neccessary because some ISDN devices don't have HDLC + * controllers. Also included: a bit reversal table. + * + *Copyright (C) 2002 Wolfgang Mües + * 2001 Frode Isaksen + * 2001 Kai Germaschewski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __ISDNHDLC_H__ +#define __ISDNHDLC_H__ + +struct isdnhdlc_vars { + int bit_shift; + int hdlc_bits1; + int data_bits; + int ffbit_shift; // encoding only + int state; + int dstpos; + + unsigned short crc; + + unsigned char cbin; + unsigned char shift_reg; + unsigned char ffvalue; + + unsigned int data_received:1; // set if transferring data + unsigned int dchannel:1; // set if D channel (send idle instead of flags) + unsigned int do_adapt56:1; // set if 56K adaptation + unsigned int do_closing:1; // set if in closing phase (need to send CRC + flag +}; + + +/* + The return value from isdnhdlc_decode is + the frame length, 0 if no complete frame was decoded, + or a negative error number +*/ +#define HDLC_FRAMING_ERROR 1 +#define HDLC_CRC_ERROR 2 +#define HDLC_LENGTH_ERROR 3 + +extern void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56); + +extern int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, int slen,int *count, + unsigned char *dst, int dsize); + +extern void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc,int is_d_channel,int do_adapt56); + +extern int isdnhdlc_encode (struct isdnhdlc_vars *hdlc,const unsigned char *src,unsigned short slen,int *count, + unsigned char *dst,int dsize); + +#endif /* __ISDNHDLC_H__ */ -- cgit v1.2.3 From 6bd4bcd3cd8affc09eaee7efbc037f65f4a71501 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 8 Jul 2009 19:11:09 +0200 Subject: ISDN: Clean up isdnhdlc code Clean up isdnhdlc to meet current code standard. Remove hint to already removed bit reversal table. Signed-off-by: Karsten Keil --- drivers/isdn/i4l/isdnhdlc.c | 366 +++++++++++++++++++++++--------------------- include/linux/isdn/hdlc.h | 68 ++++---- 2 files changed, 231 insertions(+), 203 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/i4l/isdnhdlc.c b/drivers/isdn/i4l/isdnhdlc.c index 44ec7418496b..b80e55ab8914 100644 --- a/drivers/isdn/i4l/isdnhdlc.c +++ b/drivers/isdn/i4l/isdnhdlc.c @@ -1,23 +1,24 @@ /* * isdnhdlc.c -- General purpose ISDN HDLC decoder. 
* - *Copyright (C) 2002 Wolfgang Mües - * 2001 Frode Isaksen - * 2001 Kai Germaschewski + * Copyright (C) + * 2002 Wolfgang Mües + * 2001 Frode Isaksen + * 2001 Kai Germaschewski * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include @@ -36,20 +37,20 @@ MODULE_LICENSE("GPL"); /*-------------------------------------------------------------------*/ enum { - HDLC_FAST_IDLE,HDLC_GET_FLAG_B0,HDLC_GETFLAG_B1A6,HDLC_GETFLAG_B7, - HDLC_GET_DATA,HDLC_FAST_FLAG + HDLC_FAST_IDLE, HDLC_GET_FLAG_B0, HDLC_GETFLAG_B1A6, HDLC_GETFLAG_B7, + HDLC_GET_DATA, HDLC_FAST_FLAG }; enum { - HDLC_SEND_DATA,HDLC_SEND_CRC1,HDLC_SEND_FAST_FLAG, - HDLC_SEND_FIRST_FLAG,HDLC_SEND_CRC2,HDLC_SEND_CLOSING_FLAG, - HDLC_SEND_IDLE1,HDLC_SEND_FAST_IDLE,HDLC_SENDFLAG_B0, - HDLC_SENDFLAG_B1A6,HDLC_SENDFLAG_B7,STOPPED + HDLC_SEND_DATA, HDLC_SEND_CRC1, HDLC_SEND_FAST_FLAG, + HDLC_SEND_FIRST_FLAG, HDLC_SEND_CRC2, HDLC_SEND_CLOSING_FLAG, + HDLC_SEND_IDLE1, HDLC_SEND_FAST_IDLE, HDLC_SENDFLAG_B0, + HDLC_SENDFLAG_B1A6, HDLC_SENDFLAG_B7, STOPPED }; -void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56) +void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, int do_adapt56) { - hdlc->bit_shift = 0; + hdlc->bit_shift = 0; hdlc->hdlc_bits1 = 0; hdlc->data_bits = 0; hdlc->ffbit_shift = 0; @@ -63,10 +64,12 @@ void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56) hdlc->ffvalue = 0; hdlc->dstpos = 0; } +EXPORT_SYMBOL(isdnhdlc_out_init); -void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_adapt56) +void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, int is_d_channel, + int do_adapt56) { - hdlc->bit_shift = 0; + hdlc->bit_shift = 0; hdlc->hdlc_bits1 = 0; hdlc->data_bits = 0; hdlc->ffbit_shift = 0; @@ -83,7 +86,7 @@ void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_ada } hdlc->cbin = 0x7e; hdlc->bit_shift = 0; - if(do_adapt56){ + if (do_adapt56) { hdlc->do_adapt56 = 1; hdlc->data_bits = 0; hdlc->state = HDLC_SENDFLAG_B0; @@ -93,6 +96,25 @@ void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_ada } hdlc->shift_reg = 0; } +EXPORT_SYMBOL(isdnhdlc_rcv_init); + +static int +check_frame(struct isdnhdlc_vars *hdlc) +{ 
+ int status; + + if (hdlc->dstpos < 2) /* too small - framing error */ + status = -HDLC_FRAMING_ERROR; + else if (hdlc->crc != 0xf0b8) /* crc error */ + status = -HDLC_CRC_ERROR; + else { + /* remove CRC */ + hdlc->dstpos -= 2; + /* good frame */ + status = hdlc->dstpos; + } + return status; +} /* isdnhdlc_decode - decodes HDLC frames from a transparent bit stream. @@ -121,40 +143,63 @@ void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_ada returns - number of decoded bytes in the destination buffer and status flag. */ -int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, - int slen, int *count, unsigned char *dst, int dsize) +int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, int slen, + int *count, u8 *dst, int dsize) { - int status=0; + int status = 0; - static const unsigned char fast_flag[]={ - 0x00,0x00,0x00,0x20,0x30,0x38,0x3c,0x3e,0x3f + static const unsigned char fast_flag[] = { + 0x00, 0x00, 0x00, 0x20, 0x30, 0x38, 0x3c, 0x3e, 0x3f }; - static const unsigned char fast_flag_value[]={ - 0x00,0x7e,0xfc,0xf9,0xf3,0xe7,0xcf,0x9f,0x3f + static const unsigned char fast_flag_value[] = { + 0x00, 0x7e, 0xfc, 0xf9, 0xf3, 0xe7, 0xcf, 0x9f, 0x3f }; - static const unsigned char fast_abort[]={ - 0x00,0x00,0x80,0xc0,0xe0,0xf0,0xf8,0xfc,0xfe,0xff + static const unsigned char fast_abort[] = { + 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff }; +#define handle_fast_flag(h) \ + do {\ + if (h->cbin == fast_flag[h->bit_shift]) {\ + h->ffvalue = fast_flag_value[h->bit_shift];\ + h->state = HDLC_FAST_FLAG;\ + h->ffbit_shift = h->bit_shift;\ + h->bit_shift = 1;\ + } else {\ + h->state = HDLC_GET_DATA;\ + h->data_received = 0;\ + } \ + } while (0) + +#define handle_abort(h) \ + do {\ + h->shift_reg = fast_abort[h->ffbit_shift - 1];\ + h->hdlc_bits1 = h->ffbit_shift - 2;\ + if (h->hdlc_bits1 < 0)\ + h->hdlc_bits1 = 0;\ + h->data_bits = h->ffbit_shift - 1;\ + h->state = HDLC_GET_DATA;\ + h->data_received = 0;\ + } while (0) + *count = slen; - while(slen > 0){ - if(hdlc->bit_shift==0){ + while (slen > 0) { + if (hdlc->bit_shift == 0) { hdlc->cbin = *src++; slen--; hdlc->bit_shift = 8; - if(hdlc->do_adapt56){ - hdlc->bit_shift --; - } + if (hdlc->do_adapt56) + hdlc->bit_shift--; } - switch(hdlc->state){ + switch (hdlc->state) { case STOPPED: return 0; case HDLC_FAST_IDLE: - if(hdlc->cbin == 0xff){ + if (hdlc->cbin == 0xff) { hdlc->bit_shift = 0; break; } @@ -163,32 +208,30 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->bit_shift = 8; break; case HDLC_GET_FLAG_B0: - if(!(hdlc->cbin & 0x80)) { + if (!(hdlc->cbin & 0x80)) { hdlc->state = HDLC_GETFLAG_B1A6; hdlc->hdlc_bits1 = 0; } else { - if(!hdlc->do_adapt56){ - if(++hdlc->hdlc_bits1 >=8 ) if(hdlc->bit_shift==1) + if ((!hdlc->do_adapt56) && + (++hdlc->hdlc_bits1 >= 8) && + (hdlc->bit_shift == 1)) hdlc->state = HDLC_FAST_IDLE; - } } - hdlc->cbin<<=1; - hdlc->bit_shift --; + hdlc->cbin <<= 1; + hdlc->bit_shift--; break; case HDLC_GETFLAG_B1A6: - if(hdlc->cbin & 0x80){ + if (hdlc->cbin & 0x80) { hdlc->hdlc_bits1++; - if(hdlc->hdlc_bits1==6){ + if (hdlc->hdlc_bits1 == 6) hdlc->state = HDLC_GETFLAG_B7; - } - } else { + } else hdlc->hdlc_bits1 = 0; - } - hdlc->cbin<<=1; - hdlc->bit_shift --; + hdlc->cbin <<= 1; + hdlc->bit_shift--; break; case HDLC_GETFLAG_B7: - if(hdlc->cbin & 0x80) { + if (hdlc->cbin & 0x80) { hdlc->state = HDLC_GET_FLAG_B0; } else { hdlc->state = HDLC_GET_DATA; @@ -198,74 +241,55 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const 
unsigned char *src, hdlc->data_bits = 0; hdlc->data_received = 0; } - hdlc->cbin<<=1; - hdlc->bit_shift --; + hdlc->cbin <<= 1; + hdlc->bit_shift--; break; case HDLC_GET_DATA: - if(hdlc->cbin & 0x80){ + if (hdlc->cbin & 0x80) { hdlc->hdlc_bits1++; - switch(hdlc->hdlc_bits1){ + switch (hdlc->hdlc_bits1) { case 6: break; case 7: - if(hdlc->data_received) { - // bad frame + if (hdlc->data_received) + /* bad frame */ status = -HDLC_FRAMING_ERROR; - } - if(!hdlc->do_adapt56){ - if(hdlc->cbin==fast_abort[hdlc->bit_shift+1]){ - hdlc->state = HDLC_FAST_IDLE; - hdlc->bit_shift=1; + if (!hdlc->do_adapt56) { + if (hdlc->cbin == fast_abort + [hdlc->bit_shift + 1]) { + hdlc->state = + HDLC_FAST_IDLE; + hdlc->bit_shift = 1; break; } - } else { + } else hdlc->state = HDLC_GET_FLAG_B0; - } break; default: - hdlc->shift_reg>>=1; + hdlc->shift_reg >>= 1; hdlc->shift_reg |= 0x80; hdlc->data_bits++; break; } } else { - switch(hdlc->hdlc_bits1){ + switch (hdlc->hdlc_bits1) { case 5: break; case 6: - if(hdlc->data_received){ - if (hdlc->dstpos < 2) { - status = -HDLC_FRAMING_ERROR; - } else if (hdlc->crc != 0xf0b8){ - // crc error - status = -HDLC_CRC_ERROR; - } else { - // remove CRC - hdlc->dstpos -= 2; - // good frame - status = hdlc->dstpos; - } - } + if (hdlc->data_received) + status = check_frame(hdlc); hdlc->crc = 0xffff; hdlc->shift_reg = 0; hdlc->data_bits = 0; - if(!hdlc->do_adapt56){ - if(hdlc->cbin==fast_flag[hdlc->bit_shift]){ - hdlc->ffvalue = fast_flag_value[hdlc->bit_shift]; - hdlc->state = HDLC_FAST_FLAG; - hdlc->ffbit_shift = hdlc->bit_shift; - hdlc->bit_shift = 1; - } else { - hdlc->state = HDLC_GET_DATA; - hdlc->data_received = 0; - } - } else { + if (!hdlc->do_adapt56) + handle_fast_flag(hdlc); + else { hdlc->state = HDLC_GET_DATA; hdlc->data_received = 0; } break; default: - hdlc->shift_reg>>=1; + hdlc->shift_reg >>= 1; hdlc->data_bits++; break; } @@ -278,16 +302,17 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->bit_shift--; return status; } - if(hdlc->data_bits==8){ + if (hdlc->data_bits == 8) { hdlc->data_bits = 0; hdlc->data_received = 1; - hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); + hdlc->crc = crc_ccitt_byte(hdlc->crc, + hdlc->shift_reg); - // good byte received - if (hdlc->dstpos < dsize) { + /* good byte received */ + if (hdlc->dstpos < dsize) dst[hdlc->dstpos++] = hdlc->shift_reg; - } else { - // frame too long + else { + /* frame too long */ status = -HDLC_LENGTH_ERROR; hdlc->dstpos = 0; } @@ -296,24 +321,18 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->bit_shift--; break; case HDLC_FAST_FLAG: - if(hdlc->cbin==hdlc->ffvalue){ + if (hdlc->cbin == hdlc->ffvalue) { hdlc->bit_shift = 0; break; } else { - if(hdlc->cbin == 0xff){ + if (hdlc->cbin == 0xff) { hdlc->state = HDLC_FAST_IDLE; - hdlc->bit_shift=0; - } else if(hdlc->ffbit_shift==8){ + hdlc->bit_shift = 0; + } else if (hdlc->ffbit_shift == 8) { hdlc->state = HDLC_GETFLAG_B7; break; - } else { - hdlc->shift_reg = fast_abort[hdlc->ffbit_shift-1]; - hdlc->hdlc_bits1 = hdlc->ffbit_shift-2; - if(hdlc->hdlc_bits1<0)hdlc->hdlc_bits1 = 0; - hdlc->data_bits = hdlc->ffbit_shift-1; - hdlc->state = HDLC_GET_DATA; - hdlc->data_received = 0; - } + } else + handle_abort(hdlc); } break; default: @@ -323,7 +342,7 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, *count -= slen; return 0; } - +EXPORT_SYMBOL(isdnhdlc_decode); /* isdnhdlc_encode - encodes HDLC frames to a transparent bit stream. 
@@ -343,12 +362,11 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, dsize - destination buffer size returns - number of encoded bytes in the destination buffer */ -int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, - unsigned short slen, int *count, - unsigned char *dst, int dsize) +int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, + int *count, u8 *dst, int dsize) { static const unsigned char xfast_flag_value[] = { - 0x7e,0x3f,0x9f,0xcf,0xe7,0xf3,0xf9,0xfc,0x7e + 0x7e, 0x3f, 0x9f, 0xcf, 0xe7, 0xf3, 0xf9, 0xfc, 0x7e }; int len = 0; @@ -356,31 +374,34 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, *count = slen; while (dsize > 0) { - if(hdlc->bit_shift==0){ - if(slen && !hdlc->do_closing){ + if (hdlc->bit_shift == 0) { + if (slen && !hdlc->do_closing) { hdlc->shift_reg = *src++; slen--; if (slen == 0) - hdlc->do_closing = 1; /* closing sequence, CRC + flag(s) */ + /* closing sequence, CRC + flag(s) */ + hdlc->do_closing = 1; hdlc->bit_shift = 8; } else { - if(hdlc->state == HDLC_SEND_DATA){ - if(hdlc->data_received){ + if (hdlc->state == HDLC_SEND_DATA) { + if (hdlc->data_received) { hdlc->state = HDLC_SEND_CRC1; hdlc->crc ^= 0xffff; hdlc->bit_shift = 8; - hdlc->shift_reg = hdlc->crc & 0xff; - } else if(!hdlc->do_adapt56){ - hdlc->state = HDLC_SEND_FAST_FLAG; - } else { - hdlc->state = HDLC_SENDFLAG_B0; - } + hdlc->shift_reg = + hdlc->crc & 0xff; + } else if (!hdlc->do_adapt56) + hdlc->state = + HDLC_SEND_FAST_FLAG; + else + hdlc->state = + HDLC_SENDFLAG_B0; } } } - switch(hdlc->state){ + switch (hdlc->state) { case STOPPED: while (dsize--) *dst++ = 0xff; @@ -388,14 +409,15 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, return dsize; case HDLC_SEND_FAST_FLAG: hdlc->do_closing = 0; - if(slen == 0){ + if (slen == 0) { *dst++ = hdlc->ffvalue; len++; dsize--; break; } - if(hdlc->bit_shift==8){ - hdlc->cbin = hdlc->ffvalue>>(8-hdlc->data_bits); + if (hdlc->bit_shift == 8) { + hdlc->cbin = hdlc->ffvalue >> + (8 - hdlc->data_bits); hdlc->state = HDLC_SEND_DATA; hdlc->crc = 0xffff; hdlc->hdlc_bits1 = 0; @@ -413,17 +435,17 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->cbin <<= 1; hdlc->data_bits++; hdlc->cbin++; - if(++hdlc->hdlc_bits1 == 6) + if (++hdlc->hdlc_bits1 == 6) hdlc->state = HDLC_SENDFLAG_B7; break; case HDLC_SENDFLAG_B7: hdlc->cbin <<= 1; hdlc->data_bits++; - if(slen == 0){ + if (slen == 0) { hdlc->state = HDLC_SENDFLAG_B0; break; } - if(hdlc->bit_shift==8){ + if (hdlc->bit_shift == 8) { hdlc->state = HDLC_SEND_DATA; hdlc->crc = 0xffff; hdlc->hdlc_bits1 = 0; @@ -432,7 +454,7 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, break; case HDLC_SEND_FIRST_FLAG: hdlc->data_received = 1; - if(hdlc->data_bits==8){ + if (hdlc->data_bits == 8) { hdlc->state = HDLC_SEND_DATA; hdlc->crc = 0xffff; hdlc->hdlc_bits1 = 0; @@ -440,11 +462,11 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, } hdlc->cbin <<= 1; hdlc->data_bits++; - if(hdlc->shift_reg & 0x01) + if (hdlc->shift_reg & 0x01) hdlc->cbin++; hdlc->shift_reg >>= 1; hdlc->bit_shift--; - if(hdlc->bit_shift==0){ + if (hdlc->bit_shift == 0) { hdlc->state = HDLC_SEND_DATA; hdlc->crc = 0xffff; hdlc->hdlc_bits1 = 0; @@ -453,14 +475,14 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, case HDLC_SEND_DATA: hdlc->cbin <<= 1; hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ + if (hdlc->hdlc_bits1 == 5) { 
hdlc->hdlc_bits1 = 0; break; } - if(hdlc->bit_shift==8){ - hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); - } - if(hdlc->shift_reg & 0x01){ + if (hdlc->bit_shift == 8) + hdlc->crc = crc_ccitt_byte(hdlc->crc, + hdlc->shift_reg); + if (hdlc->shift_reg & 0x01) { hdlc->hdlc_bits1++; hdlc->cbin++; hdlc->shift_reg >>= 1; @@ -474,11 +496,11 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, case HDLC_SEND_CRC1: hdlc->cbin <<= 1; hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ + if (hdlc->hdlc_bits1 == 5) { hdlc->hdlc_bits1 = 0; break; } - if(hdlc->shift_reg & 0x01){ + if (hdlc->shift_reg & 0x01) { hdlc->hdlc_bits1++; hdlc->cbin++; hdlc->shift_reg >>= 1; @@ -488,7 +510,7 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->shift_reg >>= 1; hdlc->bit_shift--; } - if(hdlc->bit_shift==0){ + if (hdlc->bit_shift == 0) { hdlc->shift_reg = (hdlc->crc >> 8); hdlc->state = HDLC_SEND_CRC2; hdlc->bit_shift = 8; @@ -497,11 +519,11 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, case HDLC_SEND_CRC2: hdlc->cbin <<= 1; hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ + if (hdlc->hdlc_bits1 == 5) { hdlc->hdlc_bits1 = 0; break; } - if(hdlc->shift_reg & 0x01){ + if (hdlc->shift_reg & 0x01) { hdlc->hdlc_bits1++; hdlc->cbin++; hdlc->shift_reg >>= 1; @@ -511,7 +533,7 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->shift_reg >>= 1; hdlc->bit_shift--; } - if(hdlc->bit_shift==0){ + if (hdlc->bit_shift == 0) { hdlc->shift_reg = 0x7e; hdlc->state = HDLC_SEND_CLOSING_FLAG; hdlc->bit_shift = 8; @@ -520,33 +542,36 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, case HDLC_SEND_CLOSING_FLAG: hdlc->cbin <<= 1; hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ + if (hdlc->hdlc_bits1 == 5) { hdlc->hdlc_bits1 = 0; break; } - if(hdlc->shift_reg & 0x01){ + if (hdlc->shift_reg & 0x01) hdlc->cbin++; - } hdlc->shift_reg >>= 1; hdlc->bit_shift--; - if(hdlc->bit_shift==0){ - hdlc->ffvalue = xfast_flag_value[hdlc->data_bits]; - if(hdlc->dchannel){ + if (hdlc->bit_shift == 0) { + hdlc->ffvalue = + xfast_flag_value[hdlc->data_bits]; + if (hdlc->dchannel) { hdlc->ffvalue = 0x7e; hdlc->state = HDLC_SEND_IDLE1; hdlc->bit_shift = 8-hdlc->data_bits; - if(hdlc->bit_shift==0) - hdlc->state = HDLC_SEND_FAST_IDLE; + if (hdlc->bit_shift == 0) + hdlc->state = + HDLC_SEND_FAST_IDLE; } else { - if(!hdlc->do_adapt56){ - hdlc->state = HDLC_SEND_FAST_FLAG; + if (!hdlc->do_adapt56) { + hdlc->state = + HDLC_SEND_FAST_FLAG; hdlc->data_received = 0; } else { hdlc->state = HDLC_SENDFLAG_B0; hdlc->data_received = 0; } - // Finished with this frame, send flags - if (dsize > 1) dsize = 1; + /* Finished this frame, send flags */ + if (dsize > 1) + dsize = 1; } } break; @@ -556,7 +581,7 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->cbin++; hdlc->data_bits++; hdlc->bit_shift--; - if(hdlc->bit_shift==0){ + if (hdlc->bit_shift == 0) { hdlc->state = HDLC_SEND_FAST_IDLE; hdlc->bit_shift = 0; } @@ -565,12 +590,13 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->do_closing = 0; hdlc->cbin = 0xff; hdlc->data_bits = 8; - if(hdlc->bit_shift == 8){ + if (hdlc->bit_shift == 8) { hdlc->cbin = 0x7e; hdlc->state = HDLC_SEND_FIRST_FLAG; } else { *dst++ = hdlc->cbin; - hdlc->bit_shift = hdlc->data_bits = 0; + hdlc->bit_shift = 0; + hdlc->data_bits = 0; len++; dsize = 0; } @@ -578,14 +604,14 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char 
*src, default: break; } - if(hdlc->do_adapt56){ - if(hdlc->data_bits==7){ + if (hdlc->do_adapt56) { + if (hdlc->data_bits == 7) { hdlc->cbin <<= 1; hdlc->cbin++; hdlc->data_bits++; } } - if(hdlc->data_bits==8){ + if (hdlc->data_bits == 8) { *dst++ = hdlc->cbin; hdlc->data_bits = 0; len++; @@ -596,8 +622,4 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, return len; } - -EXPORT_SYMBOL(isdnhdlc_rcv_init); -EXPORT_SYMBOL(isdnhdlc_decode); -EXPORT_SYMBOL(isdnhdlc_out_init); EXPORT_SYMBOL(isdnhdlc_encode); diff --git a/include/linux/isdn/hdlc.h b/include/linux/isdn/hdlc.h index cf0a95a24015..8f3540c7f692 100644 --- a/include/linux/isdn/hdlc.h +++ b/include/linux/isdn/hdlc.h @@ -1,27 +1,28 @@ /* - * isdnhdlc.h -- General purpose ISDN HDLC decoder. + * hdlc.h -- General purpose ISDN HDLC decoder. * * Implementation of a HDLC decoder/encoder in software. * Neccessary because some ISDN devices don't have HDLC - * controllers. Also included: a bit reversal table. + * controllers. * - *Copyright (C) 2002 Wolfgang Mües - * 2001 Frode Isaksen - * 2001 Kai Germaschewski + * Copyright (C) + * 2002 Wolfgang Mües + * 2001 Frode Isaksen + * 2001 Kai Germaschewski * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #ifndef __ISDNHDLC_H__ @@ -31,20 +32,24 @@ struct isdnhdlc_vars { int bit_shift; int hdlc_bits1; int data_bits; - int ffbit_shift; // encoding only + int ffbit_shift; /* encoding only */ int state; int dstpos; - unsigned short crc; + u16 crc; - unsigned char cbin; - unsigned char shift_reg; - unsigned char ffvalue; + u8 cbin; + u8 shift_reg; + u8 ffvalue; - unsigned int data_received:1; // set if transferring data - unsigned int dchannel:1; // set if D channel (send idle instead of flags) - unsigned int do_adapt56:1; // set if 56K adaptation - unsigned int do_closing:1; // set if in closing phase (need to send CRC + flag + /* set if transferring data */ + u32 data_received:1; + /* set if D channel (send idle instead of flags) */ + u32 dchannel:1; + /* set if 56K adaptation */ + u32 do_adapt56:1; + /* set if in closing phase (need to send CRC + flag) */ + u32 do_closing:1; }; @@ -57,14 +62,15 @@ struct isdnhdlc_vars { #define HDLC_CRC_ERROR 2 #define HDLC_LENGTH_ERROR 3 -extern void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56); +extern void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, int do_adapt56); -extern int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, int slen,int *count, - unsigned char *dst, int dsize); +extern int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, + int slen, int *count, u8 *dst, int dsize); -extern void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc,int is_d_channel,int do_adapt56); +extern void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, int is_d_channel, + int do_adapt56); -extern int isdnhdlc_encode (struct isdnhdlc_vars *hdlc,const unsigned char *src,unsigned short slen,int *count, - unsigned char *dst,int dsize); +extern int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, + u16 slen, int *count, u8 *dst, int dsize); #endif /* __ISDNHDLC_H__ */ -- cgit v1.2.3 From c38fc3bc2ecddd4f5278131603e6964cbed071b2 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 8 Jul 2009 20:31:42 +0200 Subject: ISDN: Add support for non-reversed bit streams to isdnhdlc The original isdnhdlc code was developed for devices that reverse the bit order in the byte stream. Add code to handle normal bit streams as well.
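
[Editor's note] The hunks in this patch replace the old boolean init arguments with a feature bitmask (HDLC_BITREVERSE, HDLC_56KBIT, HDLC_DCHANNEL) and pass non-reversed input through bitrev8(). A minimal caller sketch of the new receive API follows; the wrapper functions, the 512-byte buffer, and deliver_frame() are illustrative assumptions, not part of the patch:

	#include <linux/types.h>
	#include <linux/isdn/hdlc.h>

	extern void deliver_frame(const u8 *buf, int len);	/* hypothetical upper layer */

	static struct isdnhdlc_vars rcv_state;

	static void example_setup(bool is_56k)
	{
		u32 features = HDLC_BITREVERSE;	/* keep the legacy reversed bit order */

		if (is_56k)
			features |= HDLC_56KBIT;	/* 56K adaptation */
		isdnhdlc_rcv_init(&rcv_state, features);
	}

	/* Feed one hardware buffer to the decoder.  isdnhdlc_decode() returns a
	 * frame length > 0 (CRC already stripped) or a negative -HDLC_*_ERROR
	 * code, and reports the bytes it consumed through *count. */
	static void example_rcv(const u8 *src, int slen)
	{
		u8 dst[512];		/* illustrative maximum frame size */
		int count, len;

		while (slen > 0) {
			len = isdnhdlc_decode(&rcv_state, src, slen, &count,
					      dst, sizeof(dst));
			src += count;
			slen -= count;
			if (len > 0)
				deliver_frame(dst, len);
		}
	}

[End of note]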
Signed-off-by: Karsten Keil --- drivers/isdn/hisax/st5481_b.c | 5 ++- drivers/isdn/hisax/st5481_d.c | 2 +- drivers/isdn/hisax/st5481_usb.c | 11 ++++--- drivers/isdn/i4l/isdnhdlc.c | 70 ++++++++++++++++++++--------------------- include/linux/isdn/hdlc.h | 12 +++++-- 5 files changed, 56 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c index 0074b600a0ef..95b1cdd97958 100644 --- a/drivers/isdn/hisax/st5481_b.c +++ b/drivers/isdn/hisax/st5481_b.c @@ -218,7 +218,10 @@ static void st5481B_mode(struct st5481_bcs *bcs, int mode) if (bcs->mode != L1_MODE_NULL) { // Open the B channel if (bcs->mode != L1_MODE_TRANS) { - isdnhdlc_out_init(&b_out->hdlc_state, 0, bcs->mode == L1_MODE_HDLC_56K); + u32 features = HDLC_BITREVERSE; + if (bcs->mode == L1_MODE_HDLC_56K) + features |= HDLC_56KBIT; + isdnhdlc_out_init(&b_out->hdlc_state, features); } st5481_usb_pipe_reset(adapter, (bcs->channel+1)*2, NULL, NULL); diff --git a/drivers/isdn/hisax/st5481_d.c b/drivers/isdn/hisax/st5481_d.c index 077991c1cd05..39e8e49cfd2d 100644 --- a/drivers/isdn/hisax/st5481_d.c +++ b/drivers/isdn/hisax/st5481_d.c @@ -417,7 +417,7 @@ static void dout_start_xmit(struct FsmInst *fsm, int event, void *arg) DBG(2,"len=%d",skb->len); - isdnhdlc_out_init(&d_out->hdlc_state, 1, 0); + isdnhdlc_out_init(&d_out->hdlc_state, HDLC_DCHANNEL | HDLC_BITREVERSE); if (test_and_set_bit(buf_nr, &d_out->busy)) { WARNING("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy); diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c index 2b3a055059ea..10d41c5d73ed 100644 --- a/drivers/isdn/hisax/st5481_usb.c +++ b/drivers/isdn/hisax/st5481_usb.c @@ -637,10 +637,13 @@ void st5481_in_mode(struct st5481_in *in, int mode) usb_unlink_urb(in->urb[1]); if (in->mode != L1_MODE_NULL) { - if (in->mode != L1_MODE_TRANS) - isdnhdlc_rcv_init(&in->hdlc_state, - in->mode == L1_MODE_HDLC_56K); - + if (in->mode != L1_MODE_TRANS) { + u32 features = HDLC_BITREVERSE; + + if (in->mode == L1_MODE_HDLC_56K) + features |= HDLC_56KBIT; + isdnhdlc_rcv_init(&in->hdlc_state, features); + } st5481_usb_pipe_reset(in->adapter, in->ep, NULL, NULL); st5481_usb_device_ctrl_msg(in->adapter, in->counter, in->packet_size, diff --git a/drivers/isdn/i4l/isdnhdlc.c b/drivers/isdn/i4l/isdnhdlc.c index b80e55ab8914..df345ce73f48 100644 --- a/drivers/isdn/i4l/isdnhdlc.c +++ b/drivers/isdn/i4l/isdnhdlc.c @@ -2,6 +2,7 @@ * isdnhdlc.c -- General purpose ISDN HDLC decoder. 
* * Copyright (C) + * 2009 Karsten Keil * 2002 Wolfgang Mües * 2001 Frode Isaksen * 2001 Kai Germaschewski @@ -25,6 +26,7 @@ #include #include #include +#include /*-------------------------------------------------------------------*/ @@ -48,35 +50,21 @@ enum { HDLC_SENDFLAG_B1A6, HDLC_SENDFLAG_B7, STOPPED }; -void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, int do_adapt56) +void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features) { - hdlc->bit_shift = 0; - hdlc->hdlc_bits1 = 0; - hdlc->data_bits = 0; - hdlc->ffbit_shift = 0; - hdlc->data_received = 0; + memset(hdlc, 0, sizeof(struct isdnhdlc_vars)); hdlc->state = HDLC_GET_DATA; - hdlc->do_adapt56 = do_adapt56; - hdlc->dchannel = 0; - hdlc->crc = 0; - hdlc->cbin = 0; - hdlc->shift_reg = 0; - hdlc->ffvalue = 0; - hdlc->dstpos = 0; + if (features & HDLC_56KBIT) + hdlc->do_adapt56 = 1; + if (features & HDLC_BITREVERSE) + hdlc->do_bitreverse = 1; } EXPORT_SYMBOL(isdnhdlc_out_init); -void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, int is_d_channel, - int do_adapt56) +void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features) { - hdlc->bit_shift = 0; - hdlc->hdlc_bits1 = 0; - hdlc->data_bits = 0; - hdlc->ffbit_shift = 0; - hdlc->data_received = 0; - hdlc->do_closing = 0; - hdlc->ffvalue = 0; - if (is_d_channel) { + memset(hdlc, 0, sizeof(struct isdnhdlc_vars)); + if (features & HDLC_DCHANNEL) { hdlc->dchannel = 1; hdlc->state = HDLC_SEND_FIRST_FLAG; } else { @@ -85,16 +73,13 @@ void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, int is_d_channel, hdlc->ffvalue = 0x7e; } hdlc->cbin = 0x7e; - hdlc->bit_shift = 0; - if (do_adapt56) { + if (features & HDLC_56KBIT) { hdlc->do_adapt56 = 1; - hdlc->data_bits = 0; hdlc->state = HDLC_SENDFLAG_B0; - } else { - hdlc->do_adapt56 = 0; + } else hdlc->data_bits = 8; - } - hdlc->shift_reg = 0; + if (features & HDLC_BITREVERSE) + hdlc->do_bitreverse = 1; } EXPORT_SYMBOL(isdnhdlc_rcv_init); @@ -188,7 +173,11 @@ int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, int slen, while (slen > 0) { if (hdlc->bit_shift == 0) { - hdlc->cbin = *src++; + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + hdlc->cbin = bitrev8(*src++); + else + hdlc->cbin = *src++; slen--; hdlc->bit_shift = 8; if (hdlc->do_adapt56) @@ -405,12 +394,15 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, case STOPPED: while (dsize--) *dst++ = 0xff; - return dsize; case HDLC_SEND_FAST_FLAG: hdlc->do_closing = 0; if (slen == 0) { - *dst++ = hdlc->ffvalue; + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + *dst++ = bitrev8(hdlc->ffvalue); + else + *dst++ = hdlc->ffvalue; len++; dsize--; break; @@ -594,7 +586,11 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, hdlc->cbin = 0x7e; hdlc->state = HDLC_SEND_FIRST_FLAG; } else { - *dst++ = hdlc->cbin; + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + *dst++ = bitrev8(hdlc->cbin); + else + *dst++ = hdlc->cbin; hdlc->bit_shift = 0; hdlc->data_bits = 0; len++; @@ -612,7 +608,11 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, } } if (hdlc->data_bits == 8) { - *dst++ = hdlc->cbin; + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + *dst++ = bitrev8(hdlc->cbin); + else + *dst++ = hdlc->cbin; hdlc->data_bits = 0; len++; dsize--; diff --git a/include/linux/isdn/hdlc.h b/include/linux/isdn/hdlc.h index 8f3540c7f692..4b3ecc40889a 100644 --- a/include/linux/isdn/hdlc.h +++ b/include/linux/isdn/hdlc.h @@ -6,6 
+6,7 @@ * controllers. * * Copyright (C) + * 2009 Karsten Keil * 2002 Wolfgang Mües * 2001 Frode Isaksen * 2001 Kai Germaschewski @@ -50,8 +51,14 @@ struct isdnhdlc_vars { u32 do_adapt56:1; /* set if in closing phase (need to send CRC + flag) */ u32 do_closing:1; + /* set if data is bitreverse */ + u32 do_bitreverse:1; }; +/* Feature Flags */ +#define HDLC_56KBIT 0x01 +#define HDLC_DCHANNEL 0x02 +#define HDLC_BITREVERSE 0x04 /* The return value from isdnhdlc_decode is @@ -62,13 +69,12 @@ struct isdnhdlc_vars { #define HDLC_CRC_ERROR 2 #define HDLC_LENGTH_ERROR 3 -extern void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, int do_adapt56); +extern void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features); extern int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, int slen, int *count, u8 *dst, int dsize); -extern void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, int is_d_channel, - int do_adapt56); +extern void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features); extern int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, int *count, u8 *dst, int dsize); -- cgit v1.2.3 From fb286f0471a04ef646c8e5c79750ae6718183745 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Thu, 9 Jul 2009 10:02:29 +0200 Subject: mISDN: Make clearing B-channel a common function Clearing a B-channel is needed in every driver, so it makes sense to have it as a common function. Signed-off-by: Karsten Keil --- drivers/isdn/hardware/mISDN/hfcmulti.c | 16 +--------------- drivers/isdn/hardware/mISDN/hfcpci.c | 16 +--------------- drivers/isdn/hardware/mISDN/hfcsusb.c | 16 +--------------- drivers/isdn/mISDN/hwchannel.c | 15 +++++++++++++-- include/linux/mISDNhw.h | 1 + 5 files changed, 17 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index e1dab30aed30..fd77bb15d790 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -3416,22 +3416,8 @@ deactivate_bchannel(struct bchannel *bch) u_long flags; spin_lock_irqsave(&hc->lock, flags); - if (test_and_clear_bit(FLG_TX_NEXT, &bch->Flags)) { - dev_kfree_skb(bch->next_skb); - bch->next_skb = NULL; - } - if (bch->tx_skb) { - dev_kfree_skb(bch->tx_skb); - bch->tx_skb = NULL; - } - bch->tx_idx = 0; - if (bch->rx_skb) { - dev_kfree_skb(bch->rx_skb); - bch->rx_skb = NULL; - } + mISDN_clear_bchannel(bch); hc->chan[bch->slot].coeff_count = 0; - test_and_clear_bit(FLG_ACTIVE, &bch->Flags); - test_and_clear_bit(FLG_TX_BUSY, &bch->Flags); hc->chan[bch->slot].rx_off = 0; hc->chan[bch->slot].conf = -1; mode_hfcmulti(hc, bch->slot, ISDN_P_NONE, -1, 0, -1, 0); diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 228ffbed1286..70e6b0e01121 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -1522,22 +1522,8 @@ deactivate_bchannel(struct bchannel *bch) u_long flags; spin_lock_irqsave(&hc->lock, flags); - if (test_and_clear_bit(FLG_TX_NEXT, &bch->Flags)) { - dev_kfree_skb(bch->next_skb); - bch->next_skb = NULL; - } - if (bch->tx_skb) { - dev_kfree_skb(bch->tx_skb); - bch->tx_skb = NULL; - } - bch->tx_idx = 0; - if (bch->rx_skb) { - dev_kfree_skb(bch->rx_skb); - bch->rx_skb = NULL; - } + mISDN_clear_bchannel(bch); mode_hfcpci(bch, bch->nr, ISDN_P_NONE); - test_and_clear_bit(FLG_ACTIVE, &bch->Flags); - test_and_clear_bit(FLG_TX_BUSY, &bch->Flags); spin_unlock_irqrestore(&hc->lock, flags); } diff --git
a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index 6b7704c41b94..fc46a26cb14f 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -1809,21 +1809,7 @@ deactivate_bchannel(struct bchannel *bch) hw->name, __func__, bch->nr); spin_lock_irqsave(&hw->lock, flags); - if (test_and_clear_bit(FLG_TX_NEXT, &bch->Flags)) { - dev_kfree_skb(bch->next_skb); - bch->next_skb = NULL; - } - if (bch->tx_skb) { - dev_kfree_skb(bch->tx_skb); - bch->tx_skb = NULL; - } - bch->tx_idx = 0; - if (bch->rx_skb) { - dev_kfree_skb(bch->rx_skb); - bch->rx_skb = NULL; - } - clear_bit(FLG_ACTIVE, &bch->Flags); - clear_bit(FLG_TX_BUSY, &bch->Flags); + mISDN_clear_bchannel(bch); spin_unlock_irqrestore(&hw->lock, flags); hfcsusb_setup_bch(bch, ISDN_P_NONE); hfcsusb_stop_endpoint(hw, bch->nr); diff --git a/drivers/isdn/mISDN/hwchannel.c b/drivers/isdn/mISDN/hwchannel.c index 0481a0cdf6db..e8049be552aa 100644 --- a/drivers/isdn/mISDN/hwchannel.c +++ b/drivers/isdn/mISDN/hwchannel.c @@ -114,13 +114,14 @@ mISDN_freedchannel(struct dchannel *ch) } EXPORT_SYMBOL(mISDN_freedchannel); -int -mISDN_freebchannel(struct bchannel *ch) +void +mISDN_clear_bchannel(struct bchannel *ch) { if (ch->tx_skb) { dev_kfree_skb(ch->tx_skb); ch->tx_skb = NULL; } + ch->tx_idx = 0; if (ch->rx_skb) { dev_kfree_skb(ch->rx_skb); ch->rx_skb = NULL; @@ -129,6 +130,16 @@ mISDN_freebchannel(struct bchannel *ch) dev_kfree_skb(ch->next_skb); ch->next_skb = NULL; } + test_and_clear_bit(FLG_TX_BUSY, &ch->Flags); + test_and_clear_bit(FLG_TX_NEXT, &ch->Flags); + test_and_clear_bit(FLG_ACTIVE, &ch->Flags); +} +EXPORT_SYMBOL(mISDN_clear_bchannel); + +int +mISDN_freebchannel(struct bchannel *ch) +{ + mISDN_clear_bchannel(ch); skb_queue_purge(&ch->rqueue); ch->rcount = 0; flush_scheduled_work(); diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h index 7f9831da847f..4af841408fb5 100644 --- a/include/linux/mISDNhw.h +++ b/include/linux/mISDNhw.h @@ -168,6 +168,7 @@ struct bchannel { extern int mISDN_initdchannel(struct dchannel *, int, void *); extern int mISDN_initbchannel(struct bchannel *, int); extern int mISDN_freedchannel(struct dchannel *); +extern void mISDN_clear_bchannel(struct bchannel *); extern int mISDN_freebchannel(struct bchannel *); extern void queue_ch_frame(struct mISDNchannel *, u_int, int, struct sk_buff *); -- cgit v1.2.3 From da2272c91ae81b41ae6fa6ebdc767a6cef73b770 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 22 Jul 2009 20:01:59 +0200 Subject: mISDN: Add support for Speedfax+ cards Add support for the Siemens ISAR DSP chip and cards based on it, including analog modem protocols. 
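
[Editor's note] The Kconfig hunk below wires the new driver into the mISDN menu; MISDN_IPAC and MISDN_ISAR are helper modules pulled in via select, so a build only needs the top-level option. A minimal .config fragment (the fragment itself is illustrative, assuming a PCI-enabled kernel) would be:

	CONFIG_PCI=y
	CONFIG_MISDN=m
	CONFIG_MISDN_SPEEDFAX=m
	# CONFIG_MISDN_IPAC and CONFIG_MISDN_ISAR are selected automatically

[End of note]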
Signed-off-by: Karsten Keil --- drivers/isdn/hardware/mISDN/Kconfig | 13 + drivers/isdn/hardware/mISDN/Makefile | 2 + drivers/isdn/hardware/mISDN/isar.h | 269 +++++ drivers/isdn/hardware/mISDN/mISDNisar.c | 1726 +++++++++++++++++++++++++++++++ drivers/isdn/hardware/mISDN/speedfax.c | 526 ++++++++++ include/linux/mISDNif.h | 16 +- 6 files changed, 2551 insertions(+), 1 deletion(-) create mode 100644 drivers/isdn/hardware/mISDN/isar.h create mode 100644 drivers/isdn/hardware/mISDN/mISDNisar.c create mode 100644 drivers/isdn/hardware/mISDN/speedfax.c (limited to 'include/linux') diff --git a/drivers/isdn/hardware/mISDN/Kconfig b/drivers/isdn/hardware/mISDN/Kconfig index 212dee6adec6..2600534be07e 100644 --- a/drivers/isdn/hardware/mISDN/Kconfig +++ b/drivers/isdn/hardware/mISDN/Kconfig @@ -47,6 +47,15 @@ config MISDN_AVMFRITZ help Enable support for AVMs FRITZ!CARD PCI cards +config MISDN_SPEEDFAX + tristate "Support for Sedlbauer Speedfax+" + depends on MISDN + depends on PCI + select MISDN_IPAC + select MISDN_ISAR + help + Enable support for Sedlbauer Speedfax+. + config MISDN_INFINEON tristate "Support for cards with Infineon chipset" depends on MISDN @@ -61,3 +70,7 @@ config MISDN_IPAC tristate depends on MISDN +config MISDN_ISAR + tristate + depends on MISDN + diff --git a/drivers/isdn/hardware/mISDN/Makefile b/drivers/isdn/hardware/mISDN/Makefile index 8204d84af70a..e18f964e185b 100644 --- a/drivers/isdn/hardware/mISDN/Makefile +++ b/drivers/isdn/hardware/mISDN/Makefile @@ -7,6 +7,8 @@ obj-$(CONFIG_MISDN_HFCPCI) += hfcpci.o obj-$(CONFIG_MISDN_HFCMULTI) += hfcmulti.o obj-$(CONFIG_MISDN_HFCUSB) += hfcsusb.o obj-$(CONFIG_MISDN_AVMFRITZ) += avmfritz.o +obj-$(CONFIG_MISDN_SPEEDFAX) += speedfax.o obj-$(CONFIG_MISDN_INFINEON) += mISDNinfineon.o # chip modules obj-$(CONFIG_MISDN_IPAC) += mISDNipac.o +obj-$(CONFIG_MISDN_ISAR) += mISDNisar.o diff --git a/drivers/isdn/hardware/mISDN/isar.h b/drivers/isdn/hardware/mISDN/isar.h new file mode 100644 index 000000000000..092351acd2ab --- /dev/null +++ b/drivers/isdn/hardware/mISDN/isar.h @@ -0,0 +1,269 @@ +/* + * + * isar.h ISAR (Siemens PSB 7110) specific defines + * + * Author Karsten Keil (keil@isdn4linux.de) + * + * Copyright 2009 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ + +#include "iohelper.h" + +struct isar_hw; + +struct isar_ch { + struct bchannel bch; + struct isar_hw *is; + struct timer_list ftimer; + u8 nr; + u8 dpath; + u8 mml; + u8 state; + u8 cmd; + u8 mod; + u8 newcmd; + u8 newmod; + u8 try_mod; + u8 conmsg[16]; +}; + +struct isar_hw { + struct isar_ch ch[2]; + void *hw; + spinlock_t *hwlock; /* lock HW acccess */ + char *name; + struct module *owner; + read_reg_t *read_reg; + write_reg_t *write_reg; + fifo_func_t *read_fifo; + fifo_func_t *write_fifo; + int (*ctrl)(void *, u32, u_long); + void (*release)(struct isar_hw *); + int (*init)(struct isar_hw *); + int (*open)(struct isar_hw *, struct channel_req *); + int (*firmware)(struct isar_hw *, const u8 *, int); + unsigned long Flags; + int version; + u8 bstat; + u8 iis; + u8 cmsb; + u8 clsb; + u8 buf[256]; + u8 log[256]; +}; + +#define ISAR_IRQMSK 0x04 +#define ISAR_IRQSTA 0x04 +#define ISAR_IRQBIT 0x75 +#define ISAR_CTRL_H 0x61 +#define ISAR_CTRL_L 0x60 +#define ISAR_IIS 0x58 +#define ISAR_IIA 0x58 +#define ISAR_HIS 0x50 +#define ISAR_HIA 0x50 +#define ISAR_MBOX 0x4c +#define ISAR_WADR 0x4a +#define ISAR_RADR 0x48 + +#define ISAR_HIS_VNR 0x14 +#define ISAR_HIS_DKEY 0x02 +#define ISAR_HIS_FIRM 0x1e +#define ISAR_HIS_STDSP 0x08 +#define ISAR_HIS_DIAG 0x05 +#define ISAR_HIS_P0CFG 0x3c +#define ISAR_HIS_P12CFG 0x24 +#define ISAR_HIS_SARTCFG 0x25 +#define ISAR_HIS_PUMPCFG 0x26 +#define ISAR_HIS_PUMPCTRL 0x2a +#define ISAR_HIS_IOM2CFG 0x27 +#define ISAR_HIS_IOM2REQ 0x07 +#define ISAR_HIS_IOM2CTRL 0x2b +#define ISAR_HIS_BSTREQ 0x0c +#define ISAR_HIS_PSTREQ 0x0e +#define ISAR_HIS_SDATA 0x20 +#define ISAR_HIS_DPS1 0x40 +#define ISAR_HIS_DPS2 0x80 +#define SET_DPS(x) ((x<<6) & 0xc0) + +#define ISAR_IIS_MSCMSD 0x3f +#define ISAR_IIS_VNR 0x15 +#define ISAR_IIS_DKEY 0x03 +#define ISAR_IIS_FIRM 0x1f +#define ISAR_IIS_STDSP 0x09 +#define ISAR_IIS_DIAG 0x25 +#define ISAR_IIS_GSTEV 0x00 +#define ISAR_IIS_BSTEV 0x28 +#define ISAR_IIS_BSTRSP 0x2c +#define ISAR_IIS_PSTRSP 0x2e +#define ISAR_IIS_PSTEV 0x2a +#define ISAR_IIS_IOM2RSP 0x27 +#define ISAR_IIS_RDATA 0x20 +#define ISAR_IIS_INVMSG 0x3f + +#define ISAR_CTRL_SWVER 0x10 +#define ISAR_CTRL_STST 0x40 + +#define ISAR_MSG_HWVER 0x20 + +#define ISAR_DP1_USE 1 +#define ISAR_DP2_USE 2 +#define ISAR_RATE_REQ 3 + +#define PMOD_DISABLE 0 +#define PMOD_FAX 1 +#define PMOD_DATAMODEM 2 +#define PMOD_HALFDUPLEX 3 +#define PMOD_V110 4 +#define PMOD_DTMF 5 +#define PMOD_DTMF_TRANS 6 +#define PMOD_BYPASS 7 + +#define PCTRL_ORIG 0x80 +#define PV32P2_V23R 0x40 +#define PV32P2_V22A 0x20 +#define PV32P2_V22B 0x10 +#define PV32P2_V22C 0x08 +#define PV32P2_V21 0x02 +#define PV32P2_BEL 0x01 + +/* LSB MSB in ISAR doc wrong !!! 
Arghhh */ +#define PV32P3_AMOD 0x80 +#define PV32P3_V32B 0x02 +#define PV32P3_V23B 0x01 +#define PV32P4_48 0x11 +#define PV32P5_48 0x05 +#define PV32P4_UT48 0x11 +#define PV32P5_UT48 0x0d +#define PV32P4_96 0x11 +#define PV32P5_96 0x03 +#define PV32P4_UT96 0x11 +#define PV32P5_UT96 0x0f +#define PV32P4_B96 0x91 +#define PV32P5_B96 0x0b +#define PV32P4_UTB96 0xd1 +#define PV32P5_UTB96 0x0f +#define PV32P4_120 0xb1 +#define PV32P5_120 0x09 +#define PV32P4_UT120 0xf1 +#define PV32P5_UT120 0x0f +#define PV32P4_144 0x99 +#define PV32P5_144 0x09 +#define PV32P4_UT144 0xf9 +#define PV32P5_UT144 0x0f +#define PV32P6_CTN 0x01 +#define PV32P6_ATN 0x02 + +#define PFAXP2_CTN 0x01 +#define PFAXP2_ATN 0x04 + +#define PSEV_10MS_TIMER 0x02 +#define PSEV_CON_ON 0x18 +#define PSEV_CON_OFF 0x19 +#define PSEV_V24_OFF 0x20 +#define PSEV_CTS_ON 0x21 +#define PSEV_CTS_OFF 0x22 +#define PSEV_DCD_ON 0x23 +#define PSEV_DCD_OFF 0x24 +#define PSEV_DSR_ON 0x25 +#define PSEV_DSR_OFF 0x26 +#define PSEV_REM_RET 0xcc +#define PSEV_REM_REN 0xcd +#define PSEV_GSTN_CLR 0xd4 + +#define PSEV_RSP_READY 0xbc +#define PSEV_LINE_TX_H 0xb3 +#define PSEV_LINE_TX_B 0xb2 +#define PSEV_LINE_RX_H 0xb1 +#define PSEV_LINE_RX_B 0xb0 +#define PSEV_RSP_CONN 0xb5 +#define PSEV_RSP_DISC 0xb7 +#define PSEV_RSP_FCERR 0xb9 +#define PSEV_RSP_SILDET 0xbe +#define PSEV_RSP_SILOFF 0xab +#define PSEV_FLAGS_DET 0xba + +#define PCTRL_CMD_TDTMF 0x5a + +#define PCTRL_CMD_FTH 0xa7 +#define PCTRL_CMD_FRH 0xa5 +#define PCTRL_CMD_FTM 0xa8 +#define PCTRL_CMD_FRM 0xa6 +#define PCTRL_CMD_SILON 0xac +#define PCTRL_CMD_CONT 0xa2 +#define PCTRL_CMD_ESC 0xa4 +#define PCTRL_CMD_SILOFF 0xab +#define PCTRL_CMD_HALT 0xa9 + +#define PCTRL_LOC_RET 0xcf +#define PCTRL_LOC_REN 0xce + +#define SMODE_DISABLE 0 +#define SMODE_V14 2 +#define SMODE_HDLC 3 +#define SMODE_BINARY 4 +#define SMODE_FSK_V14 5 + +#define SCTRL_HDMC_BOTH 0x00 +#define SCTRL_HDMC_DTX 0x80 +#define SCTRL_HDMC_DRX 0x40 +#define S_P1_OVSP 0x40 +#define S_P1_SNP 0x20 +#define S_P1_EOP 0x10 +#define S_P1_EDP 0x08 +#define S_P1_NSB 0x04 +#define S_P1_CHS_8 0x03 +#define S_P1_CHS_7 0x02 +#define S_P1_CHS_6 0x01 +#define S_P1_CHS_5 0x00 + +#define S_P2_BFT_DEF 0x10 + +#define IOM_CTRL_ENA 0x80 +#define IOM_CTRL_NOPCM 0x00 +#define IOM_CTRL_ALAW 0x02 +#define IOM_CTRL_ULAW 0x04 +#define IOM_CTRL_RCV 0x01 + +#define IOM_P1_TXD 0x10 + +#define HDLC_FED 0x40 +#define HDLC_FSD 0x20 +#define HDLC_FST 0x20 +#define HDLC_ERROR 0x1c +#define HDLC_ERR_FAD 0x10 +#define HDLC_ERR_RER 0x08 +#define HDLC_ERR_CER 0x04 +#define SART_NMD 0x01 + +#define BSTAT_RDM0 0x1 +#define BSTAT_RDM1 0x2 +#define BSTAT_RDM2 0x4 +#define BSTAT_RDM3 0x8 +#define BSTEV_TBO 0x1f +#define BSTEV_RBO 0x2f + +/* FAX State Machine */ +#define STFAX_NULL 0 +#define STFAX_READY 1 +#define STFAX_LINE 2 +#define STFAX_CONT 3 +#define STFAX_ACTIV 4 +#define STFAX_ESCAPE 5 +#define STFAX_SILDET 6 + +extern u32 mISDNisar_init(struct isar_hw *, void *); +extern void mISDNisar_irq(struct isar_hw *); diff --git a/drivers/isdn/hardware/mISDN/mISDNisar.c b/drivers/isdn/hardware/mISDN/mISDNisar.c new file mode 100644 index 000000000000..de352a17673a --- /dev/null +++ b/drivers/isdn/hardware/mISDN/mISDNisar.c @@ -0,0 +1,1726 @@ +/* + * mISDNisar.c ISAR (Siemens PSB 7110) specific functions + * + * Author Karsten Keil (keil@isdn4linux.de) + * + * Copyright 2009 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software 
Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* define this to enable static debug messages, if you kernel supports + * dynamic debugging, you should use debugfs for this + */ +/* #define DEBUG */ + +#include +#include +#include +#include "isar.h" + +#define ISAR_REV "2.1" + +MODULE_AUTHOR("Karsten Keil"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(ISAR_REV); + +#define DEBUG_HW_FIRMWARE_FIFO 0x10000 + +static const u8 faxmodulation_s[] = "3,24,48,72,73,74,96,97,98,121,122,145,146"; +static const u8 faxmodulation[] = {3, 24, 48, 72, 73, 74, 96, 97, 98, 121, + 122, 145, 146}; +#define FAXMODCNT 13 + +static void isar_setup(struct isar_hw *); + +static inline int +waitforHIA(struct isar_hw *isar, int timeout) +{ + int t = timeout; + u8 val = isar->read_reg(isar->hw, ISAR_HIA); + + while ((val & 1) && t) { + udelay(1); + t--; + val = isar->read_reg(isar->hw, ISAR_HIA); + } + pr_debug("%s: HIA after %dus\n", isar->name, timeout - t); + return timeout; +} + +/* + * send msg to ISAR mailbox + * if msg is NULL use isar->buf + */ +static int +send_mbox(struct isar_hw *isar, u8 his, u8 creg, u8 len, u8 *msg) +{ + if (!waitforHIA(isar, 1000)) + return 0; + pr_debug("send_mbox(%02x,%02x,%d)\n", his, creg, len); + isar->write_reg(isar->hw, ISAR_CTRL_H, creg); + isar->write_reg(isar->hw, ISAR_CTRL_L, len); + isar->write_reg(isar->hw, ISAR_WADR, 0); + if (!msg) + msg = isar->buf; + if (msg && len) { + isar->write_fifo(isar->hw, ISAR_MBOX, msg, len); + if (isar->ch[0].bch.debug & DEBUG_HW_BFIFO) { + int l = 0; + + while (l < (int)len) { + hex_dump_to_buffer(msg + l, len - l, 32, 1, + isar->log, 256, 1); + pr_debug("%s: %s %02x: %s\n", isar->name, + __func__, l, isar->log); + l += 32; + } + } + } + isar->write_reg(isar->hw, ISAR_HIS, his); + waitforHIA(isar, 1000); + return 1; +} + +/* + * receive message from ISAR mailbox + * if msg is NULL use isar->buf + */ +static void +rcv_mbox(struct isar_hw *isar, u8 *msg) +{ + if (!msg) + msg = isar->buf; + isar->write_reg(isar->hw, ISAR_RADR, 0); + if (msg && isar->clsb) { + isar->read_fifo(isar->hw, ISAR_MBOX, msg, isar->clsb); + if (isar->ch[0].bch.debug & DEBUG_HW_BFIFO) { + int l = 0; + + while (l < (int)isar->clsb) { + hex_dump_to_buffer(msg + l, isar->clsb - l, 32, + 1, isar->log, 256, 1); + pr_debug("%s: %s %02x: %s\n", isar->name, + __func__, l, isar->log); + l += 32; + } + } + } + isar->write_reg(isar->hw, ISAR_IIA, 0); +} + +static inline void +get_irq_infos(struct isar_hw *isar) +{ + isar->iis = isar->read_reg(isar->hw, ISAR_IIS); + isar->cmsb = isar->read_reg(isar->hw, ISAR_CTRL_H); + isar->clsb = isar->read_reg(isar->hw, ISAR_CTRL_L); + pr_debug("%s: rcv_mbox(%02x,%02x,%d)\n", isar->name, + isar->iis, isar->cmsb, isar->clsb); +} + +/* + * poll answer message from ISAR mailbox + * should be used only with ISAR IRQs disabled before DSP was started + * + */ +static int +poll_mbox(struct isar_hw *isar, int maxdelay) +{ + int t = maxdelay; + u8 irq; + + irq = isar->read_reg(isar->hw, ISAR_IRQBIT); + while (t && !(irq & ISAR_IRQSTA)) { + udelay(1); + t--; + } + if (t) { + get_irq_infos(isar); + rcv_mbox(isar, NULL); + } + 
pr_debug("%s: pulled %d bytes after %d us\n", + isar->name, isar->clsb, maxdelay - t); + return t; +} + +static int +ISARVersion(struct isar_hw *isar) +{ + int ver; + + /* disable ISAR IRQ */ + isar->write_reg(isar->hw, ISAR_IRQBIT, 0); + isar->buf[0] = ISAR_MSG_HWVER; + isar->buf[1] = 0; + isar->buf[2] = 1; + if (!send_mbox(isar, ISAR_HIS_VNR, 0, 3, NULL)) + return -1; + if (!poll_mbox(isar, 1000)) + return -2; + if (isar->iis == ISAR_IIS_VNR) { + if (isar->clsb == 1) { + ver = isar->buf[0] & 0xf; + return ver; + } + return -3; + } + return -4; +} + +static int +load_firmware(struct isar_hw *isar, const u8 *buf, int size) +{ + u32 saved_debug = isar->ch[0].bch.debug; + int ret, cnt; + u8 nom, noc; + u16 left, val, *sp = (u16 *)buf; + u8 *mp; + u_long flags; + + struct { + u16 sadr; + u16 len; + u16 d_key; + } blk_head; + + if (1 != isar->version) { + pr_err("%s: ISAR wrong version %d firmware download aborted\n", + isar->name, isar->version); + return -EINVAL; + } + if (!(saved_debug & DEBUG_HW_FIRMWARE_FIFO)) + isar->ch[0].bch.debug &= ~DEBUG_HW_BFIFO; + pr_debug("%s: load firmware %d words (%d bytes)\n", + isar->name, size/2, size); + cnt = 0; + size /= 2; + /* disable ISAR IRQ */ + spin_lock_irqsave(isar->hwlock, flags); + isar->write_reg(isar->hw, ISAR_IRQBIT, 0); + spin_unlock_irqrestore(isar->hwlock, flags); + while (cnt < size) { + blk_head.sadr = le16_to_cpu(*sp++); + blk_head.len = le16_to_cpu(*sp++); + blk_head.d_key = le16_to_cpu(*sp++); + cnt += 3; + pr_debug("ISAR firmware block (%#x,%d,%#x)\n", + blk_head.sadr, blk_head.len, blk_head.d_key & 0xff); + left = blk_head.len; + if (cnt + left > size) { + pr_info("%s: firmware error have %d need %d words\n", + isar->name, size, cnt + left); + ret = -EINVAL; + goto reterrflg; + } + spin_lock_irqsave(isar->hwlock, flags); + if (!send_mbox(isar, ISAR_HIS_DKEY, blk_head.d_key & 0xff, + 0, NULL)) { + pr_info("ISAR send_mbox dkey failed\n"); + ret = -ETIME; + goto reterror; + } + if (!poll_mbox(isar, 1000)) { + pr_warning("ISAR poll_mbox dkey failed\n"); + ret = -ETIME; + goto reterror; + } + spin_unlock_irqrestore(isar->hwlock, flags); + if ((isar->iis != ISAR_IIS_DKEY) || isar->cmsb || isar->clsb) { + pr_info("ISAR wrong dkey response (%x,%x,%x)\n", + isar->iis, isar->cmsb, isar->clsb); + ret = 1; + goto reterrflg; + } + while (left > 0) { + if (left > 126) + noc = 126; + else + noc = left; + nom = (2 * noc) + 3; + mp = isar->buf; + /* the ISAR is big endian */ + *mp++ = blk_head.sadr >> 8; + *mp++ = blk_head.sadr & 0xFF; + left -= noc; + cnt += noc; + *mp++ = noc; + pr_debug("%s: load %3d words at %04x\n", isar->name, + noc, blk_head.sadr); + blk_head.sadr += noc; + while (noc) { + val = le16_to_cpu(*sp++); + *mp++ = val >> 8; + *mp++ = val & 0xFF;; + noc--; + } + spin_lock_irqsave(isar->hwlock, flags); + if (!send_mbox(isar, ISAR_HIS_FIRM, 0, nom, NULL)) { + pr_info("ISAR send_mbox prog failed\n"); + ret = -ETIME; + goto reterror; + } + if (!poll_mbox(isar, 1000)) { + pr_info("ISAR poll_mbox prog failed\n"); + ret = -ETIME; + goto reterror; + } + spin_unlock_irqrestore(isar->hwlock, flags); + if ((isar->iis != ISAR_IIS_FIRM) || + isar->cmsb || isar->clsb) { + pr_info("ISAR wrong prog response (%x,%x,%x)\n", + isar->iis, isar->cmsb, isar->clsb); + ret = -EIO; + goto reterrflg; + } + } + pr_debug("%s: ISAR firmware block %d words loaded\n", + isar->name, blk_head.len); + } + isar->ch[0].bch.debug = saved_debug; + /* 10ms delay */ + cnt = 10; + while (cnt--) + mdelay(1); + isar->buf[0] = 0xff; + isar->buf[1] = 0xfe; + isar->bstat = 0; 
+ spin_lock_irqsave(isar->hwlock, flags); + if (!send_mbox(isar, ISAR_HIS_STDSP, 0, 2, NULL)) { + pr_info("ISAR send_mbox start dsp failed\n"); + ret = -ETIME; + goto reterror; + } + if (!poll_mbox(isar, 1000)) { + pr_info("ISAR poll_mbox start dsp failed\n"); + ret = -ETIME; + goto reterror; + } + if ((isar->iis != ISAR_IIS_STDSP) || isar->cmsb || isar->clsb) { + pr_info("ISAR wrong start dsp response (%x,%x,%x)\n", + isar->iis, isar->cmsb, isar->clsb); + ret = -EIO; + goto reterror; + } else + pr_debug("%s: ISAR start dsp success\n", isar->name); + + /* NORMAL mode entered */ + /* Enable IRQs of ISAR */ + isar->write_reg(isar->hw, ISAR_IRQBIT, ISAR_IRQSTA); + spin_unlock_irqrestore(isar->hwlock, flags); + cnt = 1000; /* max 1s */ + while ((!isar->bstat) && cnt) { + mdelay(1); + cnt--; + } + if (!cnt) { + pr_info("ISAR no general status event received\n"); + ret = -ETIME; + goto reterrflg; + } else + pr_debug("%s: ISAR general status event %x\n", + isar->name, isar->bstat); + /* 10ms delay */ + cnt = 10; + while (cnt--) + mdelay(1); + isar->iis = 0; + spin_lock_irqsave(isar->hwlock, flags); + if (!send_mbox(isar, ISAR_HIS_DIAG, ISAR_CTRL_STST, 0, NULL)) { + pr_info("ISAR send_mbox self tst failed\n"); + ret = -ETIME; + goto reterror; + } + spin_unlock_irqrestore(isar->hwlock, flags); + cnt = 10000; /* max 100 ms */ + while ((isar->iis != ISAR_IIS_DIAG) && cnt) { + udelay(10); + cnt--; + } + mdelay(1); + if (!cnt) { + pr_info("ISAR no self tst response\n"); + ret = -ETIME; + goto reterrflg; + } + if ((isar->cmsb == ISAR_CTRL_STST) && (isar->clsb == 1) + && (isar->buf[0] == 0)) + pr_debug("%s: ISAR selftest OK\n", isar->name); + else { + pr_info("ISAR selftest not OK %x/%x/%x\n", + isar->cmsb, isar->clsb, isar->buf[0]); + ret = -EIO; + goto reterrflg; + } + spin_lock_irqsave(isar->hwlock, flags); + isar->iis = 0; + if (!send_mbox(isar, ISAR_HIS_DIAG, ISAR_CTRL_SWVER, 0, NULL)) { + pr_info("ISAR RQST SVN failed\n"); + ret = -ETIME; + goto reterror; + } + spin_unlock_irqrestore(isar->hwlock, flags); + cnt = 30000; /* max 300 ms */ + while ((isar->iis != ISAR_IIS_DIAG) && cnt) { + udelay(10); + cnt--; + } + mdelay(1); + if (!cnt) { + pr_info("ISAR no SVN response\n"); + ret = -ETIME; + goto reterrflg; + } else { + if ((isar->cmsb == ISAR_CTRL_SWVER) && (isar->clsb == 1)) { + pr_notice("%s: ISAR software version %#x\n", + isar->name, isar->buf[0]); + } else { + pr_info("%s: ISAR wrong swver response (%x,%x)" + " cnt(%d)\n", isar->name, isar->cmsb, + isar->clsb, cnt); + ret = -EIO; + goto reterrflg; + } + } + spin_lock_irqsave(isar->hwlock, flags); + isar_setup(isar); + spin_unlock_irqrestore(isar->hwlock, flags); + ret = 0; +reterrflg: + spin_lock_irqsave(isar->hwlock, flags); +reterror: + isar->ch[0].bch.debug = saved_debug; + if (ret) + /* disable ISAR IRQ */ + isar->write_reg(isar->hw, ISAR_IRQBIT, 0); + spin_unlock_irqrestore(isar->hwlock, flags); + return ret; +} + +static inline void +deliver_status(struct isar_ch *ch, int status) +{ + pr_debug("%s: HL->LL FAXIND %x\n", ch->is->name, status); + _queue_data(&ch->bch.ch, PH_CONTROL_IND, status, 0, NULL, GFP_ATOMIC); +} + +static inline void +isar_rcv_frame(struct isar_ch *ch) +{ + u8 *ptr; + + if (!ch->is->clsb) { + pr_debug("%s; ISAR zero len frame\n", ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + return; + } + switch (ch->bch.state) { + case ISDN_P_NONE: + pr_debug("%s: ISAR protocol 0 spurious IIS_RDATA %x/%x/%x\n", + ch->is->name, ch->is->iis, ch->is->cmsb, ch->is->clsb); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + 
break; + case ISDN_P_B_RAW: + case ISDN_P_B_L2DTMF: + case ISDN_P_B_MODEM_ASYNC: + if (!ch->bch.rx_skb) { + ch->bch.rx_skb = mI_alloc_skb(ch->bch.maxlen, + GFP_ATOMIC); + if (unlikely(!ch->bch.rx_skb)) { + pr_info("%s: B receive out of memory\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } + } + rcv_mbox(ch->is, skb_put(ch->bch.rx_skb, ch->is->clsb)); + recv_Bchannel(&ch->bch, 0); + break; + case ISDN_P_B_HDLC: + if (!ch->bch.rx_skb) { + ch->bch.rx_skb = mI_alloc_skb(ch->bch.maxlen, + GFP_ATOMIC); + if (unlikely(!ch->bch.rx_skb)) { + pr_info("%s: B receive out of memory\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } + } + if ((ch->bch.rx_skb->len + ch->is->clsb) > + (ch->bch.maxlen + 2)) { + pr_debug("%s: incoming packet too large\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + skb_trim(ch->bch.rx_skb, 0); + break; + } + if (ch->is->cmsb & HDLC_ERROR) { + pr_debug("%s: ISAR frame error %x len %d\n", + ch->is->name, ch->is->cmsb, ch->is->clsb); +#ifdef ERROR_STATISTIC + if (ch->is->cmsb & HDLC_ERR_RER) + ch->bch.err_inv++; + if (ch->is->cmsb & HDLC_ERR_CER) + ch->bch.err_crc++; +#endif + skb_trim(ch->bch.rx_skb, 0); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } + if (ch->is->cmsb & HDLC_FSD) + skb_trim(ch->bch.rx_skb, 0); + ptr = skb_put(ch->bch.rx_skb, ch->is->clsb); + rcv_mbox(ch->is, ptr); + if (ch->is->cmsb & HDLC_FED) { + if (ch->bch.rx_skb->len < 3) { /* last 2 are the FCS */ + pr_debug("%s: ISAR frame to short %d\n", + ch->is->name, ch->bch.rx_skb->len); + skb_trim(ch->bch.rx_skb, 0); + break; + } + skb_trim(ch->bch.rx_skb, ch->bch.rx_skb->len - 2); + recv_Bchannel(&ch->bch, 0); + } + break; + case ISDN_P_B_T30_FAX: + if (ch->state != STFAX_ACTIV) { + pr_debug("%s: isar_rcv_frame: not ACTIV\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + if (ch->bch.rx_skb) + skb_trim(ch->bch.rx_skb, 0); + break; + } + if (!ch->bch.rx_skb) { + ch->bch.rx_skb = mI_alloc_skb(ch->bch.maxlen, + GFP_ATOMIC); + if (unlikely(!ch->bch.rx_skb)) { + pr_info("%s: B receive out of memory\n", + __func__); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } + } + if (ch->cmd == PCTRL_CMD_FRM) { + rcv_mbox(ch->is, skb_put(ch->bch.rx_skb, ch->is->clsb)); + pr_debug("%s: isar_rcv_frame: %d\n", + ch->is->name, ch->bch.rx_skb->len); + if (ch->is->cmsb & SART_NMD) { /* ABORT */ + pr_debug("%s: isar_rcv_frame: no more data\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + send_mbox(ch->is, SET_DPS(ch->dpath) | + ISAR_HIS_PUMPCTRL, PCTRL_CMD_ESC, + 0, NULL); + ch->state = STFAX_ESCAPE; + /* set_skb_flag(skb, DF_NOMOREDATA); */ + } + recv_Bchannel(&ch->bch, 0); + if (ch->is->cmsb & SART_NMD) + deliver_status(ch, HW_MOD_NOCARR); + break; + } + if (ch->cmd != PCTRL_CMD_FRH) { + pr_debug("%s: isar_rcv_frame: unknown fax mode %x\n", + ch->is->name, ch->cmd); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + if (ch->bch.rx_skb) + skb_trim(ch->bch.rx_skb, 0); + break; + } + /* PCTRL_CMD_FRH */ + if ((ch->bch.rx_skb->len + ch->is->clsb) > + (ch->bch.maxlen + 2)) { + pr_info("%s: %s incoming packet too large\n", + ch->is->name, __func__); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + skb_trim(ch->bch.rx_skb, 0); + break; + } else if (ch->is->cmsb & HDLC_ERROR) { + pr_info("%s: ISAR frame error %x len %d\n", + ch->is->name, ch->is->cmsb, ch->is->clsb); + skb_trim(ch->bch.rx_skb, 0); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } + if (ch->is->cmsb & HDLC_FSD) + 
skb_trim(ch->bch.rx_skb, 0); + ptr = skb_put(ch->bch.rx_skb, ch->is->clsb); + rcv_mbox(ch->is, ptr); + if (ch->is->cmsb & HDLC_FED) { + if (ch->bch.rx_skb->len < 3) { /* last 2 are the FCS */ + pr_info("%s: ISAR frame to short %d\n", + ch->is->name, ch->bch.rx_skb->len); + skb_trim(ch->bch.rx_skb, 0); + break; + } + skb_trim(ch->bch.rx_skb, ch->bch.rx_skb->len - 2); + recv_Bchannel(&ch->bch, 0); + } + if (ch->is->cmsb & SART_NMD) { /* ABORT */ + pr_debug("%s: isar_rcv_frame: no more data\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + if (ch->bch.rx_skb) + skb_trim(ch->bch.rx_skb, 0); + send_mbox(ch->is, SET_DPS(ch->dpath) | + ISAR_HIS_PUMPCTRL, PCTRL_CMD_ESC, 0, NULL); + ch->state = STFAX_ESCAPE; + deliver_status(ch, HW_MOD_NOCARR); + } + break; + default: + pr_info("isar_rcv_frame protocol (%x)error\n", ch->bch.state); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } +} + +static void +isar_fill_fifo(struct isar_ch *ch) +{ + int count; + u8 msb; + u8 *ptr; + + pr_debug("%s: ch%d tx_skb %p tx_idx %d\n", + ch->is->name, ch->bch.nr, ch->bch.tx_skb, ch->bch.tx_idx); + if (!ch->bch.tx_skb) + return; + count = ch->bch.tx_skb->len - ch->bch.tx_idx; + if (count <= 0) + return; + if (!(ch->is->bstat & + (ch->dpath == 1 ? BSTAT_RDM1 : BSTAT_RDM2))) + return; + if (count > ch->mml) { + msb = 0; + count = ch->mml; + } else { + msb = HDLC_FED; + } + ptr = ch->bch.tx_skb->data + ch->bch.tx_idx; + if (!ch->bch.tx_idx) { + pr_debug("%s: frame start\n", ch->is->name); + if ((ch->bch.state == ISDN_P_B_T30_FAX) && + (ch->cmd == PCTRL_CMD_FTH)) { + if (count > 1) { + if ((ptr[0] == 0xff) && (ptr[1] == 0x13)) { + /* last frame */ + test_and_set_bit(FLG_LASTDATA, + &ch->bch.Flags); + pr_debug("%s: set LASTDATA\n", + ch->is->name); + if (msb == HDLC_FED) + test_and_set_bit(FLG_DLEETX, + &ch->bch.Flags); + } + } + } + msb |= HDLC_FST; + } + ch->bch.tx_idx += count; + switch (ch->bch.state) { + case ISDN_P_NONE: + pr_info("%s: wrong protocol 0\n", __func__); + break; + case ISDN_P_B_RAW: + case ISDN_P_B_L2DTMF: + case ISDN_P_B_MODEM_ASYNC: + send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA, + 0, count, ptr); + break; + case ISDN_P_B_HDLC: + send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA, + msb, count, ptr); + break; + case ISDN_P_B_T30_FAX: + if (ch->state != STFAX_ACTIV) + pr_debug("%s: not ACTIV\n", ch->is->name); + else if (ch->cmd == PCTRL_CMD_FTH) + send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA, + msb, count, ptr); + else if (ch->cmd == PCTRL_CMD_FTM) + send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA, + 0, count, ptr); + else + pr_debug("%s: not FTH/FTM\n", ch->is->name); + break; + default: + pr_info("%s: protocol(%x) error\n", + __func__, ch->bch.state); + break; + } +} + +static inline struct isar_ch * +sel_bch_isar(struct isar_hw *isar, u8 dpath) +{ + struct isar_ch *base = &isar->ch[0]; + + if ((!dpath) || (dpath > 2)) + return NULL; + if (base->dpath == dpath) + return base; + base++; + if (base->dpath == dpath) + return base; + return NULL; +} + +static void +send_next(struct isar_ch *ch) +{ + pr_debug("%s: %s ch%d tx_skb %p tx_idx %d\n", + ch->is->name, __func__, ch->bch.nr, + ch->bch.tx_skb, ch->bch.tx_idx); + if (ch->bch.state == ISDN_P_B_T30_FAX) { + if (ch->cmd == PCTRL_CMD_FTH) { + if (test_bit(FLG_LASTDATA, &ch->bch.Flags)) { + pr_debug("set NMD_DATA\n"); + test_and_set_bit(FLG_NMD_DATA, &ch->bch.Flags); + } + } else if (ch->cmd == PCTRL_CMD_FTM) { + if (test_bit(FLG_DLEETX, &ch->bch.Flags)) { + test_and_set_bit(FLG_LASTDATA, 
&ch->bch.Flags); + test_and_set_bit(FLG_NMD_DATA, &ch->bch.Flags); + } + } + } + if (ch->bch.tx_skb) { + /* send confirm, on trans, free on hdlc. */ + if (test_bit(FLG_TRANSPARENT, &ch->bch.Flags)) + confirm_Bsend(&ch->bch); + dev_kfree_skb(ch->bch.tx_skb); + } + if (get_next_bframe(&ch->bch)) + isar_fill_fifo(ch); + else { + if (test_and_clear_bit(FLG_DLEETX, &ch->bch.Flags)) { + if (test_and_clear_bit(FLG_LASTDATA, + &ch->bch.Flags)) { + if (test_and_clear_bit(FLG_NMD_DATA, + &ch->bch.Flags)) { + u8 zd = 0; + send_mbox(ch->is, SET_DPS(ch->dpath) | + ISAR_HIS_SDATA, 0x01, 1, &zd); + } + test_and_set_bit(FLG_LL_OK, &ch->bch.Flags); + } else { + deliver_status(ch, HW_MOD_CONNECT); + } + } + } +} + +static void +check_send(struct isar_hw *isar, u8 rdm) +{ + struct isar_ch *ch; + + pr_debug("%s: rdm %x\n", isar->name, rdm); + if (rdm & BSTAT_RDM1) { + ch = sel_bch_isar(isar, 1); + if (ch && test_bit(FLG_ACTIVE, &ch->bch.Flags)) { + if (ch->bch.tx_skb && (ch->bch.tx_skb->len > + ch->bch.tx_idx)) + isar_fill_fifo(ch); + else + send_next(ch); + } + } + if (rdm & BSTAT_RDM2) { + ch = sel_bch_isar(isar, 2); + if (ch && test_bit(FLG_ACTIVE, &ch->bch.Flags)) { + if (ch->bch.tx_skb && (ch->bch.tx_skb->len > + ch->bch.tx_idx)) + isar_fill_fifo(ch); + else + send_next(ch); + } + } +} + +const char *dmril[] = {"NO SPEED", "1200/75", "NODEF2", "75/1200", "NODEF4", + "300", "600", "1200", "2400", "4800", "7200", + "9600nt", "9600t", "12000", "14400", "WRONG"}; +const char *dmrim[] = {"NO MOD", "NO DEF", "V32/V32b", "V22", "V21", + "Bell103", "V23", "Bell202", "V17", "V29", "V27ter"}; + +static void +isar_pump_status_rsp(struct isar_ch *ch) { + u8 ril = ch->is->buf[0]; + u8 rim; + + if (!test_and_clear_bit(ISAR_RATE_REQ, &ch->is->Flags)) + return; + if (ril > 14) { + pr_info("%s: wrong pstrsp ril=%d\n", ch->is->name, ril); + ril = 15; + } + switch (ch->is->buf[1]) { + case 0: + rim = 0; + break; + case 0x20: + rim = 2; + break; + case 0x40: + rim = 3; + break; + case 0x41: + rim = 4; + break; + case 0x51: + rim = 5; + break; + case 0x61: + rim = 6; + break; + case 0x71: + rim = 7; + break; + case 0x82: + rim = 8; + break; + case 0x92: + rim = 9; + break; + case 0xa2: + rim = 10; + break; + default: + rim = 1; + break; + } + sprintf(ch->conmsg, "%s %s", dmril[ril], dmrim[rim]); + pr_debug("%s: pump strsp %s\n", ch->is->name, ch->conmsg); +} + +static void +isar_pump_statev_modem(struct isar_ch *ch, u8 devt) { + u8 dps = SET_DPS(ch->dpath); + + switch (devt) { + case PSEV_10MS_TIMER: + pr_debug("%s: pump stev TIMER\n", ch->is->name); + break; + case PSEV_CON_ON: + pr_debug("%s: pump stev CONNECT\n", ch->is->name); + deliver_status(ch, HW_MOD_CONNECT); + break; + case PSEV_CON_OFF: + pr_debug("%s: pump stev NO CONNECT\n", ch->is->name); + send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL); + deliver_status(ch, HW_MOD_NOCARR); + break; + case PSEV_V24_OFF: + pr_debug("%s: pump stev V24 OFF\n", ch->is->name); + break; + case PSEV_CTS_ON: + pr_debug("%s: pump stev CTS ON\n", ch->is->name); + break; + case PSEV_CTS_OFF: + pr_debug("%s pump stev CTS OFF\n", ch->is->name); + break; + case PSEV_DCD_ON: + pr_debug("%s: pump stev CARRIER ON\n", ch->is->name); + test_and_set_bit(ISAR_RATE_REQ, &ch->is->Flags); + send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL); + break; + case PSEV_DCD_OFF: + pr_debug("%s: pump stev CARRIER OFF\n", ch->is->name); + break; + case PSEV_DSR_ON: + pr_debug("%s: pump stev DSR ON\n", ch->is->name); + break; + case PSEV_DSR_OFF: + pr_debug("%s: pump stev DSR_OFF\n", ch->is->name); + 
break; + case PSEV_REM_RET: + pr_debug("%s: pump stev REMOTE RETRAIN\n", ch->is->name); + break; + case PSEV_REM_REN: + pr_debug("%s: pump stev REMOTE RENEGOTIATE\n", ch->is->name); + break; + case PSEV_GSTN_CLR: + pr_debug("%s: pump stev GSTN CLEAR\n", ch->is->name); + break; + default: + pr_info("%s: unknown pump stev %x\n", ch->is->name, devt); + break; + } +} + +static void +isar_pump_statev_fax(struct isar_ch *ch, u8 devt) { + u8 dps = SET_DPS(ch->dpath); + u8 p1; + + switch (devt) { + case PSEV_10MS_TIMER: + pr_debug("%s: pump stev TIMER\n", ch->is->name); + break; + case PSEV_RSP_READY: + pr_debug("%s: pump stev RSP_READY\n", ch->is->name); + ch->state = STFAX_READY; + deliver_status(ch, HW_MOD_READY); +#ifdef AUTOCON + if (test_bit(BC_FLG_ORIG, &ch->bch.Flags)) + isar_pump_cmd(bch, HW_MOD_FRH, 3); + else + isar_pump_cmd(bch, HW_MOD_FTH, 3); +#endif + break; + case PSEV_LINE_TX_H: + if (ch->state == STFAX_LINE) { + pr_debug("%s: pump stev LINE_TX_H\n", ch->is->name); + ch->state = STFAX_CONT; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + PCTRL_CMD_CONT, 0, NULL); + } else { + pr_debug("%s: pump stev LINE_TX_H wrong st %x\n", + ch->is->name, ch->state); + } + break; + case PSEV_LINE_RX_H: + if (ch->state == STFAX_LINE) { + pr_debug("%s: pump stev LINE_RX_H\n", ch->is->name); + ch->state = STFAX_CONT; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + PCTRL_CMD_CONT, 0, NULL); + } else { + pr_debug("%s: pump stev LINE_RX_H wrong st %x\n", + ch->is->name, ch->state); + } + break; + case PSEV_LINE_TX_B: + if (ch->state == STFAX_LINE) { + pr_debug("%s: pump stev LINE_TX_B\n", ch->is->name); + ch->state = STFAX_CONT; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + PCTRL_CMD_CONT, 0, NULL); + } else { + pr_debug("%s: pump stev LINE_TX_B wrong st %x\n", + ch->is->name, ch->state); + } + break; + case PSEV_LINE_RX_B: + if (ch->state == STFAX_LINE) { + pr_debug("%s: pump stev LINE_RX_B\n", ch->is->name); + ch->state = STFAX_CONT; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + PCTRL_CMD_CONT, 0, NULL); + } else { + pr_debug("%s: pump stev LINE_RX_B wrong st %x\n", + ch->is->name, ch->state); + } + break; + case PSEV_RSP_CONN: + if (ch->state == STFAX_CONT) { + pr_debug("%s: pump stev RSP_CONN\n", ch->is->name); + ch->state = STFAX_ACTIV; + test_and_set_bit(ISAR_RATE_REQ, &ch->is->Flags); + send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL); + if (ch->cmd == PCTRL_CMD_FTH) { + int delay = (ch->mod == 3) ? 
1000 : 200; + /* 1s (200 ms) Flags before data */ + if (test_and_set_bit(FLG_FTI_RUN, + &ch->bch.Flags)) + del_timer(&ch->ftimer); + ch->ftimer.expires = + jiffies + ((delay * HZ)/1000); + test_and_set_bit(FLG_LL_CONN, + &ch->bch.Flags); + add_timer(&ch->ftimer); + } else { + deliver_status(ch, HW_MOD_CONNECT); + } + } else { + pr_debug("%s: pump stev RSP_CONN wrong st %x\n", + ch->is->name, ch->state); + } + break; + case PSEV_FLAGS_DET: + pr_debug("%s: pump stev FLAGS_DET\n", ch->is->name); + break; + case PSEV_RSP_DISC: + pr_debug("%s: pump stev RSP_DISC state(%d)\n", + ch->is->name, ch->state); + if (ch->state == STFAX_ESCAPE) { + p1 = 5; + switch (ch->newcmd) { + case 0: + ch->state = STFAX_READY; + break; + case PCTRL_CMD_FTM: + p1 = 2; + case PCTRL_CMD_FTH: + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + PCTRL_CMD_SILON, 1, &p1); + ch->state = STFAX_SILDET; + break; + case PCTRL_CMD_FRH: + case PCTRL_CMD_FRM: + ch->mod = ch->newmod; + p1 = ch->newmod; + ch->newmod = 0; + ch->cmd = ch->newcmd; + ch->newcmd = 0; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + ch->cmd, 1, &p1); + ch->state = STFAX_LINE; + ch->try_mod = 3; + break; + default: + pr_debug("%s: RSP_DISC unknown newcmd %x\n", + ch->is->name, ch->newcmd); + break; + } + } else if (ch->state == STFAX_ACTIV) { + if (test_and_clear_bit(FLG_LL_OK, &ch->bch.Flags)) + deliver_status(ch, HW_MOD_OK); + else if (ch->cmd == PCTRL_CMD_FRM) + deliver_status(ch, HW_MOD_NOCARR); + else + deliver_status(ch, HW_MOD_FCERROR); + ch->state = STFAX_READY; + } else if (ch->state != STFAX_SILDET) { + /* ignore in STFAX_SILDET */ + ch->state = STFAX_READY; + deliver_status(ch, HW_MOD_FCERROR); + } + break; + case PSEV_RSP_SILDET: + pr_debug("%s: pump stev RSP_SILDET\n", ch->is->name); + if (ch->state == STFAX_SILDET) { + ch->mod = ch->newmod; + p1 = ch->newmod; + ch->newmod = 0; + ch->cmd = ch->newcmd; + ch->newcmd = 0; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + ch->cmd, 1, &p1); + ch->state = STFAX_LINE; + ch->try_mod = 3; + } + break; + case PSEV_RSP_SILOFF: + pr_debug("%s: pump stev RSP_SILOFF\n", ch->is->name); + break; + case PSEV_RSP_FCERR: + if (ch->state == STFAX_LINE) { + pr_debug("%s: pump stev RSP_FCERR try %d\n", + ch->is->name, ch->try_mod); + if (ch->try_mod--) { + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + ch->cmd, 1, &ch->mod); + break; + } + } + pr_debug("%s: pump stev RSP_FCERR\n", ch->is->name); + ch->state = STFAX_ESCAPE; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, PCTRL_CMD_ESC, + 0, NULL); + deliver_status(ch, HW_MOD_FCERROR); + break; + default: + break; + } +} + +void +mISDNisar_irq(struct isar_hw *isar) +{ + struct isar_ch *ch; + + get_irq_infos(isar); + switch (isar->iis & ISAR_IIS_MSCMSD) { + case ISAR_IIS_RDATA: + ch = sel_bch_isar(isar, isar->iis >> 6); + if (ch) + isar_rcv_frame(ch); + else { + pr_debug("%s: ISAR spurious IIS_RDATA %x/%x/%x\n", + isar->name, isar->iis, isar->cmsb, + isar->clsb); + isar->write_reg(isar->hw, ISAR_IIA, 0); + } + break; + case ISAR_IIS_GSTEV: + isar->write_reg(isar->hw, ISAR_IIA, 0); + isar->bstat |= isar->cmsb; + check_send(isar, isar->cmsb); + break; + case ISAR_IIS_BSTEV: +#ifdef ERROR_STATISTIC + ch = sel_bch_isar(isar, isar->iis >> 6); + if (ch) { + if (isar->cmsb == BSTEV_TBO) + ch->bch.err_tx++; + if (isar->cmsb == BSTEV_RBO) + ch->bch.err_rdo++; + } +#endif + pr_debug("%s: Buffer STEV dpath%d msb(%x)\n", + isar->name, isar->iis>>6, isar->cmsb); + isar->write_reg(isar->hw, ISAR_IIA, 0); + break; + case ISAR_IIS_PSTEV: + ch = sel_bch_isar(isar, isar->iis >> 6); + if (ch) { + 
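/* drain the mailbox data; the status event code itself is in isar->cmsb */ +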
rcv_mbox(isar, NULL); + if (ch->bch.state == ISDN_P_B_MODEM_ASYNC) + isar_pump_statev_modem(ch, isar->cmsb); + else if (ch->bch.state == ISDN_P_B_T30_FAX) + isar_pump_statev_fax(ch, isar->cmsb); + else if (ch->bch.state == ISDN_P_B_RAW) { + int tt; + tt = isar->cmsb | 0x30; + if (tt == 0x3e) + tt = '*'; + else if (tt == 0x3f) + tt = '#'; + else if (tt > '9') + tt += 7; + tt |= DTMF_TONE_VAL; + _queue_data(&ch->bch.ch, PH_CONTROL_IND, + MISDN_ID_ANY, sizeof(tt), &tt, + GFP_ATOMIC); + } else + pr_debug("%s: ISAR IIS_PSTEV pm %d sta %x\n", + isar->name, ch->bch.state, + isar->cmsb); + } else { + pr_debug("%s: ISAR spurious IIS_PSTEV %x/%x/%x\n", + isar->name, isar->iis, isar->cmsb, + isar->clsb); + isar->write_reg(isar->hw, ISAR_IIA, 0); + } + break; + case ISAR_IIS_PSTRSP: + ch = sel_bch_isar(isar, isar->iis >> 6); + if (ch) { + rcv_mbox(isar, NULL); + isar_pump_status_rsp(ch); + } else { + pr_debug("%s: ISAR spurious IIS_PSTRSP %x/%x/%x\n", + isar->name, isar->iis, isar->cmsb, + isar->clsb); + isar->write_reg(isar->hw, ISAR_IIA, 0); + } + break; + case ISAR_IIS_DIAG: + case ISAR_IIS_BSTRSP: + case ISAR_IIS_IOM2RSP: + rcv_mbox(isar, NULL); + break; + case ISAR_IIS_INVMSG: + rcv_mbox(isar, NULL); + pr_debug("%s: invalid msg his:%x\n", isar->name, isar->cmsb); + break; + default: + rcv_mbox(isar, NULL); + pr_debug("%s: unhandled msg iis(%x) ctrl(%x/%x)\n", + isar->name, isar->iis, isar->cmsb, isar->clsb); + break; + } +} +EXPORT_SYMBOL(mISDNisar_irq); + +static void +ftimer_handler(unsigned long data) +{ + struct isar_ch *ch = (struct isar_ch *)data; + + pr_debug("%s: ftimer flags %lx\n", ch->is->name, ch->bch.Flags); + test_and_clear_bit(FLG_FTI_RUN, &ch->bch.Flags); + if (test_and_clear_bit(FLG_LL_CONN, &ch->bch.Flags)) + deliver_status(ch, HW_MOD_CONNECT); +} + +static void +setup_pump(struct isar_ch *ch) { + u8 dps = SET_DPS(ch->dpath); + u8 ctrl, param[6]; + + switch (ch->bch.state) { + case ISDN_P_NONE: + case ISDN_P_B_RAW: + case ISDN_P_B_HDLC: + send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, PMOD_BYPASS, 0, NULL); + break; + case ISDN_P_B_L2DTMF: + if (test_bit(FLG_DTMFSEND, &ch->bch.Flags)) { + param[0] = 5; /* TOA 5 db */ + send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, + PMOD_DTMF_TRANS, 1, param); + } else { + param[0] = 40; /* REL -46 dbm */ + send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, + PMOD_DTMF, 1, param); + } + break; + case ISDN_P_B_MODEM_ASYNC: + ctrl = PMOD_DATAMODEM; + if (test_bit(FLG_ORIGIN, &ch->bch.Flags)) { + ctrl |= PCTRL_ORIG; + param[5] = PV32P6_CTN; + } else { + param[5] = PV32P6_ATN; + } + param[0] = 6; /* 6 db */ + param[1] = PV32P2_V23R | PV32P2_V22A | PV32P2_V22B | + PV32P2_V22C | PV32P2_V21 | PV32P2_BEL; + param[2] = PV32P3_AMOD | PV32P3_V32B | PV32P3_V23B; + param[3] = PV32P4_UT144; + param[4] = PV32P5_UT144; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, ctrl, 6, param); + break; + case ISDN_P_B_T30_FAX: + ctrl = PMOD_FAX; + if (test_bit(FLG_ORIGIN, &ch->bch.Flags)) { + ctrl |= PCTRL_ORIG; + param[1] = PFAXP2_CTN; + } else { + param[1] = PFAXP2_ATN; + } + param[0] = 6; /* 6 db */ + send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, ctrl, 2, param); + ch->state = STFAX_NULL; + ch->newcmd = 0; + ch->newmod = 0; + test_and_set_bit(FLG_FTI_RUN, &ch->bch.Flags); + break; + } + udelay(1000); + send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL); + udelay(1000); +} + +static void +setup_sart(struct isar_ch *ch) { + u8 dps = SET_DPS(ch->dpath); + u8 ctrl, param[2] = {0, 0}; + + switch (ch->bch.state) { + case ISDN_P_NONE: + send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, SMODE_DISABLE, + 0, NULL); + 
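/* channel unused: keep the SART disabled until a protocol is selected */ +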
break; + case ISDN_P_B_RAW: + case ISDN_P_B_L2DTMF: + send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, SMODE_BINARY, + 2, param); + break; + case ISDN_P_B_HDLC: + case ISDN_P_B_T30_FAX: + send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, SMODE_HDLC, + 1, param); + break; + case ISDN_P_B_MODEM_ASYNC: + ctrl = SMODE_V14 | SCTRL_HDMC_BOTH; + param[0] = S_P1_CHS_8; + param[1] = S_P2_BFT_DEF; + send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, ctrl, 2, param); + break; + } + udelay(1000); + send_mbox(ch->is, dps | ISAR_HIS_BSTREQ, 0, 0, NULL); + udelay(1000); +} + +static void +setup_iom2(struct isar_ch *ch) { + u8 dps = SET_DPS(ch->dpath); + u8 cmsb = IOM_CTRL_ENA, msg[5] = {IOM_P1_TXD, 0, 0, 0, 0}; + + if (ch->bch.nr == 2) { + msg[1] = 1; + msg[3] = 1; + } + switch (ch->bch.state) { + case ISDN_P_NONE: + cmsb = 0; + /* dummy slot */ + msg[1] = ch->dpath + 2; + msg[3] = ch->dpath + 2; + break; + case ISDN_P_B_RAW: + case ISDN_P_B_HDLC: + break; + case ISDN_P_B_MODEM_ASYNC: + case ISDN_P_B_T30_FAX: + cmsb |= IOM_CTRL_RCV; + /* fall through */ + case ISDN_P_B_L2DTMF: + if (test_bit(FLG_DTMFSEND, &ch->bch.Flags)) + cmsb |= IOM_CTRL_RCV; + cmsb |= IOM_CTRL_ALAW; + break; + } + send_mbox(ch->is, dps | ISAR_HIS_IOM2CFG, cmsb, 5, msg); + udelay(1000); + send_mbox(ch->is, dps | ISAR_HIS_IOM2REQ, 0, 0, NULL); + udelay(1000); +} + +static int +modeisar(struct isar_ch *ch, u32 bprotocol) +{ + /* Here we are selecting the best datapath for requested protocol */ + if (ch->bch.state == ISDN_P_NONE) { /* New Setup */ + switch (bprotocol) { + case ISDN_P_NONE: /* init */ + if (!ch->dpath) + /* no init for dpath 0 */ + return 0; + test_and_clear_bit(FLG_HDLC, &ch->bch.Flags); + test_and_clear_bit(FLG_TRANSPARENT, &ch->bch.Flags); + break; + case ISDN_P_B_RAW: + case ISDN_P_B_HDLC: + /* best is datapath 2 */ + if (!test_and_set_bit(ISAR_DP2_USE, &ch->is->Flags)) + ch->dpath = 2; + else if (!test_and_set_bit(ISAR_DP1_USE, + &ch->is->Flags)) + ch->dpath = 1; + else { + pr_info("modeisar both paths in use\n"); + return -EBUSY; + } + if (bprotocol == ISDN_P_B_HDLC) + test_and_set_bit(FLG_HDLC, &ch->bch.Flags); + else + test_and_set_bit(FLG_TRANSPARENT, + &ch->bch.Flags); + break; + case ISDN_P_B_MODEM_ASYNC: + case ISDN_P_B_T30_FAX: + case ISDN_P_B_L2DTMF: + /* only datapath 1 */ + if (!test_and_set_bit(ISAR_DP1_USE, &ch->is->Flags)) + ch->dpath = 1; + else { + pr_info("%s: ISAR modeisar analog functions " + "only with DP1\n", ch->is->name); + return -EBUSY; + } + break; + default: + pr_info("%s: protocol not known %x\n", ch->is->name, + bprotocol); + return -ENOPROTOOPT; + } + } + pr_debug("%s: ISAR ch%d dp%d protocol %x->%x\n", ch->is->name, + ch->bch.nr, ch->dpath, ch->bch.state, bprotocol); + ch->bch.state = bprotocol; + setup_pump(ch); + setup_iom2(ch); + setup_sart(ch); + if (ch->bch.state == ISDN_P_NONE) { + /* Clear resources */ + if (ch->dpath == 1) + test_and_clear_bit(ISAR_DP1_USE, &ch->is->Flags); + else if (ch->dpath == 2) + test_and_clear_bit(ISAR_DP2_USE, &ch->is->Flags); + ch->dpath = 0; + ch->is->ctrl(ch->is->hw, HW_DEACT_IND, ch->bch.nr); + } else + ch->is->ctrl(ch->is->hw, HW_ACTIVATE_IND, ch->bch.nr); + return 0; +} + +static void +isar_pump_cmd(struct isar_ch *ch, u32 cmd, u8 para) +{ + u8 dps = SET_DPS(ch->dpath); + u8 ctrl = 0, nom = 0, p1 = 0; + + pr_debug("%s: isar_pump_cmd %x/%x state(%x)\n", + ch->is->name, cmd, para, ch->bch.state); + switch (cmd) { + case HW_MOD_FTM: + if (ch->state == STFAX_READY) { + p1 = para; + ctrl = PCTRL_CMD_FTM; + nom = 1; + ch->state = STFAX_LINE; + ch->cmd = ctrl; + ch->mod = para; + ch->newmod = 0; + 
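/* fresh start from READY: no deferred mode or command is pending */ +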
ch->newcmd = 0; + ch->try_mod = 3; + } else if ((ch->state == STFAX_ACTIV) && + (ch->cmd == PCTRL_CMD_FTM) && (ch->mod == para)) + deliver_status(ch, HW_MOD_CONNECT); + else { + ch->newmod = para; + ch->newcmd = PCTRL_CMD_FTM; + nom = 0; + ctrl = PCTRL_CMD_ESC; + ch->state = STFAX_ESCAPE; + } + break; + case HW_MOD_FTH: + if (ch->state == STFAX_READY) { + p1 = para; + ctrl = PCTRL_CMD_FTH; + nom = 1; + ch->state = STFAX_LINE; + ch->cmd = ctrl; + ch->mod = para; + ch->newmod = 0; + ch->newcmd = 0; + ch->try_mod = 3; + } else if ((ch->state == STFAX_ACTIV) && + (ch->cmd == PCTRL_CMD_FTH) && (ch->mod == para)) + deliver_status(ch, HW_MOD_CONNECT); + else { + ch->newmod = para; + ch->newcmd = PCTRL_CMD_FTH; + nom = 0; + ctrl = PCTRL_CMD_ESC; + ch->state = STFAX_ESCAPE; + } + break; + case HW_MOD_FRM: + if (ch->state == STFAX_READY) { + p1 = para; + ctrl = PCTRL_CMD_FRM; + nom = 1; + ch->state = STFAX_LINE; + ch->cmd = ctrl; + ch->mod = para; + ch->newmod = 0; + ch->newcmd = 0; + ch->try_mod = 3; + } else if ((ch->state == STFAX_ACTIV) && + (ch->cmd == PCTRL_CMD_FRM) && (ch->mod == para)) + deliver_status(ch, HW_MOD_CONNECT); + else { + ch->newmod = para; + ch->newcmd = PCTRL_CMD_FRM; + nom = 0; + ctrl = PCTRL_CMD_ESC; + ch->state = STFAX_ESCAPE; + } + break; + case HW_MOD_FRH: + if (ch->state == STFAX_READY) { + p1 = para; + ctrl = PCTRL_CMD_FRH; + nom = 1; + ch->state = STFAX_LINE; + ch->cmd = ctrl; + ch->mod = para; + ch->newmod = 0; + ch->newcmd = 0; + ch->try_mod = 3; + } else if ((ch->state == STFAX_ACTIV) && + (ch->cmd == PCTRL_CMD_FRH) && (ch->mod == para)) + deliver_status(ch, HW_MOD_CONNECT); + else { + ch->newmod = para; + ch->newcmd = PCTRL_CMD_FRH; + nom = 0; + ctrl = PCTRL_CMD_ESC; + ch->state = STFAX_ESCAPE; + } + break; + case PCTRL_CMD_TDTMF: + p1 = para; + nom = 1; + ctrl = PCTRL_CMD_TDTMF; + break; + } + if (ctrl) + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, ctrl, nom, &p1); +} + +static void +isar_setup(struct isar_hw *isar) +{ + u8 msg; + int i; + + /* Dpath 1, 2 */ + msg = 61; + for (i = 0; i < 2; i++) { + /* Buffer Config */ + send_mbox(isar, (i ? 
ISAR_HIS_DPS2 : ISAR_HIS_DPS1) | + ISAR_HIS_P12CFG, 4, 1, &msg); + isar->ch[i].mml = msg; + isar->ch[i].bch.state = 0; + isar->ch[i].dpath = i + 1; + modeisar(&isar->ch[i], ISDN_P_NONE); + } +} + +static int +isar_l2l1(struct mISDNchannel *ch, struct sk_buff *skb) +{ + struct bchannel *bch = container_of(ch, struct bchannel, ch); + struct isar_ch *ich = container_of(bch, struct isar_ch, bch); + int ret = -EINVAL; + struct mISDNhead *hh = mISDN_HEAD_P(skb); + u32 id, *val; + u_long flags; + + switch (hh->prim) { + case PH_DATA_REQ: + spin_lock_irqsave(ich->is->hwlock, flags); + ret = bchannel_senddata(bch, skb); + if (ret > 0) { /* direct TX */ + id = hh->id; /* skb can be freed */ + ret = 0; + isar_fill_fifo(ich); + spin_unlock_irqrestore(ich->is->hwlock, flags); + if (!test_bit(FLG_TRANSPARENT, &bch->Flags)) + queue_ch_frame(ch, PH_DATA_CNF, id, NULL); + } else + spin_unlock_irqrestore(ich->is->hwlock, flags); + return ret; + case PH_ACTIVATE_REQ: + spin_lock_irqsave(ich->is->hwlock, flags); + if (!test_and_set_bit(FLG_ACTIVE, &bch->Flags)) + ret = modeisar(ich, ch->protocol); + else + ret = 0; + spin_unlock_irqrestore(ich->is->hwlock, flags); + if (!ret) + _queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY, 0, + NULL, GFP_KERNEL); + break; + case PH_DEACTIVATE_REQ: + spin_lock_irqsave(ich->is->hwlock, flags); + mISDN_clear_bchannel(bch); + modeisar(ich, ISDN_P_NONE); + spin_unlock_irqrestore(ich->is->hwlock, flags); + _queue_data(ch, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0, + NULL, GFP_KERNEL); + ret = 0; + break; + case PH_CONTROL_REQ: + val = (u32 *)skb->data; + pr_debug("%s: PH_CONTROL | REQUEST %x/%x\n", ich->is->name, + hh->id, *val); + if ((hh->id == 0) && ((*val & ~DTMF_TONE_MASK) == + DTMF_TONE_VAL)) { + if (bch->state == ISDN_P_B_L2DTMF) { + char tt = *val & DTMF_TONE_MASK; + + if (tt == '*') + tt = 0x1e; + else if (tt == '#') + tt = 0x1f; + else if (tt > '9') + tt -= 7; + tt &= 0x1f; + spin_lock_irqsave(ich->is->hwlock, flags); + isar_pump_cmd(ich, PCTRL_CMD_TDTMF, tt); + spin_unlock_irqrestore(ich->is->hwlock, flags); + } else { + pr_info("%s: DTMF send wrong protocol %x\n", + __func__, bch->state); + return -EINVAL; + } + } else if ((hh->id == HW_MOD_FRM) || (hh->id == HW_MOD_FRH) || + (hh->id == HW_MOD_FTM) || (hh->id == HW_MOD_FTH)) { + for (id = 0; id < FAXMODCNT; id++) + if (faxmodulation[id] == *val) + break; + if ((FAXMODCNT > id) && + test_bit(FLG_INITIALIZED, &bch->Flags)) { + pr_debug("%s: isar: new mod\n", ich->is->name); + isar_pump_cmd(ich, hh->id, *val); + ret = 0; + } else { + pr_info("%s: wrong modulation\n", + ich->is->name); + ret = -EINVAL; + } + } else if (hh->id == HW_MOD_LASTDATA) + test_and_set_bit(FLG_DLEETX, &bch->Flags); + else { + pr_info("%s: unknown PH_CONTROL_REQ %x\n", + ich->is->name, hh->id); + ret = -EINVAL; + } + break; + default: + pr_info("%s: %s unknown prim(%x,%x)\n", + ich->is->name, __func__, hh->prim, hh->id); + ret = -EINVAL; + } + if (!ret) + dev_kfree_skb(skb); + return ret; +} + +static int +channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq) +{ + int ret = 0; + + switch (cq->op) { + case MISDN_CTRL_GETOP: + cq->op = 0; + break; + /* Nothing implemented yet */ + case MISDN_CTRL_FILL_EMPTY: + default: + pr_info("%s: unknown Op %x\n", __func__, cq->op); + ret = -EINVAL; + break; + } + return ret; +} + +static int +isar_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg) +{ + struct bchannel *bch = container_of(ch, struct bchannel, ch); + struct isar_ch *ich = container_of(bch, struct isar_ch, bch); + int ret = -EINVAL; + u_long flags; + + 
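/* B-channel control from the mISDN core: CLOSE_CHANNEL and CONTROL_CHANNEL */ +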
pr_debug("%s: %s cmd:%x %p\n", ich->is->name, __func__, cmd, arg); + switch (cmd) { + case CLOSE_CHANNEL: + test_and_clear_bit(FLG_OPEN, &bch->Flags); + if (test_bit(FLG_ACTIVE, &bch->Flags)) { + spin_lock_irqsave(ich->is->hwlock, flags); + mISDN_freebchannel(bch); + modeisar(ich, ISDN_P_NONE); + spin_unlock_irqrestore(ich->is->hwlock, flags); + } else { + skb_queue_purge(&bch->rqueue); + bch->rcount = 0; + } + ch->protocol = ISDN_P_NONE; + ch->peer = NULL; + module_put(ich->is->owner); + ret = 0; + break; + case CONTROL_CHANNEL: + ret = channel_bctrl(bch, arg); + break; + default: + pr_info("%s: %s unknown prim(%x)\n", + ich->is->name, __func__, cmd); + } + return ret; +} + +static void +free_isar(struct isar_hw *isar) +{ + modeisar(&isar->ch[0], ISDN_P_NONE); + modeisar(&isar->ch[1], ISDN_P_NONE); + del_timer(&isar->ch[0].ftimer); + del_timer(&isar->ch[1].ftimer); + test_and_clear_bit(FLG_INITIALIZED, &isar->ch[0].bch.Flags); + test_and_clear_bit(FLG_INITIALIZED, &isar->ch[1].bch.Flags); +} + +static int +init_isar(struct isar_hw *isar) +{ + int cnt = 3; + + while (cnt--) { + isar->version = ISARVersion(isar); + if (isar->ch[0].bch.debug & DEBUG_HW) + pr_notice("%s: Testing version %d (%d time)\n", + isar->name, isar->version, 3 - cnt); + if (isar->version == 1) + break; + isar->ctrl(isar->hw, HW_RESET_REQ, 0); + } + if (isar->version != 1) + return -EINVAL; + isar->ch[0].ftimer.function = &ftimer_handler; + isar->ch[0].ftimer.data = (long)&isar->ch[0]; + init_timer(&isar->ch[0].ftimer); + test_and_set_bit(FLG_INITIALIZED, &isar->ch[0].bch.Flags); + isar->ch[1].ftimer.function = &ftimer_handler; + isar->ch[1].ftimer.data = (long)&isar->ch[1]; + init_timer(&isar->ch[1].ftimer); + test_and_set_bit(FLG_INITIALIZED, &isar->ch[1].bch.Flags); + return 0; +} + +static int +isar_open(struct isar_hw *isar, struct channel_req *rq) +{ + struct bchannel *bch; + + if (rq->adr.channel > 2) + return -EINVAL; + if (rq->protocol == ISDN_P_NONE) + return -EINVAL; + bch = &isar->ch[rq->adr.channel - 1].bch; + if (test_and_set_bit(FLG_OPEN, &bch->Flags)) + return -EBUSY; /* b-channel can be only open once */ + test_and_clear_bit(FLG_FILLEMPTY, &bch->Flags); + bch->ch.protocol = rq->protocol; + rq->ch = &bch->ch; + return 0; +} + +u32 +mISDNisar_init(struct isar_hw *isar, void *hw) +{ + u32 ret, i; + + isar->hw = hw; + for (i = 0; i < 2; i++) { + isar->ch[i].bch.nr = i + 1; + mISDN_initbchannel(&isar->ch[i].bch, MAX_DATA_MEM); + isar->ch[i].bch.ch.nr = i + 1; + isar->ch[i].bch.ch.send = &isar_l2l1; + isar->ch[i].bch.ch.ctrl = isar_bctrl; + isar->ch[i].bch.hw = hw; + isar->ch[i].is = isar; + } + + isar->init = &init_isar; + isar->release = &free_isar; + isar->firmware = &load_firmware; + isar->open = &isar_open; + + ret = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) | + (1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK)) | + (1 << (ISDN_P_B_L2DTMF & ISDN_P_B_MASK)) | + (1 << (ISDN_P_B_MODEM_ASYNC & ISDN_P_B_MASK)) | + (1 << (ISDN_P_B_T30_FAX & ISDN_P_B_MASK)); + + return ret; +} +EXPORT_SYMBOL(mISDNisar_init); + +static int isar_mod_init(void) +{ + pr_notice("mISDN: ISAR driver Rev. 
%s\n", ISAR_REV); + return 0; +} + +static void isar_mod_cleanup(void) +{ + pr_notice("mISDN: ISAR module unloaded\n"); +} +module_init(isar_mod_init); +module_exit(isar_mod_cleanup); diff --git a/drivers/isdn/hardware/mISDN/speedfax.c b/drivers/isdn/hardware/mISDN/speedfax.c new file mode 100644 index 000000000000..ff3a4e290da3 --- /dev/null +++ b/drivers/isdn/hardware/mISDN/speedfax.c @@ -0,0 +1,526 @@ +/* + * speedfax.c low level stuff for Sedlbauer Speedfax+ cards + * based on the ISAR DSP + * Thanks to Sedlbauer AG for informations and HW + * + * Author Karsten Keil + * + * Copyright 2009 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include +#include +#include +#include +#include +#include "ipac.h" +#include "isar.h" + +#define SPEEDFAX_REV "2.0" + +#define PCI_SUBVENDOR_SPEEDFAX_PYRAMID 0x51 +#define PCI_SUBVENDOR_SPEEDFAX_PCI 0x54 +#define PCI_SUB_ID_SEDLBAUER 0x01 + +#define SFAX_PCI_ADDR 0xc8 +#define SFAX_PCI_ISAC 0xd0 +#define SFAX_PCI_ISAR 0xe0 + +/* TIGER 100 Registers */ + +#define TIGER_RESET_ADDR 0x00 +#define TIGER_EXTERN_RESET_ON 0x01 +#define TIGER_EXTERN_RESET_OFF 0x00 +#define TIGER_AUX_CTRL 0x02 +#define TIGER_AUX_DATA 0x03 +#define TIGER_AUX_IRQMASK 0x05 +#define TIGER_AUX_STATUS 0x07 + +/* Tiger AUX BITs */ +#define SFAX_AUX_IOMASK 0xdd /* 1 and 5 are inputs */ +#define SFAX_ISAR_RESET_BIT_OFF 0x00 +#define SFAX_ISAR_RESET_BIT_ON 0x01 +#define SFAX_TIGER_IRQ_BIT 0x02 +#define SFAX_LED1_BIT 0x08 +#define SFAX_LED2_BIT 0x10 + +#define SFAX_PCI_RESET_ON (SFAX_ISAR_RESET_BIT_ON) +#define SFAX_PCI_RESET_OFF (SFAX_LED1_BIT | SFAX_LED2_BIT) + +static int sfax_cnt; +static u32 debug; +static u32 irqloops = 4; + +struct sfax_hw { + struct list_head list; + struct pci_dev *pdev; + char name[MISDN_MAX_IDLEN]; + u32 irq; + u32 irqcnt; + u32 cfg; + struct _ioport p_isac; + struct _ioport p_isar; + u8 aux_data; + spinlock_t lock; /* HW access lock */ + struct isac_hw isac; + struct isar_hw isar; +}; + +static LIST_HEAD(Cards); +static DEFINE_RWLOCK(card_lock); /* protect Cards */ + +static void +_set_debug(struct sfax_hw *card) +{ + card->isac.dch.debug = debug; + card->isar.ch[0].bch.debug = debug; + card->isar.ch[1].bch.debug = debug; +} + +static int +set_debug(const char *val, struct kernel_param *kp) +{ + int ret; + struct sfax_hw *card; + + ret = param_set_uint(val, kp); + if (!ret) { + read_lock(&card_lock); + list_for_each_entry(card, &Cards, list) + _set_debug(card); + read_unlock(&card_lock); + } + return ret; +} + +MODULE_AUTHOR("Karsten Keil"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(SPEEDFAX_REV); +module_param_call(debug, set_debug, param_get_uint, &debug, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "Speedfax debug mask"); +module_param(irqloops, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(irqloops, "Speedfax maximal irqloops (default 4)"); + +IOFUNC_IND(ISAC, sfax_hw, p_isac) +IOFUNC_IND(ISAR, sfax_hw, p_isar) + +static irqreturn_t 
+speedfax_irq(int intno, void *dev_id) +{ + struct sfax_hw *sf = dev_id; + u8 val; + int cnt = irqloops; + + spin_lock(&sf->lock); + val = inb(sf->cfg + TIGER_AUX_STATUS); + if (val & SFAX_TIGER_IRQ_BIT) { /* for us or shared ? */ + spin_unlock(&sf->lock); + return IRQ_NONE; /* shared */ + } + sf->irqcnt++; + val = ReadISAR_IND(sf, ISAR_IRQBIT); +Start_ISAR: + if (val & ISAR_IRQSTA) + mISDNisar_irq(&sf->isar); + val = ReadISAC_IND(sf, ISAC_ISTA); + if (val) + mISDNisac_irq(&sf->isac, val); + val = ReadISAR_IND(sf, ISAR_IRQBIT); + if ((val & ISAR_IRQSTA) && cnt--) + goto Start_ISAR; + if (cnt < irqloops) + pr_debug("%s: %d irqloops cpu%d\n", sf->name, + irqloops - cnt, smp_processor_id()); + if (irqloops && !cnt) + pr_notice("%s: %d IRQ LOOP cpu%d\n", sf->name, + irqloops, smp_processor_id()); + spin_unlock(&sf->lock); + return IRQ_HANDLED; +} + +static void +enable_hwirq(struct sfax_hw *sf) +{ + WriteISAC_IND(sf, ISAC_MASK, 0); + WriteISAR_IND(sf, ISAR_IRQBIT, ISAR_IRQMSK); + outb(SFAX_TIGER_IRQ_BIT, sf->cfg + TIGER_AUX_IRQMASK); +} + +static void +disable_hwirq(struct sfax_hw *sf) +{ + WriteISAC_IND(sf, ISAC_MASK, 0xFF); + WriteISAR_IND(sf, ISAR_IRQBIT, 0); + outb(0, sf->cfg + TIGER_AUX_IRQMASK); +} + +static void +reset_speedfax(struct sfax_hw *sf) +{ + + pr_debug("%s: resetting card\n", sf->name); + outb(TIGER_EXTERN_RESET_ON, sf->cfg + TIGER_RESET_ADDR); + outb(SFAX_PCI_RESET_ON, sf->cfg + TIGER_AUX_DATA); + mdelay(1); + outb(TIGER_EXTERN_RESET_OFF, sf->cfg + TIGER_RESET_ADDR); + sf->aux_data = SFAX_PCI_RESET_OFF; + outb(sf->aux_data, sf->cfg + TIGER_AUX_DATA); + mdelay(1); +} + +static int +sfax_ctrl(struct sfax_hw *sf, u32 cmd, u_long arg) +{ + int ret = 0; + + switch (cmd) { + case HW_RESET_REQ: + reset_speedfax(sf); + break; + case HW_ACTIVATE_IND: + if (arg & 1) + sf->aux_data &= ~SFAX_LED1_BIT; + if (arg & 2) + sf->aux_data &= ~SFAX_LED2_BIT; + outb(sf->aux_data, sf->cfg + TIGER_AUX_DATA); + break; + case HW_DEACT_IND: + if (arg & 1) + sf->aux_data |= SFAX_LED1_BIT; + if (arg & 2) + sf->aux_data |= SFAX_LED2_BIT; + outb(sf->aux_data, sf->cfg + TIGER_AUX_DATA); + break; + default: + pr_info("%s: %s unknown command %x %lx\n", + sf->name, __func__, cmd, arg); + ret = -EINVAL; + break; + } + return ret; +} + +static int +channel_ctrl(struct sfax_hw *sf, struct mISDN_ctrl_req *cq) +{ + int ret = 0; + + switch (cq->op) { + case MISDN_CTRL_GETOP: + cq->op = MISDN_CTRL_LOOP; + break; + case MISDN_CTRL_LOOP: + /* cq->channel: 0 disable, 1 B1 loop 2 B2 loop, 3 both */ + if (cq->channel < 0 || cq->channel > 3) { + ret = -EINVAL; + break; + } + ret = sf->isac.ctrl(&sf->isac, HW_TESTLOOP, cq->channel); + break; + default: + pr_info("%s: unknown Op %x\n", sf->name, cq->op); + ret = -EINVAL; + break; + } + return ret; +} + +static int +sfax_dctrl(struct mISDNchannel *ch, u32 cmd, void *arg) +{ + struct mISDNdevice *dev = container_of(ch, struct mISDNdevice, D); + struct dchannel *dch = container_of(dev, struct dchannel, dev); + struct sfax_hw *sf = dch->hw; + struct channel_req *rq; + int err = 0; + + pr_debug("%s: cmd:%x %p\n", sf->name, cmd, arg); + switch (cmd) { + case OPEN_CHANNEL: + rq = arg; + if (rq->protocol == ISDN_P_TE_S0) + err = sf->isac.open(&sf->isac, rq); + else + err = sf->isar.open(&sf->isar, rq); + if (err) + break; + if (!try_module_get(THIS_MODULE)) + pr_info("%s: cannot get module\n", sf->name); + break; + case CLOSE_CHANNEL: + pr_debug("%s: dev(%d) close from %p\n", sf->name, + dch->dev.id, __builtin_return_address(0)); + module_put(THIS_MODULE); + break; + case 
CONTROL_CHANNEL: + err = channel_ctrl(sf, arg); + break; + default: + pr_debug("%s: unknown command %x\n", sf->name, cmd); + return -EINVAL; + } + return err; +} + +static int __devinit +init_card(struct sfax_hw *sf) +{ + int ret, cnt = 3; + u_long flags; + + ret = request_irq(sf->irq, speedfax_irq, IRQF_SHARED, sf->name, sf); + if (ret) { + pr_info("%s: couldn't get interrupt %d\n", sf->name, sf->irq); + return ret; + } + while (cnt--) { + spin_lock_irqsave(&sf->lock, flags); + ret = sf->isac.init(&sf->isac); + if (ret) { + spin_unlock_irqrestore(&sf->lock, flags); + pr_info("%s: ISAC init failed with %d\n", + sf->name, ret); + break; + } + enable_hwirq(sf); + /* RESET Receiver and Transmitter */ + WriteISAC_IND(sf, ISAC_CMDR, 0x41); + spin_unlock_irqrestore(&sf->lock, flags); + msleep_interruptible(10); + if (debug & DEBUG_HW) + pr_notice("%s: IRQ %d count %d\n", sf->name, + sf->irq, sf->irqcnt); + if (!sf->irqcnt) { + pr_info("%s: IRQ(%d) got no requests during init %d\n", + sf->name, sf->irq, 3 - cnt); + } else + return 0; + } + free_irq(sf->irq, sf); + return -EIO; +} + + +static int __devinit +setup_speedfax(struct sfax_hw *sf) +{ + u_long flags; + + if (!request_region(sf->cfg, 256, sf->name)) { + pr_info("mISDN: %s config port %x-%x already in use\n", + sf->name, sf->cfg, sf->cfg + 255); + return -EIO; + } + outb(0xff, sf->cfg); + outb(0, sf->cfg); + outb(0xdd, sf->cfg + TIGER_AUX_CTRL); + outb(0, sf->cfg + TIGER_AUX_IRQMASK); + + sf->isac.type = IPAC_TYPE_ISAC; + sf->p_isac.ale = sf->cfg + SFAX_PCI_ADDR; + sf->p_isac.port = sf->cfg + SFAX_PCI_ISAC; + sf->p_isar.ale = sf->cfg + SFAX_PCI_ADDR; + sf->p_isar.port = sf->cfg + SFAX_PCI_ISAR; + ASSIGN_FUNC(IND, ISAC, sf->isac); + ASSIGN_FUNC(IND, ISAR, sf->isar); + spin_lock_irqsave(&sf->lock, flags); + reset_speedfax(sf); + disable_hwirq(sf); + spin_unlock_irqrestore(&sf->lock, flags); + return 0; +} + +static void +release_card(struct sfax_hw *card) { + u_long flags; + + spin_lock_irqsave(&card->lock, flags); + disable_hwirq(card); + spin_unlock_irqrestore(&card->lock, flags); + card->isac.release(&card->isac); + free_irq(card->irq, card); + card->isar.release(&card->isar); + mISDN_unregister_device(&card->isac.dch.dev); + release_region(card->cfg, 256); + pci_disable_device(card->pdev); + pci_set_drvdata(card->pdev, NULL); + write_lock_irqsave(&card_lock, flags); + list_del(&card->list); + write_unlock_irqrestore(&card_lock, flags); + kfree(card); + sfax_cnt--; +} + +static int __devinit +setup_instance(struct sfax_hw *card) +{ + const struct firmware *firmware; + int i, err; + u_long flags; + + snprintf(card->name, MISDN_MAX_IDLEN - 1, "Speedfax.%d", sfax_cnt + 1); + write_lock_irqsave(&card_lock, flags); + list_add_tail(&card->list, &Cards); + write_unlock_irqrestore(&card_lock, flags); + _set_debug(card); + spin_lock_init(&card->lock); + card->isac.hwlock = &card->lock; + card->isar.hwlock = &card->lock; + card->isar.ctrl = (void *)&sfax_ctrl; + card->isac.name = card->name; + card->isar.name = card->name; + card->isar.owner = THIS_MODULE; + + err = request_firmware(&firmware, "isdn/ISAR.BIN", &card->pdev->dev); + if (err < 0) { + pr_info("%s: firmware request failed %d\n", + card->name, err); + goto error_fw; + } + if (debug & DEBUG_HW) + pr_notice("%s: got firmware %zu bytes\n", + card->name, firmware->size); + + mISDNisac_init(&card->isac, card); + + card->isac.dch.dev.D.ctrl = sfax_dctrl; + card->isac.dch.dev.Bprotocols = + mISDNisar_init(&card->isar, card); + for (i = 0; i < 2; i++) { + set_channelmap(i + 1, 
card->isac.dch.dev.channelmap); + list_add(&card->isar.ch[i].bch.ch.list, + &card->isac.dch.dev.bchannels); + } + + err = setup_speedfax(card); + if (err) + goto error_setup; + err = card->isar.init(&card->isar); + if (err) + goto error; + err = mISDN_register_device(&card->isac.dch.dev, + &card->pdev->dev, card->name); + if (err) + goto error; + err = init_card(card); + if (err) + goto error_init; + err = card->isar.firmware(&card->isar, firmware->data, firmware->size); + if (!err) { + release_firmware(firmware); + sfax_cnt++; + pr_notice("SpeedFax %d cards installed\n", sfax_cnt); + return 0; + } + disable_hwirq(card); + free_irq(card->irq, card); +error_init: + mISDN_unregister_device(&card->isac.dch.dev); +error: + release_region(card->cfg, 256); +error_setup: + card->isac.release(&card->isac); + card->isar.release(&card->isar); + release_firmware(firmware); +error_fw: + pci_disable_device(card->pdev); + write_lock_irqsave(&card_lock, flags); + list_del(&card->list); + write_unlock_irqrestore(&card_lock, flags); + kfree(card); + return err; +} + +static int __devinit +sfaxpci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int err = -ENOMEM; + struct sfax_hw *card = kzalloc(sizeof(struct sfax_hw), GFP_KERNEL); + + if (!card) { + pr_info("No memory for Speedfax+ PCI\n"); + return err; + } + card->pdev = pdev; + err = pci_enable_device(pdev); + if (err) { + kfree(card); + return err; + } + + pr_notice("mISDN: Speedfax found adapter %s at %s\n", + (char *)ent->driver_data, pci_name(pdev)); + + card->cfg = pci_resource_start(pdev, 0); + card->irq = pdev->irq; + pci_set_drvdata(pdev, card); + err = setup_instance(card); + if (err) + pci_set_drvdata(pdev, NULL); + return err; +} + +static void __devexit +sfax_remove_pci(struct pci_dev *pdev) +{ + struct sfax_hw *card = pci_get_drvdata(pdev); + + if (card) + release_card(card); + else + pr_debug("%s: drvdata already removed\n", __func__); +} + +static struct pci_device_id sfaxpci_ids[] __devinitdata = { + { PCI_VENDOR_ID_TIGERJET, PCI_DEVICE_ID_TIGERJET_100, + PCI_SUBVENDOR_SPEEDFAX_PYRAMID, PCI_SUB_ID_SEDLBAUER, + 0, 0, (unsigned long) "Pyramid Speedfax + PCI" + }, + { PCI_VENDOR_ID_TIGERJET, PCI_DEVICE_ID_TIGERJET_100, + PCI_SUBVENDOR_SPEEDFAX_PCI, PCI_SUB_ID_SEDLBAUER, + 0, 0, (unsigned long) "Sedlbauer Speedfax + PCI" + }, + { } +}; +MODULE_DEVICE_TABLE(pci, sfaxpci_ids); + +static struct pci_driver sfaxpci_driver = { + .name = "speedfax+ pci", + .probe = sfaxpci_probe, + .remove = __devexit_p(sfax_remove_pci), + .id_table = sfaxpci_ids, +}; + +static int __init +Speedfax_init(void) +{ + int err; + + pr_notice("Sedlbauer Speedfax+ Driver Rev. 
%s\n", + SPEEDFAX_REV); + err = pci_register_driver(&sfaxpci_driver); + return err; +} + +static void __exit +Speedfax_cleanup(void) +{ + pci_unregister_driver(&sfaxpci_driver); +} + +module_init(Speedfax_init); +module_exit(Speedfax_cleanup); diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 45100b39a7cf..536ca12442ca 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -37,7 +37,7 @@ */ #define MISDN_MAJOR_VERSION 1 #define MISDN_MINOR_VERSION 1 -#define MISDN_RELEASE 20 +#define MISDN_RELEASE 21 /* primitives for information exchange * generell format @@ -153,6 +153,18 @@ #define HFC_VOL_CHANGE_RX 0x2602 #define HFC_SPL_LOOP_ON 0x2603 #define HFC_SPL_LOOP_OFF 0x2604 +/* for T30 FAX and analog modem */ +#define HW_MOD_FRM 0x4000 +#define HW_MOD_FRH 0x4001 +#define HW_MOD_FTM 0x4002 +#define HW_MOD_FTH 0x4003 +#define HW_MOD_FTS 0x4004 +#define HW_MOD_CONNECT 0x4010 +#define HW_MOD_OK 0x4011 +#define HW_MOD_NOCARR 0x4012 +#define HW_MOD_FCERROR 0x4013 +#define HW_MOD_READY 0x4014 +#define HW_MOD_LASTDATA 0x4015 /* DSP_TONE_PATT_ON parameter */ #define TONE_OFF 0x0000 @@ -224,6 +236,8 @@ #define ISDN_P_B_L2DTMF 0x24 #define ISDN_P_B_L2DSP 0x25 #define ISDN_P_B_L2DSPHDLC 0x26 +#define ISDN_P_B_T30_FAX 0x27 +#define ISDN_P_B_MODEM_ASYNC 0x28 #define OPTION_L2_PMX 1 #define OPTION_L2_PTP 2 -- cgit v1.2.3 From 633aae23ff31bef692a70772652e753a0ae59b81 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 22 Jul 2009 21:51:36 -0700 Subject: Input: serio - switch to using dev_pm_ops Signed-off-by: Dmitry Torokhov --- drivers/input/serio/serio.c | 23 +++++++++++------------ include/linux/serio.h | 2 -- 2 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index d66f4944f2a0..0236f0d5fd91 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -931,15 +931,11 @@ static int serio_uevent(struct device *dev, struct kobj_uevent_env *env) #endif /* CONFIG_HOTPLUG */ #ifdef CONFIG_PM -static int serio_suspend(struct device *dev, pm_message_t state) +static int serio_suspend(struct device *dev) { struct serio *serio = to_serio_port(dev); - if (!serio->suspended && state.event == PM_EVENT_SUSPEND) - serio_cleanup(serio); - - serio->suspended = state.event == PM_EVENT_SUSPEND || - state.event == PM_EVENT_FREEZE; + serio_cleanup(serio); return 0; } @@ -952,13 +948,17 @@ static int serio_resume(struct device *dev) * Driver reconnect can take a while, so better let kseriod * deal with it. 
*/ - if (serio->suspended) { - serio->suspended = false; - serio_queue_event(serio, NULL, SERIO_RECONNECT_PORT); - } + serio_queue_event(serio, NULL, SERIO_RECONNECT_PORT); return 0; } + +static const struct dev_pm_ops serio_pm_ops = { + .suspend = serio_suspend, + .resume = serio_resume, + .poweroff = serio_suspend, + .restore = serio_resume, +}; #endif /* CONFIG_PM */ /* called from serio_driver->connect/disconnect methods under serio_mutex */ @@ -1015,8 +1015,7 @@ static struct bus_type serio_bus = { .remove = serio_driver_remove, .shutdown = serio_shutdown, #ifdef CONFIG_PM - .suspend = serio_suspend, - .resume = serio_resume, + .pm = &serio_pm_ops, #endif }; diff --git a/include/linux/serio.h b/include/linux/serio.h index 126d24c9eaa8..a640bc2afe76 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -31,8 +31,6 @@ struct serio { bool manual_bind; bool registered; /* port has been fully registered with driver core */ - bool suspended; /* port is suspended */ - struct serio_device_id id; -- cgit v1.2.3 From b5eb0589937eae2d58fca17fa45ed44152e772ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Jul 2009 05:23:23 +0000 Subject: net: deprecate print_mac We've had %pM for long enough now, time to deprecate print_mac() and remove the __maybe_unused attribute from DECLARE_MAC_BUF so that variables declared with that can be found and removed. Otherwise people are putting in new users of print_mac(). Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/if_ether.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 70fdba2bbf71..580b6004d00e 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -139,10 +139,10 @@ extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); /* * Display a 6 byte device address (MAC) in a readable format. */ -extern char *print_mac(char *buf, const unsigned char *addr); +extern char *print_mac(char *buf, const unsigned char *addr) __deprecated; #define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" #define MAC_BUF_SIZE 18 -#define DECLARE_MAC_BUF(var) char var[MAC_BUF_SIZE] __maybe_unused +#define DECLARE_MAC_BUF(var) char var[MAC_BUF_SIZE] #endif -- cgit v1.2.3 From 51def0bea92629dff02ff1de40603eb90c609c55 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 22 Jul 2009 14:06:56 +0000 Subject: iwmc3200: move iwmc3200 SDIO ids to sdio_ids.h 1. add Intel's SDIO vendor id to sdio_ids.h 2. move iwmc3200 SDIO devices' ids to sdio_ids.h Signed-off-by: Tomas Winkler Signed-off-by: David S. 
Miller --- drivers/net/wimax/i2400m/sdio.c | 12 +++++------- drivers/net/wireless/iwmc3200wifi/sdio.c | 4 +++- drivers/net/wireless/iwmc3200wifi/sdio.h | 3 --- include/linux/mmc/sdio_ids.h | 6 ++++++ 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wimax/i2400m/sdio.c b/drivers/net/wimax/i2400m/sdio.c index 2538825d1c66..ea7b29034aab 100644 --- a/drivers/net/wimax/i2400m/sdio.c +++ b/drivers/net/wimax/i2400m/sdio.c @@ -58,6 +58,7 @@ */ #include +#include #include #include #include "i2400m-sdio.h" @@ -501,15 +502,12 @@ void i2400ms_remove(struct sdio_func *func) d_fnend(3, dev, "SDIO func %p\n", func); } -enum { - I2400MS_INTEL_VID = 0x89, -}; - static const struct sdio_device_id i2400ms_sdio_ids[] = { - /* Intel: i2400m WiMAX over SDIO */ - { SDIO_DEVICE(I2400MS_INTEL_VID, 0x1402) }, - { }, /* end: all zeroes */ + /* Intel: i2400m WiMAX (iwmc3200) over SDIO */ + { SDIO_DEVICE(SDIO_VENDOR_ID_INTEL, + SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX) }, + { /* end: all zeroes */ }, }; MODULE_DEVICE_TABLE(sdio, i2400ms_sdio_ids); diff --git a/drivers/net/wireless/iwmc3200wifi/sdio.c b/drivers/net/wireless/iwmc3200wifi/sdio.c index b93f620ee4f1..8b1de84003ca 100644 --- a/drivers/net/wireless/iwmc3200wifi/sdio.c +++ b/drivers/net/wireless/iwmc3200wifi/sdio.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -492,7 +493,8 @@ static void iwm_sdio_remove(struct sdio_func *func) } static const struct sdio_device_id iwm_sdio_ids[] = { - { SDIO_DEVICE(SDIO_VENDOR_ID_INTEL, SDIO_DEVICE_ID_IWM) }, + { SDIO_DEVICE(SDIO_VENDOR_ID_INTEL, + SDIO_DEVICE_ID_INTEL_IWMC3200WIFI) }, { /* end: all zeroes */ }, }; MODULE_DEVICE_TABLE(sdio, iwm_sdio_ids); diff --git a/drivers/net/wireless/iwmc3200wifi/sdio.h b/drivers/net/wireless/iwmc3200wifi/sdio.h index b3c156b08dda..aab6b6892e45 100644 --- a/drivers/net/wireless/iwmc3200wifi/sdio.h +++ b/drivers/net/wireless/iwmc3200wifi/sdio.h @@ -39,9 +39,6 @@ #ifndef __IWM_SDIO_H__ #define __IWM_SDIO_H__ -#define SDIO_VENDOR_ID_INTEL 0x89 -#define SDIO_DEVICE_ID_IWM 0x1403 - #define IWM_SDIO_DATA_ADDR 0x0 #define IWM_SDIO_INTR_ENABLE_ADDR 0x14 #define IWM_SDIO_INTR_STATUS_ADDR 0x13 diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 39751c8cde9c..2dbfb5a05994 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -22,6 +22,12 @@ /* * Vendors and devices. Sort key: vendor first, device next. */ +#define SDIO_VENDOR_ID_INTEL 0x0089 +#define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX 0x1402 +#define SDIO_DEVICE_ID_INTEL_IWMC3200WIFI 0x1403 +#define SDIO_DEVICE_ID_INTEL_IWMC3200TOP 0x1404 +#define SDIO_DEVICE_ID_INTEL_IWMC3200GPS 0x1405 +#define SDIO_DEVICE_ID_INTEL_IWMC3200BT 0x1406 #define SDIO_VENDOR_ID_MARVELL 0x02df #define SDIO_DEVICE_ID_MARVELL_LIBERTAS 0x9103 -- cgit v1.2.3 From 8a729fce76f7af50d8b622f2fb26adce9c8df743 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 26 Jul 2009 23:18:11 +0000 Subject: net: ethtool_op_get_rx_csum() should be public and exported Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/ethtool.h | 1 + net/core/ethtool.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9b660bd2e2b3..90c4a3616d94 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -368,6 +368,7 @@ struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ u32 ethtool_op_get_link(struct net_device *dev); +u32 ethtool_op_get_rx_csum(struct net_device *dev); u32 ethtool_op_get_tx_csum(struct net_device *dev); int ethtool_op_set_tx_csum(struct net_device *dev, u32 data); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index cf36ff44ebb2..44e571111d3a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -34,11 +34,13 @@ u32 ethtool_op_get_rx_csum(struct net_device *dev) { return (dev->features & NETIF_F_ALL_CSUM) != 0; } +EXPORT_SYMBOL(ethtool_op_get_rx_csum); u32 ethtool_op_get_tx_csum(struct net_device *dev) { return (dev->features & NETIF_F_ALL_CSUM) != 0; } +EXPORT_SYMBOL(ethtool_op_get_tx_csum); int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) { @@ -1125,7 +1127,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) EXPORT_SYMBOL(ethtool_op_get_link); EXPORT_SYMBOL(ethtool_op_get_sg); EXPORT_SYMBOL(ethtool_op_get_tso); -EXPORT_SYMBOL(ethtool_op_get_tx_csum); EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); -- cgit v1.2.3 From 463d018323851a608eef52a9427b0585005c647f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 14 Jul 2009 00:33:35 +0200 Subject: cfg80211: make aware of net namespaces In order to make cfg80211/nl80211 aware of network namespaces, we have to do the following things: * del_virtual_intf method takes an interface index rather than a netdev pointer - simply change this * nl80211 uses init_net a lot, it changes to use the sender's network namespace * scan requests use the interface index, hold a netdev pointer and reference instead * we want a wiphy and its associated virtual interfaces to be in one netns together, so - we need to be able to change ns for a given interface, so export dev_change_net_namespace() - for each virtual interface set the NETIF_F_NETNS_LOCAL flag, and clear that flag only when the wiphy changes ns, to disallow breaking this invariant * when a network namespace goes away, we need to reparent the wiphy to init_net * cfg80211 users that support creating virtual interfaces must create them in the wiphy's namespace, currently this affects only mac80211 The end result is that you can now switch an entire wiphy into a different network namespace with the new command iw phy# set netns and all virtual interfaces will follow (or the operation fails). Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 9 +++ include/net/cfg80211.h | 40 +++++++++- net/core/dev.c | 1 + net/mac80211/cfg.c | 14 +--- net/wireless/core.c | 75 ++++++++++++++++-- net/wireless/core.h | 5 +- net/wireless/nl80211.c | 202 +++++++++++++++++++++++++++++++++--------------- net/wireless/scan.c | 22 ++---- net/wireless/sme.c | 3 +- 9 files changed, 272 insertions(+), 99 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 962e2232a074..cb3dc6027fd9 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -262,6 +262,9 @@ * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and * %NL80211_ATTR_REASON_CODE attributes are used. * + * @NL80211_CMD_SET_WIPHY_NETNS: Set a wiphy's netns. Note that all devices + * associated with this wiphy must be down and will follow. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -336,6 +339,8 @@ enum nl80211_commands { NL80211_CMD_ROAM, NL80211_CMD_DISCONNECT, + NL80211_CMD_SET_WIPHY_NETNS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -573,6 +578,8 @@ enum nl80211_commands { * and join_ibss(), key information is in a nested attribute each * with %NL80211_KEY_* sub-attributes * + * @NL80211_ATTR_PID: Process ID of a network namespace. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -701,6 +708,8 @@ enum nl80211_attrs { NL80211_ATTR_KEY, NL80211_ATTR_KEYS, + NL80211_ATTR_PID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a981ca8a5701..0d278777e39c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -542,7 +542,7 @@ struct cfg80211_ssid { * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @wiphy: the wiphy this was for - * @ifidx: the interface index + * @dev: the interface */ struct cfg80211_scan_request { struct cfg80211_ssid *ssids; @@ -554,7 +554,7 @@ struct cfg80211_scan_request { /* internal */ struct wiphy *wiphy; - int ifidx; + struct net_device *dev; bool aborted; }; @@ -845,7 +845,8 @@ struct cfg80211_bitrate_mask { * @resume: wiphy device needs to be resumed * * @add_virtual_intf: create a new virtual interface with the given name, - * must set the struct wireless_dev's iftype. + * must set the struct wireless_dev's iftype. Beware: You must create + * the new netdev in the wiphy's network namespace! * * @del_virtual_intf: remove the virtual interface determined by ifindex. 
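* (with this change the callback receives the struct net_device pointer directly)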
* @@ -937,7 +938,7 @@ struct cfg80211_ops { int (*add_virtual_intf)(struct wiphy *wiphy, char *name, enum nl80211_iftype type, u32 *flags, struct vif_params *params); - int (*del_virtual_intf)(struct wiphy *wiphy, int ifindex); + int (*del_virtual_intf)(struct wiphy *wiphy, struct net_device *dev); int (*change_virtual_intf)(struct wiphy *wiphy, struct net_device *dev, enum nl80211_iftype type, u32 *flags, @@ -1088,6 +1089,9 @@ struct cfg80211_ops { * @frag_threshold: Fragmentation threshold (dot11FragmentationThreshold); * -1 = fragmentation disabled, only odd values >= 256 used * @rts_threshold: RTS threshold (dot11RTSThreshold); -1 = RTS/CTS disabled + * @net: the network namespace this wiphy currently lives in + * @netnsok: if set to false, do not allow changing the netns of this + * wiphy at all */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1101,6 +1105,8 @@ struct wiphy { bool custom_regulatory; bool strict_regulatory; + bool netnsok; + enum cfg80211_signal_type signal_type; int bss_priv_size; @@ -1139,9 +1145,35 @@ struct wiphy { /* dir in debugfs: ieee80211/ */ struct dentry *debugfsdir; +#ifdef CONFIG_NET_NS + /* the network namespace this phy lives in currently */ + struct net *_net; +#endif + char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); }; +#ifdef CONFIG_NET_NS +static inline struct net *wiphy_net(struct wiphy *wiphy) +{ + return wiphy->_net; +} + +static inline void wiphy_net_set(struct wiphy *wiphy, struct net *net) +{ + wiphy->_net = net; +} +#else +static inline struct net *wiphy_net(struct wiphy *wiphy) +{ + return &init_net; +} + +static inline void wiphy_net_set(struct wiphy *wiphy, struct net *net) +{ +} +#endif + /** * wiphy_priv - return priv from wiphy * diff --git a/net/core/dev.c b/net/core/dev.c index d6c657ee413d..71347668c506 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5344,6 +5344,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char out: return err; } +EXPORT_SYMBOL_GPL(dev_change_net_namespace); static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 52928ad90570..4bbf5007799b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -57,19 +57,9 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, return 0; } -static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex) +static int ieee80211_del_iface(struct wiphy *wiphy, struct net_device *dev) { - struct net_device *dev; - struct ieee80211_sub_if_data *sdata; - - /* we're under RTNL */ - dev = __dev_get_by_index(&init_net, ifindex); - if (!dev) - return -ENODEV; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - ieee80211_if_remove(sdata); + ieee80211_if_remove(IEEE80211_DEV_TO_SUB_IF(dev)); return 0; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 6891cd0e38d5..442c9f389799 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -106,7 +106,7 @@ __cfg80211_rdev_from_info(struct genl_info *info) if (info->attrs[NL80211_ATTR_IFINDEX]) { ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(genl_info_net(info), ifindex); if (dev) { if (dev->ieee80211_ptr) byifidx = @@ -151,13 +151,13 @@ cfg80211_get_dev_from_info(struct genl_info *info) } struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(int ifindex) +cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) { struct cfg80211_registered_device *rdev = ERR_PTR(-ENODEV); 
struct net_device *dev; mutex_lock(&cfg80211_mutex); - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(net, ifindex); if (!dev) goto out; if (dev->ieee80211_ptr) { @@ -222,6 +222,42 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, return 0; } +int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, + struct net *net) +{ + struct wireless_dev *wdev; + int err = 0; + + if (!rdev->wiphy.netnsok) + return -EOPNOTSUPP; + + list_for_each_entry(wdev, &rdev->netdev_list, list) { + wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; + err = dev_change_net_namespace(wdev->netdev, net, "wlan%d"); + if (err) + break; + wdev->netdev->features |= NETIF_F_NETNS_LOCAL; + } + + if (err) { + /* failed -- clean up to old netns */ + net = wiphy_net(&rdev->wiphy); + + list_for_each_entry_continue_reverse(wdev, &rdev->netdev_list, + list) { + wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; + err = dev_change_net_namespace(wdev->netdev, net, + "wlan%d"); + WARN_ON(err); + wdev->netdev->features |= NETIF_F_NETNS_LOCAL; + } + } + + wiphy_net_set(&rdev->wiphy, net); + + return err; +} + static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) { struct cfg80211_registered_device *rdev = data; @@ -375,6 +411,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; + wiphy_net_set(&rdev->wiphy, &init_net); + rdev->rfkill_ops.set_block = cfg80211_rfkill_set_block; rdev->rfkill = rfkill_alloc(dev_name(&rdev->wiphy.dev), &rdev->wiphy.dev, RFKILL_TYPE_WLAN, @@ -615,6 +653,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, spin_lock_init(&wdev->event_lock); mutex_lock(&rdev->devlist_mtx); list_add(&wdev->list, &rdev->netdev_list); + /* can only change netns with wiphy */ + dev->features |= NETIF_F_NETNS_LOCAL; + if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, "phy80211")) { printk(KERN_ERR "wireless: failed to add phy80211 " @@ -705,10 +746,32 @@ static struct notifier_block cfg80211_netdev_notifier = { .notifier_call = cfg80211_netdev_notifier_call, }; -static int cfg80211_init(void) +static void __net_exit cfg80211_pernet_exit(struct net *net) +{ + struct cfg80211_registered_device *rdev; + + rtnl_lock(); + mutex_lock(&cfg80211_mutex); + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (net_eq(wiphy_net(&rdev->wiphy), net)) + WARN_ON(cfg80211_switch_netns(rdev, &init_net)); + } + mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); +} + +static struct pernet_operations cfg80211_pernet_ops = { + .exit = cfg80211_pernet_exit, +}; + +static int __init cfg80211_init(void) { int err; + err = register_pernet_device(&cfg80211_pernet_ops); + if (err) + goto out_fail_pernet; + err = wiphy_sysfs_init(); if (err) goto out_fail_sysfs; @@ -736,9 +799,10 @@ out_fail_nl80211: out_fail_notifier: wiphy_sysfs_exit(); out_fail_sysfs: + unregister_pernet_device(&cfg80211_pernet_ops); +out_fail_pernet: return err; } - subsys_initcall(cfg80211_init); static void cfg80211_exit(void) @@ -748,5 +812,6 @@ static void cfg80211_exit(void) unregister_netdevice_notifier(&cfg80211_netdev_notifier); wiphy_sysfs_exit(); regulatory_exit(); + unregister_pernet_device(&cfg80211_pernet_ops); } module_exit(cfg80211_exit); diff --git a/net/wireless/core.h b/net/wireless/core.h index 2ec8ddbe57de..4276b70cd975 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -170,7 +170,10 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); /* identical to 
cfg80211_get_dev_from_info but only operate on ifindex */ extern struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(int ifindex); +cfg80211_get_dev_from_ifindex(struct net *net, int ifindex); + +int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, + struct net *net); static inline void cfg80211_lock_rdev(struct cfg80211_registered_device *rdev) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index da450ef1fc7e..7880a9c4cdda 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -14,8 +14,10 @@ #include #include #include +#include #include #include +#include #include "core.h" #include "nl80211.h" #include "reg.h" @@ -27,24 +29,26 @@ static struct genl_family nl80211_fam = { .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ .maxattr = NL80211_ATTR_MAX, + .netnsok = true, }; /* internal helper: get rdev and dev */ -static int get_rdev_dev_by_info_ifindex(struct nlattr **attrs, +static int get_rdev_dev_by_info_ifindex(struct genl_info *info, struct cfg80211_registered_device **rdev, struct net_device **dev) { + struct nlattr **attrs = info->attrs; int ifindex; if (!attrs[NL80211_ATTR_IFINDEX]) return -EINVAL; ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); - *dev = dev_get_by_index(&init_net, ifindex); + *dev = dev_get_by_index(genl_info_net(info), ifindex); if (!*dev) return -ENODEV; - *rdev = cfg80211_get_dev_from_ifindex(ifindex); + *rdev = cfg80211_get_dev_from_ifindex(genl_info_net(info), ifindex); if (IS_ERR(*rdev)) { dev_put(*dev); return PTR_ERR(*rdev); @@ -133,6 +137,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, + [NL80211_ATTR_PID] = { .type = NLA_U32 }, }; /* policy for the attributes */ @@ -532,6 +537,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(deauth, DEAUTHENTICATE); CMD(disassoc, DISASSOCIATE); CMD(join_ibss, JOIN_IBSS); + if (dev->wiphy.netnsok) { + i++; + NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); + } #undef CMD @@ -562,6 +571,8 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) mutex_lock(&cfg80211_mutex); list_for_each_entry(dev, &cfg80211_rdev_list, list) { + if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) + continue; if (++idx <= start) continue; if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid, @@ -867,6 +878,8 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * mutex_lock(&cfg80211_mutex); list_for_each_entry(dev, &cfg80211_rdev_list, list) { + if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) + continue; if (wp_idx < wp_start) { wp_idx++; continue; @@ -907,7 +920,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) struct net_device *netdev; int err; - err = get_rdev_dev_by_info_ifindex(info->attrs, &dev, &netdev); + err = get_rdev_dev_by_info_ifindex(info, &dev, &netdev); if (err) return err; @@ -975,7 +988,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1098,26 +1111,25 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) { 
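/* rdev/dev lookup now resolves the interface in the sender's network namespace */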
struct cfg80211_registered_device *rdev; - int ifindex, err; + int err; struct net_device *dev; rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; - ifindex = dev->ifindex; - dev_put(dev); if (!rdev->ops->del_virtual_intf) { err = -EOPNOTSUPP; goto out; } - err = rdev->ops->del_virtual_intf(&rdev->wiphy, ifindex); + err = rdev->ops->del_virtual_intf(&rdev->wiphy, dev); out: cfg80211_unlock_rdev(rdev); + dev_put(dev); unlock_rtnl: rtnl_unlock(); return err; @@ -1195,7 +1207,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1274,7 +1286,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1333,7 +1345,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1380,7 +1392,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1429,7 +1441,7 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1516,7 +1528,7 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1726,13 +1738,13 @@ static int nl80211_dump_station(struct sk_buff *skb, rtnl_lock(); - netdev = __dev_get_by_index(&init_net, ifidx); + netdev = __dev_get_by_index(sock_net(skb->sk), ifidx); if (!netdev) { err = -ENODEV; goto out_rtnl; } - dev = cfg80211_get_dev_from_ifindex(ifidx); + dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out_rtnl; @@ -1791,7 +1803,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -1829,14 +1841,16 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) /* * Get vlan interface making sure it is on the right wiphy. 
*/ -static int get_vlan(struct nlattr *vlanattr, +static int get_vlan(struct genl_info *info, struct cfg80211_registered_device *rdev, struct net_device **vlan) { + struct nlattr *vlanattr = info->attrs[NL80211_ATTR_STA_VLAN]; *vlan = NULL; if (vlanattr) { - *vlan = dev_get_by_index(&init_net, nla_get_u32(vlanattr)); + *vlan = dev_get_by_index(genl_info_net(info), + nla_get_u32(vlanattr)); if (!*vlan) return -ENODEV; if (!(*vlan)->ieee80211_ptr) @@ -1891,11 +1905,11 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], rdev, ¶ms.vlan); + err = get_vlan(info, rdev, ¶ms.vlan); if (err) goto out; @@ -2004,11 +2018,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], rdev, ¶ms.vlan); + err = get_vlan(info, rdev, ¶ms.vlan); if (err) goto out; @@ -2079,7 +2093,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2185,13 +2199,13 @@ static int nl80211_dump_mpath(struct sk_buff *skb, rtnl_lock(); - netdev = __dev_get_by_index(&init_net, ifidx); + netdev = __dev_get_by_index(sock_net(skb->sk), ifidx); if (!netdev) { err = -ENODEV; goto out_rtnl; } - dev = cfg80211_get_dev_from_ifindex(ifidx); + dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out_rtnl; @@ -2255,7 +2269,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2314,7 +2328,7 @@ static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2362,7 +2376,7 @@ static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2404,7 +2418,7 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2455,7 +2469,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2574,7 +2588,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, rtnl_lock(); /* Look up our device */ - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2691,7 +2705,7 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = 
get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2947,7 +2961,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -3069,14 +3083,16 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->ie_len); } - request->ifidx = dev->ifindex; + request->dev = dev; request->wiphy = &rdev->wiphy; rdev->scan_req = request; err = rdev->ops->scan(&rdev->wiphy, dev, request); - if (!err) + if (!err) { nl80211_send_scan_start(rdev, dev); + dev_hold(dev); + } out_free: if (err) { @@ -3198,11 +3214,11 @@ static int nl80211_dump_scan(struct sk_buff *skb, cb->args[0] = ifidx; } - dev = dev_get_by_index(&init_net, ifidx); + dev = dev_get_by_index(sock_net(skb->sk), ifidx); if (!dev) return -ENODEV; - rdev = cfg80211_get_dev_from_ifindex(ifidx); + rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); if (IS_ERR(rdev)) { err = PTR_ERR(rdev); goto out_put_netdev; @@ -3312,7 +3328,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3448,7 +3464,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3531,7 +3547,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3593,7 +3609,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3666,7 +3682,7 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3739,7 +3755,7 @@ static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3924,7 +3940,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) return err; rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -4000,7 +4016,7 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -4024,6 +4040,47 @@ unlock_rtnl: return err; } +static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net *net; + int err; + u32 pid; + + if (!info->attrs[NL80211_ATTR_PID]) + return -EINVAL; + + pid = 
nla_get_u32(info->attrs[NL80211_ATTR_PID]); + + rtnl_lock(); + + rdev = cfg80211_get_dev_from_info(info); + if (IS_ERR(rdev)) { + err = PTR_ERR(rdev); + goto out; + } + + net = get_net_ns_by_pid(pid); + if (IS_ERR(net)) { + err = PTR_ERR(net); + goto out; + } + + err = 0; + + /* check if anything to do */ + if (net_eq(wiphy_net(&rdev->wiphy), net)) + goto out_put_net; + + err = cfg80211_switch_netns(rdev, net); + out_put_net: + put_net(net); + out: + cfg80211_unlock_rdev(rdev); + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4257,6 +4314,12 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_WIPHY_NETNS, + .doit = nl80211_wiphy_netns, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", @@ -4288,7 +4351,8 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) return; } - genlmsg_multicast(msg, 0, nl80211_config_mcgrp.id, GFP_KERNEL); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_config_mcgrp.id, GFP_KERNEL); } static int nl80211_add_scan_req(struct sk_buff *msg, @@ -4365,7 +4429,8 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_scan_mcgrp.id, GFP_KERNEL); } void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, @@ -4383,7 +4448,8 @@ void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_scan_mcgrp.id, GFP_KERNEL); } void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, @@ -4401,7 +4467,8 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_scan_mcgrp.id, GFP_KERNEL); } /* @@ -4450,7 +4517,10 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) return; } - genlmsg_multicast(msg, 0, nl80211_regulatory_mcgrp.id, GFP_KERNEL); + rtnl_lock(); + genlmsg_multicast_allns(msg, 0, nl80211_regulatory_mcgrp.id, + GFP_KERNEL); + rtnl_unlock(); return; @@ -4486,7 +4556,8 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4553,7 +4624,8 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4611,7 +4683,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4651,7 +4724,8 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + 
nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4691,7 +4765,8 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, GFP_KERNEL); return; nla_put_failure: @@ -4726,7 +4801,8 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4766,7 +4842,8 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4819,7 +4896,10 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, return; } - genlmsg_multicast(msg, 0, nl80211_regulatory_mcgrp.id, GFP_ATOMIC); + rcu_read_lock(); + genlmsg_multicast_allns(msg, 0, nl80211_regulatory_mcgrp.id, + GFP_ATOMIC); + rcu_read_unlock(); return; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index decc59fe0ee8..1b578b8cb1c9 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -32,9 +32,7 @@ void __cfg80211_scan_done(struct work_struct *wk) mutex_lock(&rdev->mtx); request = rdev->scan_req; - dev = dev_get_by_index(&init_net, request->ifidx); - if (!dev) - goto out; + dev = request->dev; /* * This must be before sending the other events! @@ -58,7 +56,6 @@ void __cfg80211_scan_done(struct work_struct *wk) dev_put(dev); - out: cfg80211_unlock_rdev(rdev); wiphy_to_dev(request->wiphy)->scan_req = NULL; kfree(request); @@ -66,17 +63,10 @@ void __cfg80211_scan_done(struct work_struct *wk) void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) { - struct net_device *dev = dev_get_by_index(&init_net, request->ifidx); - if (WARN_ON(!dev)) { - kfree(request); - return; - } - WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); request->aborted = aborted; schedule_work(&wiphy_to_dev(request->wiphy)->scan_done_wk); - dev_put(dev); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -592,7 +582,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (!netif_running(dev)) return -ENETDOWN; - rdev = cfg80211_get_dev_from_ifindex(dev->ifindex); + rdev = cfg80211_get_dev_from_ifindex(dev_net(dev), dev->ifindex); if (IS_ERR(rdev)) return PTR_ERR(rdev); @@ -617,7 +607,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, } creq->wiphy = wiphy; - creq->ifidx = dev->ifindex; + creq->dev = dev; creq->ssids = (void *)(creq + 1); creq->channels = (void *)(creq->ssids + 1); creq->n_channels = n_channels; @@ -654,8 +644,10 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (err) { rdev->scan_req = NULL; kfree(creq); - } else + } else { nl80211_send_scan_start(rdev, dev); + dev_hold(dev); + } out: cfg80211_unlock_rdev(rdev); return err; @@ -948,7 +940,7 @@ int cfg80211_wext_giwscan(struct net_device *dev, if (!netif_running(dev)) return -ENETDOWN; - rdev = cfg80211_get_dev_from_ifindex(dev->ifindex); + rdev = cfg80211_get_dev_from_ifindex(dev_net(dev), dev->ifindex); if (IS_ERR(rdev)) return PTR_ERR(rdev); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 82de2d9795f4..a19741097989 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -86,7 +86,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) wdev->conn->params.ssid_len); 
request->ssids[0].ssid_len = wdev->conn->params.ssid_len; - request->ifidx = wdev->netdev->ifindex; + request->dev = wdev->netdev; request->wiphy = &rdev->wiphy; rdev->scan_req = request; @@ -95,6 +95,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) if (!err) { wdev->conn->state = CFG80211_CONN_SCANNING; nl80211_send_scan_start(rdev, wdev->netdev); + dev_hold(wdev->netdev); } else { rdev->scan_req = NULL; kfree(request); -- cgit v1.2.3 From d1eba248469272ae0618288bccf65b24d017f1d2 Mon Sep 17 00:00:00 2001 From: Sujith Date: Thu, 23 Jul 2009 15:31:31 +0530 Subject: mac80211: Add a few 802.11n defines for AMPDU parameters Signed-off-by: Sujith Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 23343ab0bb03..21556a2d9e7e 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -802,6 +802,31 @@ struct ieee80211_ht_cap { #define IEEE80211_HT_AMPDU_PARM_FACTOR 0x03 #define IEEE80211_HT_AMPDU_PARM_DENSITY 0x1C +/* + * Maximum length of AMPDU that the STA can receive. + * Length = 2 ^ (13 + max_ampdu_length_exp) - 1 (octets) + */ +enum ieee80211_max_ampdu_length_exp { + IEEE80211_HT_MAX_AMPDU_8K = 0, + IEEE80211_HT_MAX_AMPDU_16K = 1, + IEEE80211_HT_MAX_AMPDU_32K = 2, + IEEE80211_HT_MAX_AMPDU_64K = 3 +}; + +#define IEEE80211_HT_MAX_AMPDU_FACTOR 13 + +/* Minimum MPDU start spacing */ +enum ieee80211_min_mpdu_spacing { + IEEE80211_HT_MPDU_DENSITY_NONE = 0, /* No restriction */ + IEEE80211_HT_MPDU_DENSITY_0_25 = 1, /* 1/4 usec */ + IEEE80211_HT_MPDU_DENSITY_0_5 = 2, /* 1/2 usec */ + IEEE80211_HT_MPDU_DENSITY_1 = 3, /* 1 usec */ + IEEE80211_HT_MPDU_DENSITY_2 = 4, /* 2 usec */ + IEEE80211_HT_MPDU_DENSITY_4 = 5, /* 4 usec */ + IEEE80211_HT_MPDU_DENSITY_8 = 6, /* 8 usec */ + IEEE80211_HT_MPDU_DENSITY_16 = 7 /* 16 usec */ +}; + /** * struct ieee80211_ht_info - HT information * -- cgit v1.2.3 From 3885123da8335dc6b67387e5e626acbffc56f664 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:50 +0900 Subject: swiotlb: remove unused swiotlb_alloc_boot() Nobody uses swiotlb_alloc_boot(). Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- include/linux/swiotlb.h | 2 -- lib/swiotlb.c | 7 +------ 3 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6af96ee44200..0ac7cd524788 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -13,11 +13,6 @@ int swiotlb __read_mostly; -void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) -{ - return alloc_bootmem_low_pages(size); -} - void *swiotlb_alloc(unsigned order, unsigned long nslabs) { return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index cb1a6631b8f4..94db70444c17 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -14,7 +14,6 @@ struct scatterlist; */ #define IO_TLB_SEGSIZE 128 - /* * log of the size of each IO TLB slab. The number of slabs is command line * controllable. 
@@ -24,7 +23,6 @@ struct scatterlist; extern void swiotlb_init(void); -extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs); extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index bffe6d7ef9d9..9edfdd442edc 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -114,11 +114,6 @@ setup_io_tlb_npages(char *str) __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ -void * __weak __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) -{ - return alloc_bootmem_low_pages(size); -} - void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs) { return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); @@ -189,7 +184,7 @@ swiotlb_init_with_default_size(size_t default_size) /* * Get IO TLB memory from the low pages */ - io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs); + io_tlb_start = alloc_bootmem_low_pages(bytes); if (!io_tlb_start) panic("Cannot allocate SWIOTLB buffer"); io_tlb_end = io_tlb_start + bytes; -- cgit v1.2.3 From bb52196be37ce154ddc50b1f39496146d181cbe7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:51 +0900 Subject: swiotlb: remove unused swiotlb_alloc() Nobody uses swiotlb_alloc(). Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- include/linux/swiotlb.h | 2 -- lib/swiotlb.c | 8 ++------ 3 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 0ac7cd524788..ea675cfe76fe 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -13,11 +13,6 @@ int swiotlb __read_mostly; -void *swiotlb_alloc(unsigned order, unsigned long nslabs) -{ - return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -} - dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) { return paddr; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 94db70444c17..6bc50944040f 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -23,8 +23,6 @@ struct scatterlist; extern void swiotlb_init(void); -extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); - extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t address); extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 9edfdd442edc..3c4c21cdf43d 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -114,11 +114,6 @@ setup_io_tlb_npages(char *str) __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ -void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs) -{ - return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -} - dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) { return paddr; @@ -240,7 +235,8 @@ swiotlb_late_init_with_default_size(size_t default_size) bytes = io_tlb_nslabs << IO_TLB_SHIFT; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs); + io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, + order); if (io_tlb_start) break; order--; -- cgit v1.2.3 From cf56e3f2e8a8d5b7bc719980869b0e7985c256f3 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:52 +0900 Subject: swiotlb: remove swiotlb_arch_range_needs_mapping Nobody uses swiotlb_arch_range_needs_mapping(). 
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- include/linux/swiotlb.h | 2 -- lib/swiotlb.c | 15 ++------------- 3 files changed, 2 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index ea675cfe76fe..165bd7f93bb1 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -23,11 +23,6 @@ phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) return baddr; } -int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return 0; -} - static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 6bc50944040f..a977da24f17c 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -28,8 +28,6 @@ extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t address); -extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); - extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 3c4c21cdf43d..dc1cd1f5369e 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -141,11 +141,6 @@ int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev, return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); } -int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return 0; -} - static void swiotlb_print_info(unsigned long bytes) { phys_addr_t pstart, pend; @@ -312,11 +307,6 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) return swiotlb_arch_address_needs_mapping(hwdev, addr, size); } -static inline int range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size); -} - static int is_swiotlb_buffer(char *addr) { return addr >= io_tlb_start && addr < io_tlb_end; @@ -646,8 +636,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * we can safely return the device addr and not worry about bounce * buffering it. */ - if (!address_needs_mapping(dev, dev_addr, size) && - !range_needs_mapping(phys, size)) + if (!address_needs_mapping(dev, dev_addr, size) && !swiotlb_force) return dev_addr; /* @@ -810,7 +799,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, phys_addr_t paddr = sg_phys(sg); dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr); - if (range_needs_mapping(paddr, sg->length) || + if (swiotlb_force || address_needs_mapping(hwdev, dev_addr, sg->length)) { void *map = map_single(hwdev, sg_phys(sg), sg->length, dir); -- cgit v1.2.3 From 8f2502fd8157632909ff335a3c628e7caeec5e03 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:05:00 +0900 Subject: remove is_buffer_dma_capable() is_buffer_dma_capable() was replaced with dma_capable(). is_buffer_dma_capable() tells if a buffer is dma-capable or not. However, it doesn't take a pointer to struct device so it doesn't work for POWERPC. 
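For comparison, here is the removed helper next to a sketch of its replacement; the dma_capable() body below is a simplified sketch of the generic form described above, not necessarily the exact code of any tree:

/* removed helper: a bare mask check, with no struct device available */
static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size)
{
	return addr + size <= mask;
}

/*
 * sketch of the replacement: taking the device lets an architecture
 * (e.g. POWERPC) apply per-device address translation before the mask
 * comparison; only the assumed generic fallback is shown here
 */
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
	if (!dev->dma_mask)
		return false;
	return addr + size <= *dev->dma_mask;
}
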
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- include/linux/dma-mapping.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 07dfd460d286..c0f6c3cd788c 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -98,11 +98,6 @@ static inline int is_device_dma_capable(struct device *dev) return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE; } -static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size) -{ - return addr + size <= mask; -} - #ifdef CONFIG_HAS_DMA #include #else -- cgit v1.2.3 From 862d196b27bd30a5ab8109d5cdb37980d81b1fe9 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:05:02 +0900 Subject: swiotlb: use phys_to_dma and dma_to_phys This converts swiotlb to use phys_to_dma and dma_to_phys instead of swiotlb_phys_to_bus() and swiotlb_bus_to_phys(). swiotlb_phys_to_bus() and swiotlb_bus_to_phys() are not necessary so this patch also removes them. Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- include/linux/swiotlb.h | 5 ----- lib/swiotlb.c | 22 ++++++---------------- 2 files changed, 6 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index a977da24f17c..73b1f1cec423 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -23,11 +23,6 @@ struct scatterlist; extern void swiotlb_init(void); -extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, - phys_addr_t address); -extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, - dma_addr_t address); - extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index a012d93792b7..9e2fe3e1804c 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -114,21 +114,11 @@ setup_io_tlb_npages(char *str) __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? 
*/ -dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) -{ - return paddr; -} - -phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) -{ - return baddr; -} - /* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) { - return swiotlb_phys_to_bus(hwdev, virt_to_phys(address)); + return phys_to_dma(hwdev, virt_to_phys(address)); } static void swiotlb_print_info(unsigned long bytes) @@ -567,7 +557,7 @@ void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dev_addr) { - phys_addr_t paddr = swiotlb_bus_to_phys(hwdev, dev_addr); + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); WARN_ON(irqs_disabled()); if (!is_swiotlb_buffer(paddr)) @@ -612,7 +602,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); + dma_addr_t dev_addr = phys_to_dma(dev, phys); void *map; BUG_ON(dir == DMA_NONE); @@ -656,7 +646,7 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir) { - phys_addr_t paddr = swiotlb_bus_to_phys(hwdev, dev_addr); + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); @@ -699,7 +689,7 @@ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir, int target) { - phys_addr_t paddr = swiotlb_bus_to_phys(hwdev, dev_addr); + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); @@ -788,7 +778,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, for_each_sg(sgl, sg, nelems, i) { phys_addr_t paddr = sg_phys(sg); - dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr); + dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); if (swiotlb_force || !dma_capable(hwdev, dev_addr, sg->length)) { -- cgit v1.2.3 From 0c193054a4c1cf190d2f23e5e91bd14402e43912 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 27 Jul 2009 19:09:19 -0400 Subject: nfsd41: change from page to memory based drc limits NFSD_SLOT_CACHE_SIZE is the size of all encoded operation responses (excluding the sequence operation) that we want to cache. For now, keep NFSD_SLOT_CACHE_SIZE at PAGE_SIZE. It will be reduced when the DRC is changed from page based to memory based. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 28 +++++++++++++--------------- fs/nfsd/nfssvc.c | 13 ++++++------- include/linux/nfsd/nfsd.h | 4 ++-- include/linux/nfsd/state.h | 1 + 4 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 70cba3fbfa6d..e2b11b1b515c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -414,31 +414,31 @@ gen_sessionid(struct nfsd4_session *ses) /* * Give the client the number of slots it requests bound by - * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. + * NFSD_MAX_SLOTS_PER_SESSION and by nfsd_drc_max_mem. * - * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we - * should (up to a point) re-negotiate active sessions and reduce their - * slot usage to make rooom for new connections. For now we just fail the - * create session. + * If we run out of reserved DRC memory we should (up to a point) re-negotiate + * active sessions and reduce their slot usage to make rooom for new + * connections. 
For now we just fail the create session. */ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) { - int np; + int mem; if (fchan->maxreqs < 1) return nfserr_inval; else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; + mem = fchan->maxreqs * NFSD_SLOT_CACHE_SIZE; spin_lock(&nfsd_drc_lock); - if (np + nfsd_drc_pages_used > nfsd_drc_max_pages) - np = nfsd_drc_max_pages - nfsd_drc_pages_used; - nfsd_drc_pages_used += np; + if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) + mem = ((nfsd_drc_max_mem - nfsd_drc_mem_used) / + NFSD_SLOT_CACHE_SIZE) * NFSD_SLOT_CACHE_SIZE; + nfsd_drc_mem_used += mem; spin_unlock(&nfsd_drc_lock); - fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; + fchan->maxreqs = mem / NFSD_SLOT_CACHE_SIZE; if (fchan->maxreqs == 0) return nfserr_resource; return 0; @@ -465,9 +465,7 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, fchan->maxresp_sz = maxcount; session_fchan->maxresp_sz = fchan->maxresp_sz; - /* Set the max response cached size our default which is - * a multiple of PAGE_SIZE and small */ - session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; + session_fchan->maxresp_cached = NFSD_SLOT_CACHE_SIZE; fchan->maxresp_cached = session_fchan->maxresp_cached; /* Use the client's maxops if possible */ @@ -585,7 +583,7 @@ free_session(struct kref *kref) nfsd4_release_respages(e->ce_respages, e->ce_resused); } spin_lock(&nfsd_drc_lock); - nfsd_drc_pages_used -= ses->se_fchannel.maxreqs * NFSD_PAGES_PER_SLOT; + nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; spin_unlock(&nfsd_drc_lock); kfree(ses); } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9be2a1932f8a..5a280a9cb540 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -74,8 +74,8 @@ struct svc_serv *nfsd_serv; * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 
*/ spinlock_t nfsd_drc_lock; -unsigned int nfsd_drc_max_pages; -unsigned int nfsd_drc_pages_used; +unsigned int nfsd_drc_max_mem; +unsigned int nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; @@ -247,12 +247,11 @@ void nfsd_reset_versions(void) static void set_max_drc(void) { #define NFSD_DRC_SIZE_SHIFT 10 - nfsd_drc_max_pages = nr_free_buffer_pages() - >> NFSD_DRC_SIZE_SHIFT; - nfsd_drc_pages_used = 0; + nfsd_drc_max_mem = (nr_free_buffer_pages() + >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; + nfsd_drc_mem_used = 0; spin_lock_init(&nfsd_drc_lock); - dprintk("%s nfsd_drc_max_pages %u\n", __func__, - nfsd_drc_max_pages); + dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); } int nfsd_create_serv(void) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2571f856908f..2812ed52669d 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -57,8 +57,8 @@ extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; extern spinlock_t nfsd_drc_lock; -extern unsigned int nfsd_drc_max_pages; -extern unsigned int nfsd_drc_pages_used; +extern unsigned int nfsd_drc_max_mem; +extern unsigned int nfsd_drc_mem_used; extern struct seq_operations nfs_exports_op; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 57ab2ed08459..a6c87d623891 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -96,6 +96,7 @@ struct nfs4_cb_conn { #define NFSD_MAX_SLOTS_PER_SESSION 128 /* Maximum number of pages per slot cache entry */ #define NFSD_PAGES_PER_SLOT 1 +#define NFSD_SLOT_CACHE_SIZE PAGE_SIZE /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 16 -- cgit v1.2.3 From 49557cc74c7bdf6a984be227ead9a84b3a26f053 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 23 Jul 2009 19:02:16 -0400 Subject: nfsd41: Use separate DRC for setclientid Instead of trying to share the generic 4.1 reply cache code for the CREATE_SESSION reply cache, it's simpler to handle CREATE_SESSION separately. The nfs41 single slot clientid DRC holds the results of create session processing. CREATE_SESSION can be preceded by a SEQUENCE operation (an embedded CREATE_SESSION) and the create session single slot cache must be maintained. nfsd4_replay_cache_entry() and nfsd4_store_cache_entry() do not implement the replay of an embedded CREATE_SESSION. The clientid DRC slot does not need the inuse, cachethis or other fields that the multiple slot session cache uses. Replace the clientid DRC cache struct nfs4_slot cache with a new nfsd4_clid_slot cache. Save the xdr struct nfsd4_create_session into the cache at the end of processing, and on a replay, replace the struct for the replay request with the cached version all while under the state lock. nfsd4_proc_compound will handle both the solo and embedded CREATE_SESSION case via the normal use of encode_operation. Errors that do not change the create session cache: A create session NFS4ERR_STALE_CLIENTID error means that a client record (and associated create session slot) could not be found and therefore can't be changed. NFSERR_SEQ_MISORDERED errors do not change the slot cache. All other errors get cached. Remove the clientid DRC specific check in nfs4svc_encode_compoundres to put the session only if cstate.session is set which will now always be true. Signed-off-by: Andy Adamson Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4state.c | 64 +++++++++++++++++++++++++++------------------- fs/nfsd/nfs4xdr.c | 3 +-- include/linux/nfsd/state.h | 21 ++++++++++++++- include/linux/nfsd/xdr4.h | 12 --------- 5 files changed, 60 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d781658e8084..d606c6a427de 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1120,7 +1120,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, BUG_ON(op->status == nfs_ok); encode_op: - /* Only from SEQUENCE or CREATE_SESSION */ + /* Only from SEQUENCE */ if (resp->cstate.status == nfserr_replay_cache) { dprintk("%s NFS4.1 replay from cache\n", __func__); if (nfsd4_not_cached(resp)) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 99df8e7a687b..7729d092c8a5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -653,8 +653,6 @@ static inline void free_client(struct nfs4_client *clp) { shutdown_callback_client(clp); - nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, - clp->cl_slot.sl_cache_entry.ce_resused); if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); @@ -1293,12 +1291,11 @@ out_copy: exid->clientid.cl_boot = new->cl_clientid.cl_boot; exid->clientid.cl_id = new->cl_clientid.cl_id; - new->cl_slot.sl_seqid = 0; exid->seqid = 1; nfsd4_set_ex_flags(new, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", - new->cl_slot.sl_seqid, new->cl_exchange_flags); + new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); status = nfs_ok; out: @@ -1334,15 +1331,35 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) return nfserr_seq_misordered; } +/* + * Cache the create session result into the create session single DRC + * slot cache by saving the xdr structure. sl_seqid has been set. + * Do this for solo or embedded create session operations. + */ +static void +nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot, int nfserr) +{ + slot->sl_status = nfserr; + memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); +} + +static __be32 +nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot) +{ + memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses)); + return slot->sl_status; +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; - struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_client *conf, *unconf; - struct nfsd4_slot *slot = NULL; + struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; nfs4_lock_state(); @@ -1350,25 +1367,22 @@ nfsd4_create_session(struct svc_rqst *rqstp, conf = find_confirmed_client(&cr_ses->clientid); if (conf) { - slot = &conf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid, - slot->sl_inuse); + cs_slot = &conf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status == nfserr_replay_cache) { dprintk("Got a create_session replay! 
seqid= %d\n", - slot->sl_seqid); - cstate->slot = slot; - cstate->status = status; + cs_slot->sl_seqid); /* Return the cached reply status */ - status = nfsd4_replay_cache_entry(resp, NULL); + status = nfsd4_replay_create_session(cr_ses, cs_slot); goto out; - } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { + } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { status = nfserr_seq_misordered; dprintk("Sequence misordered!\n"); dprintk("Expected seqid= %d but got seqid= %d\n", - slot->sl_seqid, cr_ses->seqid); + cs_slot->sl_seqid, cr_ses->seqid); goto out; } - conf->cl_slot.sl_seqid++; + cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || (ip_addr != unconf->cl_addr)) { @@ -1376,16 +1390,15 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out; } - slot = &unconf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid, - slot->sl_inuse); + cs_slot = &unconf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { /* an unconfirmed replay returns misordered */ status = nfserr_seq_misordered; - goto out; + goto out_cache; } - slot->sl_seqid++; /* from 0 to 1 */ + cs_slot->sl_seqid++; /* from 0 to 1 */ move_to_confirmed(unconf); /* @@ -1406,12 +1419,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); - cr_ses->seqid = slot->sl_seqid; + cr_ses->seqid = cs_slot->sl_seqid; - slot->sl_inuse = true; - cstate->slot = slot; - /* Ensure a page is used for the cache */ - slot->sl_cache_entry.ce_cachethis = 1; +out_cache: + /* cache solo and embedded create sessions under the state lock */ + nfsd4_cache_create_session(cr_ses, cs_slot, status); out: nfs4_unlock_state(); dprintk("%s returns %d\n", __func__, ntohl(status)); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2dcc7feaa6ff..fdf632bf1cfe 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3313,8 +3313,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); resp->cstate.slot->sl_inuse = 0; } - if (resp->cstate.session) - nfsd4_put_session(resp->cstate.session); + nfsd4_put_session(resp->cstate.session); } return 1; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index a6c87d623891..58bb19784e12 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -127,6 +127,25 @@ struct nfsd4_channel_attrs { u32 rdma_attrs; }; +struct nfsd4_create_session { + clientid_t clientid; + struct nfs4_sessionid sessionid; + u32 seqid; + u32 flags; + struct nfsd4_channel_attrs fore_channel; + struct nfsd4_channel_attrs back_channel; + u32 callback_prog; + u32 uid; + u32 gid; +}; + +/* The single slot clientid cache structure */ +struct nfsd4_clid_slot { + u32 sl_seqid; + __be32 sl_status; + struct nfsd4_create_session sl_cr_ses; +}; + struct nfsd4_session { struct kref se_ref; struct list_head se_hash; /* hash by sessionid */ @@ -193,7 +212,7 @@ struct nfs4_client { /* for nfs41 */ struct list_head cl_sessions; - struct nfsd4_slot cl_slot; /* create_session slot */ + struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; struct nfs4_sessionid cl_sessionid; }; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 2bacf7535069..5e4beb0deb80 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -366,18 +366,6 @@ struct nfsd4_exchange_id { int spa_how; }; -struct nfsd4_create_session 
{ - clientid_t clientid; - struct nfs4_sessionid sessionid; - u32 seqid; - u32 flags; - struct nfsd4_channel_attrs fore_channel; - struct nfsd4_channel_attrs back_channel; - u32 callback_prog; - u32 uid; - u32 gid; -}; - struct nfsd4_sequence { struct nfs4_sessionid sessionid; /* request/response */ u32 seqid; /* request/response */ -- cgit v1.2.3 From e5f5ccb646bc6009572b5c23201b5e81638ff150 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 23 Jul 2009 20:35:53 +0200 Subject: power_supply: get_by_name and set_charged functionality This adds a function that indicates that a battery is fully charged. It also includes functions to get a power_supply device from the class of registered devices by name reference. These can be used to find a specific battery to call power_supply_set_battery_charged() on. Some battery drivers might need this information to calibrate themselves. Signed-off-by: Daniel Mack Cc: Ian Molton Cc: Anton Vorontsov Cc: Matt Reimer Signed-off-by: Anton Vorontsov --- drivers/power/power_supply_core.c | 28 ++++++++++++++++++++++++++++ include/linux/power_supply.h | 3 +++ 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 12cd6e36ff1d..cce75b40b435 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -116,6 +116,34 @@ int power_supply_is_system_supplied(void) } EXPORT_SYMBOL_GPL(power_supply_is_system_supplied); +int power_supply_set_battery_charged(struct power_supply *psy) +{ + if (psy->type == POWER_SUPPLY_TYPE_BATTERY && psy->set_charged) { + psy->set_charged(psy); + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(power_supply_set_battery_charged); + +static int power_supply_match_device_by_name(struct device *dev, void *data) +{ + const char *name = data; + struct power_supply *psy = dev_get_drvdata(dev); + + return strcmp(psy->name, name) == 0; +} + +struct power_supply *power_supply_get_by_name(char *name) +{ + struct device *dev = class_find_device(power_supply_class, NULL, name, + power_supply_match_device_by_name); + + return dev ? dev_get_drvdata(dev) : NULL; +} +EXPORT_SYMBOL_GPL(power_supply_get_by_name); + int power_supply_register(struct device *parent, struct power_supply *psy) { int rc = 0; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 4c7c6fc35487..b5d096d3a9be 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -144,6 +144,7 @@ struct power_supply { enum power_supply_property psp, union power_supply_propval *val); void (*external_power_changed)(struct power_supply *psy); + void (*set_charged)(struct power_supply *psy); /* For APM emulation, think legacy userspace. */ int use_for_apm; @@ -183,8 +184,10 @@ struct power_supply_info { int use_for_apm; }; +extern struct power_supply *power_supply_get_by_name(char *name); extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); +extern int power_supply_set_battery_charged(struct power_supply *psy); #if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE) extern int power_supply_is_system_supplied(void); -- cgit v1.2.3 From 78ddb2785980fc01b129c3547463266cae9c6ca9 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 30 Jul 2009 23:23:34 +0100 Subject: ARM: PL093: Header file for PL093 SSMC PrimeCell Header to define the standard registers for a PL093 SSMC memory controller. 
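As a usage illustration only (this sketch is not part of the patch; the ioremap()ed controller base and the bank number are assumptions), the per-bank register offsets added below combine with SMB_BANK() like so:

#include <linux/io.h>
#include <linux/amba/pl093.h>

/*
 * hypothetical helper: program the read wait-state count of one SSMC
 * bank; "ssmc" is an assumed ioremap()ed base of the controller
 */
static void ssmc_set_read_waits(void __iomem *ssmc, unsigned int bank, u32 ws)
{
	writel(ws & WSTRD_MASK, ssmc + SMB_BANK(bank) + SMBWSTRDR);
}
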
Signed-off-by: Ben Dooks Signed-off-by: Ben Dooks --- include/linux/amba/pl093.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 include/linux/amba/pl093.h (limited to 'include/linux') diff --git a/include/linux/amba/pl093.h b/include/linux/amba/pl093.h new file mode 100644 index 000000000000..2983e3671adb --- /dev/null +++ b/include/linux/amba/pl093.h @@ -0,0 +1,80 @@ +/* linux/amba/pl093.h + * + * Copyright (c) 2008 Simtec Electronics + * http://armlinux.simtec.co.uk/ + * Ben Dooks + * + * AMBA PL093 SSMC (synchronous static memory controller) + * See DDI0236.pdf (r0p4) for more details + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +#define SMB_BANK(x) ((x) * 0x20) /* each bank control set is 0x20 apart */ + +/* Offsets for SMBxxxxRy registers */ + +#define SMBIDCYR (0x00) +#define SMBWSTRDR (0x04) +#define SMBWSTWRR (0x08) +#define SMBWSTOENR (0x0C) +#define SMBWSTWENR (0x10) +#define SMBCR (0x14) +#define SMBSR (0x18) +#define SMBWSTBRDR (0x1C) + +/* Masks for SMB registers */ +#define IDCY_MASK (0xf) +#define WSTRD_MASK (0xf) +#define WSTWR_MASK (0xf) +#define WSTOEN_MASK (0xf) +#define WSTWEN_MASK (0xf) + +/* Notes from datasheet: + * WSTOEN <= WSTRD + * WSTWEN <= WSTWR + * + * WSTOEN is not used with nWAIT + */ + +/* SMBCR bit definitions */ +#define SMBCR_BIWRITEEN (1 << 21) +#define SMBCR_ADDRVALIDWRITEEN (1 << 20) +#define SMBCR_SYNCWRITE (1 << 17) +#define SMBCR_BMWRITE (1 << 16) +#define SMBCR_WRAPREAD (1 << 14) +#define SMBCR_BIREADEN (1 << 13) +#define SMBCR_ADDRVALIDREADEN (1 << 12) +#define SMBCR_SYNCREAD (1 << 9) +#define SMBCR_BMREAD (1 << 8) +#define SMBCR_SMBLSPOL (1 << 6) +#define SMBCR_WP (1 << 3) +#define SMBCR_WAITEN (1 << 2) +#define SMBCR_WAITPOL (1 << 1) +#define SMBCR_RBLE (1 << 0) + +#define SMBCR_BURSTLENWRITE_MASK (3 << 18) +#define SMBCR_BURSTLENWRITE_4 (0 << 18) +#define SMBCR_BURSTLENWRITE_8 (1 << 18) +#define SMBCR_BURSTLENWRITE_RESERVED (2 << 18) +#define SMBCR_BURSTLENWRITE_CONTINUOUS (3 << 18) + +#define SMBCR_BURSTLENREAD_MASK (3 << 10) +#define SMBCR_BURSTLENREAD_4 (0 << 10) +#define SMBCR_BURSTLENREAD_8 (1 << 10) +#define SMBCR_BURSTLENREAD_16 (2 << 10) +#define SMBCR_BURSTLENREAD_CONTINUOUS (3 << 10) + +#define SMBCR_MW_MASK (3 << 4) +#define SMBCR_MW_8BIT (0 << 4) +#define SMBCR_MW_16BIT (1 << 4) +#define SMBCR_MW_M32BIT (2 << 4) + +/* SSMC status registers */ +#define SSMCCSR (0x200) +#define SSMCCR (0x204) +#define SSMCITCR (0x208) +#define SSMCITIP (0x20C) +#define SSMCITIOP (0x210) -- cgit v1.2.3 From 78ed73e84d132e2d556eda90e5b76620f4390ff2 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Wed, 22 Jul 2009 05:22:37 +0200 Subject: TTY: Add definition of a new line discipline required by Amstrad E3 (Delta) ASoC driver This patch adds a new line discipline name and number to include/linux/tty.h. The line discipline will be used by the Amstrad E3 (Delta) sound driver that will come next in this series of patches. Created against linux-2.6.31-rc3. Applies to linux-omap-2.6 commit 7c5cb7862d32cb344be7831d466535d5255e35ac as well. 
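For illustration, the new discipline is attached like any other ldisc via TIOCSETD; a minimal user-space sketch (the tty device path here is made up, not taken from the patch):

#include <stdio.h>
#include <fcntl.h>
#include <sys/ioctl.h>

#define N_AMSDELTA 19	/* value added to include/linux/tty.h by this patch */

int main(void)
{
	int ldisc = N_AMSDELTA;
	int fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY);	/* assumed modem tty */

	if (fd < 0 || ioctl(fd, TIOCSETD, &ldisc) < 0) {
		perror("N_AMSDELTA");
		return 1;
	}
	/* the discipline stays attached only while this fd remains open */
	return 0;
}
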
Signed-off-by: Janusz Krzysztofik Acked-by: Alan Cox Signed-off-by: Mark Brown --- include/linux/tty.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 1488d8c81aac..26af9816391f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -23,7 +23,7 @@ */ #define NR_UNIX98_PTY_DEFAULT 4096 /* Default maximum for Unix98 ptys */ #define NR_UNIX98_PTY_MAX (1 << MINORBITS) /* Absolute limit */ -#define NR_LDISCS 19 +#define NR_LDISCS 20 /* line disciplines */ #define N_TTY 0 @@ -47,6 +47,8 @@ #define N_SLCAN 17 /* Serial / USB serial CAN Adaptors */ #define N_PPS 18 /* Pulse per Second */ +#define N_AMSDELTA 19 /* codec control over modem, board specific */ + /* * This character is the same as _POSIX_VDISABLE: it cannot be used as * a c_cc[] character, but indicates that a particular special character -- cgit v1.2.3 From 716a42348cdaf04534b15fbdc9c83e25baebfed5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 24 Jul 2009 20:05:23 +0200 Subject: sched: Fix cond_resched_lock() in !CONFIG_PREEMPT The might_sleep() test inside cond_resched_lock() assumes the spinlock is held and then preemption is disabled. This is true with CONFIG_PREEMPT but the preempt_count() doesn't change otherwise. Check by starting from the appropriate preempt offset depending on the config. Reported-by: Li Zefan Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <1248458723-12146-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index cbbfca69aa4a..c472414953bf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2293,8 +2293,14 @@ extern int _cond_resched(void); extern int __cond_resched_lock(spinlock_t *lock); +#ifdef CONFIG_PREEMPT +#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET +#else +#define PREEMPT_LOCK_OFFSET 0 +#endif + #define cond_resched_lock(lock) ({ \ - __might_sleep(__FILE__, __LINE__, PREEMPT_OFFSET); \ + __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ __cond_resched_lock(lock); \ }) -- cgit v1.2.3 From 3f029d3c6d62068d59301d90c18dbde8ee402107 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Wed, 29 Jul 2009 11:08:47 -0400 Subject: sched: Enhance the pre/post scheduling logic We currently have an explicit "needs_post" vtable method which returns a stack variable for whether we should later run post-schedule. This leads to an awkward exchange of the variable as it bubbles back up out of the context switch. Peter Zijlstra observed that this information could be stored in the run-queue itself instead of handled on the stack. Therefore, we revert to the method of having context_switch return void, and update an internal rq->post_schedule variable when we require further processing. In addition, we fix a race condition where we try to access current->sched_class without holding the rq->lock. This is technically racy, as the sched-class could change out from under us. Instead, we reference the per-rq post_schedule variable with the runqueue unlocked, but with preemption disabled to see if we need to reacquire the rq->lock. Finally, we clean the code up slightly by removing the #ifdef CONFIG_SMP conditionals from the schedule() call, and implement some inline helper functions instead. This patch passes checkpatch, and rt-migrate. 
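Schematically, the pattern the patch converges on looks like this (condensed from the diff below, using the RT class as the example; not a standalone compilable excerpt):

/* in pick_next_task_rt(), with rq->lock held: record the decision */
rq->post_schedule = has_pushable_tasks(rq);

/*
 * in schedule(), after the context switch: rq->lock has been dropped
 * but preemption is still disabled, so the lock is re-taken only if
 * the flag was set
 */
static inline void post_schedule(struct rq *rq)
{
	if (rq->post_schedule) {
		unsigned long flags;

		spin_lock_irqsave(&rq->lock, flags);
		if (rq->curr->sched_class->post_schedule)
			rq->curr->sched_class->post_schedule(rq);
		spin_unlock_irqrestore(&rq->lock, flags);

		rq->post_schedule = 0;
	}
}
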
Signed-off-by: Gregory Haskins Signed-off-by: Peter Zijlstra LKML-Reference: <20090729150422.17691.55590.stgit@dev.haskins.net> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - kernel/sched.c | 82 +++++++++++++++++++++++++++++++-------------------- kernel/sched_rt.c | 31 +++++++------------ 3 files changed, 61 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c35bc29d2a9..195d72d5c102 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1047,7 +1047,6 @@ struct sched_class { struct rq *busiest, struct sched_domain *sd, enum cpu_idle_type idle); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); - int (*needs_post_schedule) (struct rq *this_rq); void (*post_schedule) (struct rq *this_rq); void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); diff --git a/kernel/sched.c b/kernel/sched.c index a030d4514cdc..613fee54fc89 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -616,6 +616,7 @@ struct rq { unsigned char idle_at_tick; /* For active balancing */ + int post_schedule; int active_balance; int push_cpu; /* cpu of this runqueue: */ @@ -2839,17 +2840,11 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, * with the lock held can cause deadlocks; see schedule() for * details.) */ -static int finish_task_switch(struct rq *rq, struct task_struct *prev) +static void finish_task_switch(struct rq *rq, struct task_struct *prev) __releases(rq->lock) { struct mm_struct *mm = rq->prev_mm; long prev_state; - int post_schedule = 0; - -#ifdef CONFIG_SMP - if (current->sched_class->needs_post_schedule) - post_schedule = current->sched_class->needs_post_schedule(rq); -#endif rq->prev_mm = NULL; @@ -2880,10 +2875,44 @@ static int finish_task_switch(struct rq *rq, struct task_struct *prev) kprobe_flush_task(prev); put_task_struct(prev); } +} + +#ifdef CONFIG_SMP + +/* assumes rq->lock is held */ +static inline void pre_schedule(struct rq *rq, struct task_struct *prev) +{ + if (prev->sched_class->pre_schedule) + prev->sched_class->pre_schedule(rq, prev); +} + +/* rq->lock is NOT held, but preemption is disabled */ +static inline void post_schedule(struct rq *rq) +{ + if (rq->post_schedule) { + unsigned long flags; + + spin_lock_irqsave(&rq->lock, flags); + if (rq->curr->sched_class->post_schedule) + rq->curr->sched_class->post_schedule(rq); + spin_unlock_irqrestore(&rq->lock, flags); + + rq->post_schedule = 0; + } +} + +#else - return post_schedule; +static inline void pre_schedule(struct rq *rq, struct task_struct *p) +{ +} + +static inline void post_schedule(struct rq *rq) +{ } +#endif + /** * schedule_tail - first thing a freshly forked thread must call. * @prev: the thread we just switched away from. @@ -2892,14 +2921,14 @@ asmlinkage void schedule_tail(struct task_struct *prev) __releases(rq->lock) { struct rq *rq = this_rq(); - int post_schedule; - post_schedule = finish_task_switch(rq, prev); + finish_task_switch(rq, prev); -#ifdef CONFIG_SMP - if (post_schedule) - current->sched_class->post_schedule(rq); -#endif + /* + * FIXME: do we need to worry about rq being invalidated by the + * task_switch? + */ + post_schedule(rq); #ifdef __ARCH_WANT_UNLOCKED_CTXSW /* In this case, finish_task_switch does not reenable preemption */ @@ -2913,7 +2942,7 @@ asmlinkage void schedule_tail(struct task_struct *prev) * context_switch - switch to the new MM and the new * thread's register state. 
*/ -static inline int +static inline void context_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { @@ -2960,7 +2989,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * CPUs since it called schedule(), thus the 'rq' on its stack * frame will be invalid. */ - return finish_task_switch(this_rq(), prev); + finish_task_switch(this_rq(), prev); } /* @@ -5371,7 +5400,6 @@ asmlinkage void __sched schedule(void) { struct task_struct *prev, *next; unsigned long *switch_count; - int post_schedule = 0; struct rq *rq; int cpu; @@ -5403,10 +5431,7 @@ need_resched_nonpreemptible: switch_count = &prev->nvcsw; } -#ifdef CONFIG_SMP - if (prev->sched_class->pre_schedule) - prev->sched_class->pre_schedule(rq, prev); -#endif + pre_schedule(rq, prev); if (unlikely(!rq->nr_running)) idle_balance(cpu, rq); @@ -5422,25 +5447,17 @@ need_resched_nonpreemptible: rq->curr = next; ++*switch_count; - post_schedule = context_switch(rq, prev, next); /* unlocks the rq */ + context_switch(rq, prev, next); /* unlocks the rq */ /* * the context switch might have flipped the stack from under * us, hence refresh the local variables. */ cpu = smp_processor_id(); rq = cpu_rq(cpu); - } else { -#ifdef CONFIG_SMP - if (current->sched_class->needs_post_schedule) - post_schedule = current->sched_class->needs_post_schedule(rq); -#endif + } else spin_unlock_irq(&rq->lock); - } -#ifdef CONFIG_SMP - if (post_schedule) - current->sched_class->post_schedule(rq); -#endif + post_schedule(rq); if (unlikely(reacquire_kernel_lock(current) < 0)) goto need_resched_nonpreemptible; @@ -9403,6 +9420,7 @@ void __init sched_init(void) #ifdef CONFIG_SMP rq->sd = NULL; rq->rd = NULL; + rq->post_schedule = 0; rq->active_balance = 0; rq->next_balance = jiffies; rq->push_cpu = 0; diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 3918e01994e0..a8f89bc3e5eb 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1056,6 +1056,11 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) return p; } +static inline int has_pushable_tasks(struct rq *rq) +{ + return !plist_head_empty(&rq->rt.pushable_tasks); +} + static struct task_struct *pick_next_task_rt(struct rq *rq) { struct task_struct *p = _pick_next_task_rt(rq); @@ -1064,6 +1069,12 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) if (p) dequeue_pushable_task(rq, p); + /* + * We detect this state here so that we can avoid taking the RQ + * lock again later if there is no need to push + */ + rq->post_schedule = has_pushable_tasks(rq); + return p; } @@ -1262,11 +1273,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) return lowest_rq; } -static inline int has_pushable_tasks(struct rq *rq) -{ - return !plist_head_empty(&rq->rt.pushable_tasks); -} - static struct task_struct *pick_next_pushable_task(struct rq *rq) { struct task_struct *p; @@ -1466,23 +1472,9 @@ static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) pull_rt_task(rq); } -/* - * assumes rq->lock is held - */ -static int needs_post_schedule_rt(struct rq *rq) -{ - return has_pushable_tasks(rq); -} - static void post_schedule_rt(struct rq *rq) { - /* - * This is only called if needs_post_schedule_rt() indicates that - * we need to push tasks away - */ - spin_lock_irq(&rq->lock); push_rt_tasks(rq); - spin_unlock_irq(&rq->lock); } /* @@ -1758,7 +1750,6 @@ static const struct sched_class rt_sched_class = { .rq_online = rq_online_rt, .rq_offline = rq_offline_rt, .pre_schedule = pre_schedule_rt, - .needs_post_schedule = 
needs_post_schedule_rt, .post_schedule = post_schedule_rt, .task_wake_up = task_wake_up_rt, .switched_from = switched_from_rt, -- cgit v1.2.3 From f607c6685774811b8112e124f10a053d77015485 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Jul 2009 19:16:29 +0200 Subject: lockdep: Introduce lockdep_assert_held() Add a lockdep helper to validate that we indeed are the owner of a lock. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 8 ++++++++ kernel/lockdep.c | 33 +++++++++++++++++++++++++++++++++ kernel/sched.c | 2 ++ 3 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 12aabfcb45f6..a6d5e5e4d084 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -296,6 +296,10 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, extern void lock_release(struct lockdep_map *lock, int nested, unsigned long ip); +#define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map) + +extern int lock_is_held(struct lockdep_map *lock); + extern void lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, unsigned long ip); @@ -314,6 +318,8 @@ extern void lockdep_trace_alloc(gfp_t mask); #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) +#define lockdep_assert_held(l) WARN_ON(debug_locks && !lockdep_is_held(l)) + #else /* !LOCKDEP */ static inline void lockdep_off(void) @@ -358,6 +364,8 @@ struct lock_class_key { }; #define lockdep_depth(tsk) (0) +#define lockdep_assert_held(l) do { } while (0) + #endif /* !LOCKDEP */ #ifdef CONFIG_LOCK_STAT diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 4b6cebe8ab31..28914a509914 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3059,6 +3059,19 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) check_chain_key(curr); } +static int __lock_is_held(struct lockdep_map *lock) +{ + struct task_struct *curr = current; + int i; + + for (i = 0; i < curr->lockdep_depth; i++) { + if (curr->held_locks[i].instance == lock) + return 1; + } + + return 0; +} + /* * Check whether we follow the irq-flags state precisely: */ @@ -3160,6 +3173,26 @@ void lock_release(struct lockdep_map *lock, int nested, } EXPORT_SYMBOL_GPL(lock_release); +int lock_is_held(struct lockdep_map *lock) +{ + unsigned long flags; + int ret = 0; + + if (unlikely(current->lockdep_recursion)) + return ret; + + raw_local_irq_save(flags); + check_flags(flags); + + current->lockdep_recursion = 1; + ret = __lock_is_held(lock); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(lock_is_held); + void lockdep_set_current_reclaim_state(gfp_t gfp_mask) { current->lockdep_reclaim_gfp = gfp_mask; diff --git a/kernel/sched.c b/kernel/sched.c index 1b59e265273b..2c75f7daa439 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6609,6 +6609,8 @@ int cond_resched_lock(spinlock_t *lock) int resched = should_resched(); int ret = 0; + lockdep_assert_held(lock); + if (spin_needbreak(lock) || resched) { spin_unlock(lock); if (resched) -- cgit v1.2.3 From bb97a91e2549a7f2df9c21d32542582f549ab3ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Jul 2009 19:15:35 +0200 Subject: lockdep: Deal with many similar locks spin_lock_nest_lock() allows to take many instances of the same class, this can easily lead to overflow of MAX_LOCK_DEPTH. 
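(Editor's aside: to make the pattern concrete, a minimal sketch. The structures and freeze_group() are hypothetical; only spin_lock_nest_lock() itself is the real API, and the unlock path is omitted:)

	struct item {
		spinlock_t		lock;	/* many locks of one class */
		struct list_head	node;
	};

	struct group {
		spinlock_t		lock;	/* the "nest" lock */
		struct list_head	items;
	};

	static void freeze_group(struct group *g)
	{
		struct item *it;

		spin_lock(&g->lock);
		/*
		 * Every inner lock is taken while g->lock is held; each
		 * acquisition consumes one held_lock slot, so a long
		 * enough list overflows MAX_LOCK_DEPTH.
		 */
		list_for_each_entry(it, &g->items, node)
			spin_lock_nest_lock(&it->lock, &g->lock);
	}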
To avoid this overflow, we'll stop accounting instances but start reference counting the class in the held_lock structure. [ We could maintain a list of instances, if we'd move the hlock stuff into __lock_acquired(), but that would require significant modifications to the current code. ] We restrict this mode to spin_lock_nest_lock() only, because it degrades the lockdep quality due to the loss of instance information. For lockstat this means we don't track lock statistics for any but the first lock in the series. Currently nesting is limited to 11 bits because that was the spare space available in held_lock. This yields a maximum of 2048 instances. Signed-off-by: Peter Zijlstra Cc: Marcelo Tosatti Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 4 ++- kernel/lockdep.c | 89 ++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 80 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index a6d5e5e4d084..47d42eff6124 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -213,10 +213,12 @@ struct held_lock { * interrupt context: */ unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */ - unsigned int trylock:1; + unsigned int trylock:1; /* 16 bits */ + unsigned int read:2; /* see lock_acquire() comment */ unsigned int check:2; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; + unsigned int references:11; /* 32 bits */ }; /* diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 28914a509914..0bb246e21cd7 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2708,13 +2708,15 @@ EXPORT_SYMBOL_GPL(lockdep_init_map); */ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, int hardirqs_off, - struct lockdep_map *nest_lock, unsigned long ip) + struct lockdep_map *nest_lock, unsigned long ip, + int references) { struct task_struct *curr = current; struct lock_class *class = NULL; struct held_lock *hlock; unsigned int depth, id; int chain_head = 0; + int class_idx; u64 chain_key; if (!prove_locking) @@ -2762,10 +2764,24 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) return 0; + class_idx = class - lock_classes + 1; + + if (depth) { + hlock = curr->held_locks + depth - 1; + if (hlock->class_idx == class_idx && nest_lock) { + if (hlock->references) + hlock->references++; + else + hlock->references = 2; + + return 1; + } + } + hlock = curr->held_locks + depth; if (DEBUG_LOCKS_WARN_ON(!class)) return 0; - hlock->class_idx = class - lock_classes + 1; + hlock->class_idx = class_idx; hlock->acquire_ip = ip; hlock->instance = lock; hlock->nest_lock = nest_lock; @@ -2773,6 +2789,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->read = read; hlock->check = check; hlock->hardirqs_off = !!hardirqs_off; + hlock->references = references; #ifdef CONFIG_LOCK_STAT hlock->waittime_stamp = 0; hlock->holdtime_stamp = sched_clock(); @@ -2881,6 +2898,30 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, return 1; } +static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) +{ + if (hlock->instance == lock) + return 1; + + if (hlock->references) { + struct lock_class *class = lock->class_cache; + + if (!class) + class = look_up_lock_class(lock, 0); + + if (DEBUG_LOCKS_WARN_ON(!class)) + return 0; + + if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) + return 0; + + if
(hlock->class_idx == class - lock_classes + 1) + return 1; + } + + return 0; +} + static int __lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, @@ -2904,7 +2945,7 @@ __lock_set_class(struct lockdep_map *lock, const char *name, */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; } @@ -2923,7 +2964,8 @@ found_it: if (!__lock_acquire(hlock->instance, hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, - hlock->nest_lock, hlock->acquire_ip)) + hlock->nest_lock, hlock->acquire_ip, + hlock->references)) return 0; } @@ -2962,20 +3004,34 @@ lock_release_non_nested(struct task_struct *curr, */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; } return print_unlock_inbalance_bug(curr, lock, ip); found_it: - lock_release_holdtime(hlock); + if (hlock->instance == lock) + lock_release_holdtime(hlock); + + if (hlock->references) { + hlock->references--; + if (hlock->references) { + /* + * We had, and after removing one, still have + * references, the current lock stack is still + * valid. We're done! + */ + return 1; + } + } /* * We have the right lock to unlock, 'hlock' points to it. * Now we remove it from the stack, and add back the other * entries (if any), recalculating the hash along the way: */ + curr->lockdep_depth = i; curr->curr_chain_key = hlock->prev_chain_key; @@ -2984,7 +3040,8 @@ found_it: if (!__lock_acquire(hlock->instance, hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, - hlock->nest_lock, hlock->acquire_ip)) + hlock->nest_lock, hlock->acquire_ip, + hlock->references)) return 0; } @@ -3014,7 +3071,7 @@ static int lock_release_nested(struct task_struct *curr, /* * Is the unlock non-nested: */ - if (hlock->instance != lock) + if (hlock->instance != lock || hlock->references) return lock_release_non_nested(curr, lock, ip); curr->lockdep_depth--; @@ -3065,7 +3122,9 @@ static int __lock_is_held(struct lockdep_map *lock) int i; for (i = 0; i < curr->lockdep_depth; i++) { - if (curr->held_locks[i].instance == lock) + struct held_lock *hlock = curr->held_locks + i; + + if (match_held_lock(hlock, lock)) return 1; } @@ -3148,7 +3207,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, current->lockdep_recursion = 1; __lock_acquire(lock, subclass, trylock, read, check, - irqs_disabled_flags(flags), nest_lock, ip); + irqs_disabled_flags(flags), nest_lock, ip, 0); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } @@ -3252,7 +3311,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; } @@ -3260,6 +3319,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) return; found_it: + if (hlock->instance != lock) + return; + hlock->waittime_stamp = sched_clock(); contention_point = lock_point(hlock_class(hlock)->contention_point, ip); @@ -3299,7 +3361,7 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; 
} @@ -3307,6 +3369,9 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) return; found_it: + if (hlock->instance != lock) + return; + cpu = smp_processor_id(); if (hlock->waittime_stamp) { now = sched_clock(); -- cgit v1.2.3 From e351b660fddd4df76cc4635f896d311ed0ff3752 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Jul 2009 22:48:09 +0800 Subject: lockdep: Reintroduce generation count to make BFS faster We still can apply DaveM's generation count optimization to BFS, based on the following idea: - before doing each BFS, increase the global generation id by 1 - if one node in the graph has been visited, mark it as visited by storing the current global generation id into the node's dep_gen_id field - so we can decide if one node has been visited already, by comparing the node's dep_gen_id with the global generation id. By applying DaveM's generation count optimization to current implementation of BFS, we gain the following advantages: - we save MAX_LOCKDEP_ENTRIES/8 bytes memory; - we remove the bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); in each BFS, which is very time-consuming since MAX_LOCKDEP_ENTRIES may be very large.(16384UL) Signed-off-by: Ming Lei Signed-off-by: Peter Zijlstra Cc: "David S. Miller" LKML-Reference: <1248274089-6358-1-git-send-email-tom.leiming@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 1 + kernel/lockdep.c | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 47d42eff6124..9ccf0e286b2a 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -58,6 +58,7 @@ struct lock_class { struct lockdep_subclass_key *key; unsigned int subclass; + unsigned int dep_gen_id; /* * IRQ/softirq usage tracking bits: diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 0bb246e21cd7..2b443b5090a1 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -861,14 +861,15 @@ struct circular_queue { }; static struct circular_queue lock_cq; -static unsigned long bfs_accessed[BITS_TO_LONGS(MAX_LOCKDEP_ENTRIES)]; unsigned int max_bfs_queue_depth; +static unsigned int lockdep_dependency_gen_id; + static inline void __cq_init(struct circular_queue *cq) { cq->front = cq->rear = 0; - bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); + lockdep_dependency_gen_id++; } static inline int __cq_empty(struct circular_queue *cq) @@ -914,7 +915,7 @@ static inline void mark_lock_accessed(struct lock_list *lock, nr = lock - list_entries; WARN_ON(nr >= nr_list_entries); lock->parent = parent; - set_bit(nr, bfs_accessed); + lock->class->dep_gen_id = lockdep_dependency_gen_id; } static inline unsigned long lock_accessed(struct lock_list *lock) @@ -923,7 +924,7 @@ static inline unsigned long lock_accessed(struct lock_list *lock) nr = lock - list_entries; WARN_ON(nr >= nr_list_entries); - return test_bit(nr, bfs_accessed); + return lock->class->dep_gen_id == lockdep_dependency_gen_id; } static inline struct lock_list *get_lock_parent(struct lock_list *child) @@ -3604,7 +3605,7 @@ void __init lockdep_info(void) sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS + sizeof(struct list_head) * CHAINHASH_SIZE) / 1024 #ifdef CONFIG_PROVE_LOCKING - + sizeof(struct circular_queue) + sizeof(bfs_accessed) + + sizeof(struct circular_queue) #endif ); -- cgit v1.2.3 From 47cab6a722d44c71c4f8224017ef548522243cf4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Aug 2009 09:31:54 +0200 Subject: debug lockups: Improve lockup detection, fix generic arch fallback As 
Andrew noted, my previous patch ("debug lockups: Improve lockup detection") broke/removed SysRq-L support from architecture that do not provide a __trigger_all_cpu_backtrace implementation. Restore a fallback path and clean up the SysRq-L machinery a bit: - Rename the arch method to arch_trigger_all_cpu_backtrace() - Simplify the define - Document the method a bit - in the hope of more architectures adding support for it. [ The patch touches Sparc code for the rename. ] Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: "David S. Miller" LKML-Reference: <20090802140809.7ec4bb6b.akpm@linux-foundation.org> Signed-off-by: Ingo Molnar --- arch/sparc/include/asm/irq_64.h | 4 ++-- arch/sparc/kernel/process_64.c | 4 ++-- arch/x86/include/asm/nmi.h | 4 ++-- arch/x86/kernel/apic/nmi.c | 2 +- drivers/char/sysrq.c | 15 ++++++++++++++- include/linux/nmi.h | 19 +++++++++++++++++-- 6 files changed, 38 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 1934f2cbf513..a0b443cb3c1f 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -89,8 +89,8 @@ static inline unsigned long get_softint(void) return retval; } -void __trigger_all_cpu_backtrace(void); -#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() +void arch_trigger_all_cpu_backtrace(void); +#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace extern void *hardirq_stack[NR_CPUS]; extern void *softirq_stack[NR_CPUS]; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 4041f94e7724..18d67854a1b8 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -251,7 +251,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp) } } -void __trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(void) { struct thread_info *tp = current_thread_info(); struct pt_regs *regs = get_irq_regs(); @@ -304,7 +304,7 @@ void __trigger_all_cpu_backtrace(void) static void sysrq_handle_globreg(int key, struct tty_struct *tty) { - __trigger_all_cpu_backtrace(); + arch_trigger_all_cpu_backtrace(); } static struct sysrq_key_op sparc_globalreg_op = { diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c86e5ed4af51..e63cf7d441e1 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -45,8 +45,8 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; -void __trigger_all_cpu_backtrace(void); -#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() +void arch_trigger_all_cpu_backtrace(void); +#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace static inline void localise_nmi_watchdog(void) { diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 1bb1ac20e9ec..db7220220d09 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -554,7 +554,7 @@ int do_nmi_callback(struct pt_regs *regs, int cpu) return 0; } -void __trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(void) { int i; diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 165f307f30e8..50eecfe1d724 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -223,7 +223,20 @@ static DECLARE_WORK(sysrq_showallcpus, sysrq_showregs_othercpus); static void sysrq_handle_showallcpus(int key, struct tty_struct *tty) { - trigger_all_cpu_backtrace(); + 
/* + * Fall back to the workqueue based printing if the + * backtrace printing did not succeed or the + * architecture has no support for it: + */ + if (!trigger_all_cpu_backtrace()) { + struct pt_regs *regs = get_irq_regs(); + + if (regs) { + printk(KERN_INFO "CPU%d:\n", smp_processor_id()); + show_regs(regs); + } + schedule_work(&sysrq_showallcpus); + } } static struct sysrq_key_op sysrq_showallcpus_op = { diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 29af2d5df097..b752e807adde 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -28,8 +28,23 @@ static inline void acpi_nmi_disable(void) { } static inline void acpi_nmi_enable(void) { } #endif -#ifndef trigger_all_cpu_backtrace -#define trigger_all_cpu_backtrace() do { } while (0) +/* + * Create trigger_all_cpu_backtrace() out of the arch-provided + * base function. Return whether such support was available, + * to allow calling code to fall back to some other mechanism: + */ +#ifdef arch_trigger_all_cpu_backtrace +static inline bool trigger_all_cpu_backtrace(void) +{ + arch_trigger_all_cpu_backtrace(); + + return true; +} +#else +static inline bool trigger_all_cpu_backtrace(void) +{ + return false; +} #endif #endif -- cgit v1.2.3 From ff663cf8705bea101d5f73cf471855c85242575e Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 23 Jul 2009 17:25:49 +0100 Subject: agp: Add generic support for graphics dma remapping This patch introduces new driver hooks to support graphics memory DMA remapping. They let the generic code tell whether the current device needs DMA remapping and, if so, call the driver-provided interfaces for mapping and unmapping. The scratch_page handling has also been changed to cover the remapping case. Signed-off-by: Zhenyu Wang Signed-off-by: David Woodhouse --- drivers/char/agp/agp.h | 6 ++++++ drivers/char/agp/backend.c | 20 ++++++++++++++++++++ drivers/char/agp/generic.c | 9 +++++++++ include/linux/agp_backend.h | 6 +++++- 4 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index ce110a3bf298..17e6d0d3ba36 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -121,6 +121,11 @@ struct agp_bridge_driver { void (*agp_destroy_pages)(struct agp_memory *); int (*agp_type_to_mask_type) (struct agp_bridge_data *, int); void (*chipset_flush)(struct agp_bridge_data *); + + int (*agp_map_page)(void *addr, dma_addr_t *ret); + void (*agp_unmap_page)(void *addr, dma_addr_t dma); + int (*agp_map_memory)(struct agp_memory *mem); + void (*agp_unmap_memory)(struct agp_memory *mem); }; struct agp_bridge_data { @@ -135,6 +140,7 @@ struct agp_bridge_data { u32 *gatt_table_real; unsigned long scratch_page; unsigned long scratch_page_real; + dma_addr_t scratch_page_dma; unsigned long gart_bus_addr; unsigned long gatt_bus_addr; u32 mode; diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c index 3bd7e503de41..19ac3663acdc 100644 --- a/drivers/char/agp/backend.c +++ b/drivers/char/agp/backend.c @@ -152,6 +152,15 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge) bridge->scratch_page_real = phys_to_gart(page_to_phys(page)); bridge->scratch_page = bridge->driver->mask_memory(bridge, phys_to_gart(page_to_phys(page)), 0); + + if (bridge->driver->agp_map_page && + bridge->driver->agp_map_page(phys_to_virt(page_to_phys(page)), + &bridge->scratch_page_dma)) { + dev_err(&bridge->dev->dev, + "unable to dma-map scratch page\n"); + rc = -ENOMEM; + goto err_out_nounmap; + } } size_value = bridge->driver->fetch_size();
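/*
 * [Editor's sketch, not part of the patch] How a chipset driver might
 * implement the new per-page hooks, assuming it has a struct pci_dev at
 * hand (example_pdev and the function names here are hypothetical;
 * intel-agp's real implementation arrives later in this series):
 *
 *	static int example_agp_map_page(void *addr, dma_addr_t *ret)
 *	{
 *		*ret = pci_map_page(example_pdev, virt_to_page(addr), 0,
 *				    PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 *		if (pci_dma_mapping_error(example_pdev, *ret))
 *			return -EINVAL;
 *		return 0;
 *	}
 *
 *	static void example_agp_unmap_page(void *addr, dma_addr_t dma)
 *	{
 *		pci_unmap_page(example_pdev, dma, PAGE_SIZE,
 *			       PCI_DMA_BIDIRECTIONAL);
 *	}
 */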
@@ -191,6 +200,13 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge) return 0; err_out: + if (bridge->driver->needs_scratch_page && + bridge->driver->agp_unmap_page) { + void *va = gart_to_virt(bridge->scratch_page_real); + + bridge->driver->agp_unmap_page(va, bridge->scratch_page_dma); + } +err_out_nounmap: if (bridge->driver->needs_scratch_page) { void *va = gart_to_virt(bridge->scratch_page_real); @@ -221,6 +237,10 @@ static void agp_backend_cleanup(struct agp_bridge_data *bridge) bridge->driver->needs_scratch_page) { void *va = gart_to_virt(bridge->scratch_page_real); + if (bridge->driver->agp_unmap_page) + bridge->driver->agp_unmap_page(va, + bridge->scratch_page_dma); + bridge->driver->agp_destroy_page(va, AGP_PAGE_DESTROY_UNMAP); bridge->driver->agp_destroy_page(va, AGP_PAGE_DESTROY_FREE); } diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index a3bcc7ef42f9..28f0208c66a6 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -437,6 +437,12 @@ int agp_bind_memory(struct agp_memory *curr, off_t pg_start) curr->bridge->driver->cache_flush(); curr->is_flushed = true; } + + if (curr->bridge->driver->agp_map_memory) { + ret_val = curr->bridge->driver->agp_map_memory(curr); + if (ret_val) + return ret_val; + } ret_val = curr->bridge->driver->insert_memory(curr, pg_start, curr->type); if (ret_val != 0) @@ -478,6 +484,9 @@ int agp_unbind_memory(struct agp_memory *curr) if (ret_val != 0) return ret_val; + if (curr->bridge->driver->agp_unmap_memory) + curr->bridge->driver->agp_unmap_memory(curr); + curr->is_bound = false; curr->pg_start = 0; spin_lock(&curr->bridge->mapped_lock); diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 76fa794fdac0..8a294d65b9b1 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -79,9 +79,13 @@ struct agp_memory { u32 physical; bool is_bound; bool is_flushed; - bool vmalloc_flag; + bool vmalloc_flag; + bool sg_vmalloc_flag; /* list of agp_memory mapped to the aperture */ struct list_head mapped_list; + /* DMA-mapped addresses */ + struct scatterlist *sg_list; + int num_sg; }; #define AGP_NORMAL_MEMORY 0 -- cgit v1.2.3 From f692775d7e0a22477143cd884e45c955448ac7d2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 29 Jul 2009 09:28:45 +0100 Subject: intel-agp: fix sglist allocation to avoid vmalloc() Signed-off-by: David Woodhouse --- drivers/char/agp/intel-agp.c | 29 ++++++++++------------------- include/linux/agp_backend.h | 1 - 2 files changed, 10 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index b9d9886ff3c3..d8c80d8be5e2 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -198,39 +198,30 @@ static void intel_agp_unmap_page(struct page *page, dma_addr_t dma) static void intel_agp_free_sglist(struct agp_memory *mem) { + struct sg_table st; + + st.sgl = mem->sg_list; + st.orig_nents = st.nents = mem->page_count; + + sg_free_table(&st); - if (mem->sg_vmalloc_flag) - vfree(mem->sg_list); - else - kfree(mem->sg_list); - mem->sg_vmalloc_flag = 0; mem->sg_list = NULL; mem->num_sg = 0; } static int intel_agp_map_memory(struct agp_memory *mem) { + struct sg_table st; struct scatterlist *sg; int i; DBG("try mapping %lu pages\n", (unsigned long)mem->page_count); - if ((mem->page_count * sizeof(*mem->sg_list)) < 2*PAGE_SIZE) - mem->sg_list = kcalloc(mem->page_count, sizeof(*mem->sg_list), - GFP_KERNEL); - - if (mem->sg_list == NULL) { - 
mem->sg_list = vmalloc(mem->page_count * sizeof(*mem->sg_list)); - mem->sg_vmalloc_flag = 1; - } - - if (!mem->sg_list) { - mem->sg_vmalloc_flag = 0; + if (sg_alloc_table(&st, mem->page_count, GFP_KERNEL)) return -ENOMEM; - } - sg_init_table(mem->sg_list, mem->page_count); - sg = mem->sg_list; + mem->sg_list = sg = st.sgl; + for (i = 0 ; i < mem->page_count; i++, sg = sg_next(sg)) sg_set_page(sg, mem->pages[i], PAGE_SIZE, 0); diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 8a294d65b9b1..880130f7311f 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -80,7 +80,6 @@ struct agp_memory { bool is_bound; bool is_flushed; bool vmalloc_flag; - bool sg_vmalloc_flag; /* list of agp_memory mapped to the aperture */ struct list_head mapped_list; /* DMA-mapped addresses */ -- cgit v1.2.3 From 42c4ab41a176ee784c0f28c0b29025a8fc34f05a Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:26 +0200 Subject: itimers: Merge ITIMER_VIRT and ITIMER_PROF Both CPU itimers share the same data flow in a few places; this patch unifies the code related to the VIRT and PROF itimers. Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-2-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 14 ++++- kernel/fork.c | 9 +-- kernel/itimer.c | 146 +++++++++++++++++++++------------------- kernel/posix-cpu-timers.c | 98 +++++++++++++++---------------- 4 files changed, 130 insertions(+), 137 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ab08e4bb6b8..3b3efaddd953 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -470,6 +470,11 @@ struct pacct_struct { unsigned long ac_minflt, ac_majflt; }; +struct cpu_itimer { + cputime_t expires; + cputime_t incr; +}; + /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in &cputime_t units @@ -564,9 +569,12 @@ struct signal_struct { struct pid *leader_pid; ktime_t it_real_incr; - /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ - cputime_t it_prof_expires, it_virt_expires; - cputime_t it_prof_incr, it_virt_incr; + /* + * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use + * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these + * values are defined to 0 and 1 respectively + */ + struct cpu_itimer it[2]; /* * Thread group totals for process CPU timers. diff --git a/kernel/fork.c b/kernel/fork.c index 29b532e718f7..893ab0bf5e39 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -790,10 +791,10 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) thread_group_cputime_init(sig); /* Expiration times and increments. */ - sig->it_virt_expires = cputime_zero; - sig->it_virt_incr = cputime_zero; - sig->it_prof_expires = cputime_zero; - sig->it_prof_incr = cputime_zero; + sig->it[CPUCLOCK_PROF].expires = cputime_zero; + sig->it[CPUCLOCK_PROF].incr = cputime_zero; + sig->it[CPUCLOCK_VIRT].expires = cputime_zero; + sig->it[CPUCLOCK_VIRT].incr = cputime_zero; /* Cached expiration times.
*/ sig->cputime_expires.prof_exp = cputime_zero; diff --git a/kernel/itimer.c b/kernel/itimer.c index 58762f7077ec..852c88ddd1f0 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -41,10 +41,43 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer) return ktime_to_timeval(rem); } +static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, + struct itimerval *value) +{ + cputime_t cval, cinterval; + struct cpu_itimer *it = &tsk->signal->it[clock_id]; + + spin_lock_irq(&tsk->sighand->siglock); + + cval = it->expires; + cinterval = it->incr; + if (!cputime_eq(cval, cputime_zero)) { + struct task_cputime cputime; + cputime_t t; + + thread_group_cputimer(tsk, &cputime); + if (clock_id == CPUCLOCK_PROF) + t = cputime_add(cputime.utime, cputime.stime); + else + /* CPUCLOCK_VIRT */ + t = cputime.utime; + + if (cputime_le(cval, t)) + /* about to fire */ + cval = jiffies_to_cputime(1); + else + cval = cputime_sub(cval, t); + } + + spin_unlock_irq(&tsk->sighand->siglock); + + cputime_to_timeval(cval, &value->it_value); + cputime_to_timeval(cinterval, &value->it_interval); +} + int do_getitimer(int which, struct itimerval *value) { struct task_struct *tsk = current; - cputime_t cinterval, cval; switch (which) { case ITIMER_REAL: @@ -55,44 +88,10 @@ int do_getitimer(int which, struct itimerval *value) spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_virt_expires; - cinterval = tsk->signal->it_virt_incr; - if (!cputime_eq(cval, cputime_zero)) { - struct task_cputime cputime; - cputime_t utime; - - thread_group_cputimer(tsk, &cputime); - utime = cputime.utime; - if (cputime_le(cval, utime)) { /* about to fire */ - cval = jiffies_to_cputime(1); - } else { - cval = cputime_sub(cval, utime); - } - } - spin_unlock_irq(&tsk->sighand->siglock); - cputime_to_timeval(cval, &value->it_value); - cputime_to_timeval(cinterval, &value->it_interval); + get_cpu_itimer(tsk, CPUCLOCK_VIRT, value); break; case ITIMER_PROF: - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_prof_expires; - cinterval = tsk->signal->it_prof_incr; - if (!cputime_eq(cval, cputime_zero)) { - struct task_cputime times; - cputime_t ptime; - - thread_group_cputimer(tsk, ×); - ptime = cputime_add(times.utime, times.stime); - if (cputime_le(cval, ptime)) { /* about to fire */ - cval = jiffies_to_cputime(1); - } else { - cval = cputime_sub(cval, ptime); - } - } - spin_unlock_irq(&tsk->sighand->siglock); - cputime_to_timeval(cval, &value->it_value); - cputime_to_timeval(cinterval, &value->it_interval); + get_cpu_itimer(tsk, CPUCLOCK_PROF, value); break; default: return(-EINVAL); @@ -128,6 +127,36 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } +static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, + struct itimerval *value, struct itimerval *ovalue) +{ + cputime_t cval, cinterval, nval, ninterval; + struct cpu_itimer *it = &tsk->signal->it[clock_id]; + + nval = timeval_to_cputime(&value->it_value); + ninterval = timeval_to_cputime(&value->it_interval); + + spin_lock_irq(&tsk->sighand->siglock); + + cval = it->expires; + cinterval = it->incr; + if (!cputime_eq(cval, cputime_zero) || + !cputime_eq(nval, cputime_zero)) { + if (cputime_gt(nval, cputime_zero)) + nval = cputime_add(nval, jiffies_to_cputime(1)); + set_process_cpu_timer(tsk, clock_id, &nval, &cval); + } + it->expires = nval; + it->incr = ninterval; + + spin_unlock_irq(&tsk->sighand->siglock); + + if 
(ovalue) { + cputime_to_timeval(cval, &ovalue->it_value); + cputime_to_timeval(cinterval, &ovalue->it_interval); + } +} + /* * Returns true if the timeval is in canonical form */ @@ -139,7 +168,6 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) struct task_struct *tsk = current; struct hrtimer *timer; ktime_t expires; - cputime_t cval, cinterval, nval, ninterval; /* * Validate the timevals in value. @@ -174,48 +202,10 @@ again: spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: - nval = timeval_to_cputime(&value->it_value); - ninterval = timeval_to_cputime(&value->it_interval); - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_virt_expires; - cinterval = tsk->signal->it_virt_incr; - if (!cputime_eq(cval, cputime_zero) || - !cputime_eq(nval, cputime_zero)) { - if (cputime_gt(nval, cputime_zero)) - nval = cputime_add(nval, - jiffies_to_cputime(1)); - set_process_cpu_timer(tsk, CPUCLOCK_VIRT, - &nval, &cval); - } - tsk->signal->it_virt_expires = nval; - tsk->signal->it_virt_incr = ninterval; - spin_unlock_irq(&tsk->sighand->siglock); - if (ovalue) { - cputime_to_timeval(cval, &ovalue->it_value); - cputime_to_timeval(cinterval, &ovalue->it_interval); - } + set_cpu_itimer(tsk, CPUCLOCK_VIRT, value, ovalue); break; case ITIMER_PROF: - nval = timeval_to_cputime(&value->it_value); - ninterval = timeval_to_cputime(&value->it_interval); - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_prof_expires; - cinterval = tsk->signal->it_prof_incr; - if (!cputime_eq(cval, cputime_zero) || - !cputime_eq(nval, cputime_zero)) { - if (cputime_gt(nval, cputime_zero)) - nval = cputime_add(nval, - jiffies_to_cputime(1)); - set_process_cpu_timer(tsk, CPUCLOCK_PROF, - &nval, &cval); - } - tsk->signal->it_prof_expires = nval; - tsk->signal->it_prof_incr = ninterval; - spin_unlock_irq(&tsk->sighand->siglock); - if (ovalue) { - cputime_to_timeval(cval, &ovalue->it_value); - cputime_to_timeval(cinterval, &ovalue->it_interval); - } + set_cpu_itimer(tsk, CPUCLOCK_PROF, value, ovalue); break; default: return -EINVAL; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index bece7c0b67b2..9b2d5e4dc8c4 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -14,11 +14,11 @@ */ void update_rlimit_cpu(unsigned long rlim_new) { - cputime_t cputime; + cputime_t cputime = secs_to_cputime(rlim_new); + struct signal_struct *const sig = current->signal; - cputime = secs_to_cputime(rlim_new); - if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || - cputime_gt(current->signal->it_prof_expires, cputime)) { + if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || + cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { spin_lock_irq(¤t->sighand->siglock); set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); spin_unlock_irq(¤t->sighand->siglock); @@ -613,6 +613,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) break; } } else { + struct signal_struct *const sig = p->signal; + union cpu_time_count *exp = &timer->it.cpu.expires; + /* * For a process timer, set the cached expiration time. 
*/ @@ -620,30 +623,27 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) default: BUG(); case CPUCLOCK_VIRT: - if (!cputime_eq(p->signal->it_virt_expires, + if (!cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && - cputime_lt(p->signal->it_virt_expires, - timer->it.cpu.expires.cpu)) + cputime_lt(sig->it[CPUCLOCK_VIRT].expires, + exp->cpu)) break; - p->signal->cputime_expires.virt_exp = - timer->it.cpu.expires.cpu; + sig->cputime_expires.virt_exp = exp->cpu; break; case CPUCLOCK_PROF: - if (!cputime_eq(p->signal->it_prof_expires, + if (!cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && - cputime_lt(p->signal->it_prof_expires, - timer->it.cpu.expires.cpu)) + cputime_lt(sig->it[CPUCLOCK_PROF].expires, + exp->cpu)) break; - i = p->signal->rlim[RLIMIT_CPU].rlim_cur; + i = sig->rlim[RLIMIT_CPU].rlim_cur; if (i != RLIM_INFINITY && - i <= cputime_to_secs(timer->it.cpu.expires.cpu)) + i <= cputime_to_secs(exp->cpu)) break; - p->signal->cputime_expires.prof_exp = - timer->it.cpu.expires.cpu; + sig->cputime_expires.prof_exp = exp->cpu; break; case CPUCLOCK_SCHED: - p->signal->cputime_expires.sched_exp = - timer->it.cpu.expires.sched; + sig->cputime_expires.sched_exp = exp->sched; break; } } @@ -1070,6 +1070,27 @@ static void stop_process_timers(struct task_struct *tsk) spin_unlock_irqrestore(&cputimer->lock, flags); } +static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, + cputime_t *expires, cputime_t cur_time, int signo) +{ + if (cputime_eq(it->expires, cputime_zero)) + return; + + if (cputime_ge(cur_time, it->expires)) { + it->expires = it->incr; + if (!cputime_eq(it->expires, cputime_zero)) + it->expires = cputime_add(it->expires, cur_time); + + __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); + } + + if (!cputime_eq(it->expires, cputime_zero) && + (cputime_eq(*expires, cputime_zero) || + cputime_lt(it->expires, *expires))) { + *expires = it->expires; + } +} + /* * Check for any per-thread CPU timers that have fired and move them * off the tsk->*_timers list onto the firing list. Per-thread timers @@ -1089,10 +1110,10 @@ static void check_process_timers(struct task_struct *tsk, * Don't sample the current process CPU clocks if there are no timers. */ if (list_empty(&timers[CPUCLOCK_PROF]) && - cputime_eq(sig->it_prof_expires, cputime_zero) && + cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && list_empty(&timers[CPUCLOCK_VIRT]) && - cputime_eq(sig->it_virt_expires, cputime_zero) && + cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && list_empty(&timers[CPUCLOCK_SCHED])) { stop_process_timers(tsk); return; @@ -1152,38 +1173,11 @@ static void check_process_timers(struct task_struct *tsk, /* * Check for the special case process timers. */ - if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { - if (cputime_ge(ptime, sig->it_prof_expires)) { - /* ITIMER_PROF fires and reloads. */ - sig->it_prof_expires = sig->it_prof_incr; - if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { - sig->it_prof_expires = cputime_add( - sig->it_prof_expires, ptime); - } - __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk); - } - if (!cputime_eq(sig->it_prof_expires, cputime_zero) && - (cputime_eq(prof_expires, cputime_zero) || - cputime_lt(sig->it_prof_expires, prof_expires))) { - prof_expires = sig->it_prof_expires; - } - } - if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { - if (cputime_ge(utime, sig->it_virt_expires)) { - /* ITIMER_VIRTUAL fires and reloads. 
*/ - sig->it_virt_expires = sig->it_virt_incr; - if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { - sig->it_virt_expires = cputime_add( - sig->it_virt_expires, utime); - } - __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk); - } - if (!cputime_eq(sig->it_virt_expires, cputime_zero) && - (cputime_eq(virt_expires, cputime_zero) || - cputime_lt(sig->it_virt_expires, virt_expires))) { - virt_expires = sig->it_virt_expires; - } - } + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime, + SIGPROF); + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime, + SIGVTALRM); + if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { unsigned long psecs = cputime_to_secs(ptime); cputime_t x; -- cgit v1.2.3 From 8356b5f9c424e5831715abbce747197c30d1fd71 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:27 +0200 Subject: itimers: Fix periodic ticks precision Measure the interval error of the ITIMER_PROF and ITIMER_VIRT timers between real ticks and the interval requested by the user, and take it into account when scheduling the next tick. With this patch the time between two consecutive ticks can be smaller than the requested interval; it preserves, however, the invariant that the nth tick is generated no earlier than n*interval, counting from the beginning of periodic signal generation. Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-3-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/itimer.c | 24 +++++++++++++++++++++--- kernel/posix-cpu-timers.c | 20 +++++++++++++++++--- 3 files changed, 40 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3b3efaddd953..a069e65e8bb7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -473,6 +473,8 @@ struct pacct_struct { struct cpu_itimer { cputime_t expires; cputime_t incr; + u32 error; + u32 incr_error; }; /** diff --git a/kernel/itimer.c b/kernel/itimer.c index 852c88ddd1f0..21adff7b2a17 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -42,7 +42,7 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer) } static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, - struct itimerval *value) + struct itimerval *const value) { cputime_t cval, cinterval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; @@ -127,14 +127,32 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } +static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns) +{ + struct timespec ts; + s64 cpu_ns; + + cputime_to_timespec(ct, &ts); + cpu_ns = timespec_to_ns(&ts); + + return (cpu_ns <= real_ns) ?
0 : cpu_ns - real_ns; +} + static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, - struct itimerval *value, struct itimerval *ovalue) + const struct itimerval *const value, + struct itimerval *const ovalue) { - cputime_t cval, cinterval, nval, ninterval; + cputime_t cval, nval, cinterval, ninterval; + s64 ns_ninterval, ns_nval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; nval = timeval_to_cputime(&value->it_value); + ns_nval = timeval_to_ns(&value->it_value); ninterval = timeval_to_cputime(&value->it_interval); + ns_ninterval = timeval_to_ns(&value->it_interval); + + it->incr_error = cputime_sub_ns(ninterval, ns_ninterval); + it->error = cputime_sub_ns(nval, ns_nval); spin_lock_irq(&tsk->sighand->siglock); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 9b2d5e4dc8c4..b60d644ea4b3 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1070,6 +1070,8 @@ static void stop_process_timers(struct task_struct *tsk) spin_unlock_irqrestore(&cputimer->lock, flags); } +static u32 onecputick; + static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, cputime_t *expires, cputime_t cur_time, int signo) { @@ -1077,9 +1079,16 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, return; if (cputime_ge(cur_time, it->expires)) { - it->expires = it->incr; - if (!cputime_eq(it->expires, cputime_zero)) - it->expires = cputime_add(it->expires, cur_time); + if (!cputime_eq(it->incr, cputime_zero)) { + it->expires = cputime_add(it->expires, it->incr); + it->error += it->incr_error; + if (it->error >= onecputick) { + it->expires = cputime_sub(it->expires, + jiffies_to_cputime(1)); + it->error -= onecputick; + } + } else + it->expires = cputime_zero; __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); } @@ -1696,10 +1705,15 @@ static __init int init_posix_cpu_timers(void) .nsleep = thread_cpu_nsleep, .nsleep_restart = thread_cpu_nsleep_restart, }; + struct timespec ts; register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); + cputime_to_timespec(jiffies_to_cputime(1), &ts); + onecputick = ts.tv_nsec; + WARN_ON(ts.tv_sec != 0); + return 0; } __initcall(init_posix_cpu_timers); -- cgit v1.2.3 From 3e352aa8ee2bd48f1a19c7742810b3a4a7ba605e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Aug 2009 14:10:11 +0900 Subject: x86, percpu: Fix DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() put percpu variables in .page_aligned section without adding any alignment restrictions. Currently, this doesn't cause any problem because all users of the macros have explicit page alignment and page-sized but it's much safer to enforce page alignment from the macros. After all, it's what they claim to do. Add __aligned(PAGE_SIZE) to DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() and drop explicit alignment from it users. Signed-off-by: Tejun Heo Cc: Ingo Molnar Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/cpu/common.c | 3 +-- include/linux/percpu-defs.h | 8 +++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f1961c07af9a..12493c5485e5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1008,8 +1008,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { }; static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) - __aligned(PAGE_SIZE); + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); /* May not be marked __init: used by software suspend */ void syscall_init(void) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 68438e18fff4..afd5f8b7061f 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -69,11 +69,13 @@ /* * Declaration/definition used for per-CPU variables that must be page aligned. */ -#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") +#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") \ + __aligned(PAGE_SIZE) #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") + DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") \ + __aligned(PAGE_SIZE) /* * Intermodule exports for per-CPU variables. -- cgit v1.2.3 From 1487cd5e76337555737cbc55d7d83f41460d198f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 30 Jul 2009 19:41:20 +0300 Subject: usbnet: allow "minidriver" to prevent urb unlinking on usbnet_stop rndis_wlan devices freeze after usbnet_stop has been run several times. It appears that the firmware freezes in a state where it does not respond to any RNDIS commands and the device has to be physically unplugged and replugged. This patch lets minidrivers disable unlink_urbs on usbnet_stop through a new info flag. Signed-off-by: Jussi Kivilinna Cc: David Brownell Signed-off-by: John W. Linville --- drivers/net/usb/usbnet.c | 32 ++++++++++++++++++-------------- drivers/net/wireless/rndis_wlan.c | 9 ++++++--- include/linux/usb/usbnet.h | 1 + 3 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 25e435c49040..af1fe4696509 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -601,21 +601,25 @@ int usbnet_stop (struct net_device *net) info->description); } - // ensure there are no more active urbs - add_wait_queue (&unlink_wakeup, &wait); - dev->wait = &unlink_wakeup; - temp = unlink_urbs (dev, &dev->txq) + unlink_urbs (dev, &dev->rxq); - - // maybe wait for deletions to finish. - while (!skb_queue_empty(&dev->rxq) - && !skb_queue_empty(&dev->txq) - && !skb_queue_empty(&dev->done)) { - msleep(UNLINK_TIMEOUT_MS); - if (netif_msg_ifdown (dev)) - devdbg (dev, "waited for %d urb completions", temp); + if (!(info->flags & FLAG_AVOID_UNLINK_URBS)) { + /* ensure there are no more active urbs */ + add_wait_queue(&unlink_wakeup, &wait); + dev->wait = &unlink_wakeup; + temp = unlink_urbs(dev, &dev->txq) + + unlink_urbs(dev, &dev->rxq); + + /* maybe wait for deletions to finish.
*/ + while (!skb_queue_empty(&dev->rxq) + && !skb_queue_empty(&dev->txq) + && !skb_queue_empty(&dev->done)) { + msleep(UNLINK_TIMEOUT_MS); + if (netif_msg_ifdown(dev)) + devdbg(dev, "waited for %d urb completions", + temp); + } + dev->wait = NULL; + remove_wait_queue(&unlink_wakeup, &wait); } - dev->wait = NULL; - remove_wait_queue (&unlink_wakeup, &wait); usb_kill_urb(dev->interrupt); diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 09c0702ae645..76c5ec6bbbc5 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -2513,7 +2513,8 @@ static int rndis_wlan_stop(struct usbnet *usbdev) static const struct driver_info bcm4320b_info = { .description = "Wireless RNDIS device, BCM4320b based", - .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT | + FLAG_AVOID_UNLINK_URBS, .bind = rndis_wlan_bind, .unbind = rndis_wlan_unbind, .status = rndis_status, @@ -2527,7 +2528,8 @@ static const struct driver_info bcm4320b_info = { static const struct driver_info bcm4320a_info = { .description = "Wireless RNDIS device, BCM4320a based", - .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT | + FLAG_AVOID_UNLINK_URBS, .bind = rndis_wlan_bind, .unbind = rndis_wlan_unbind, .status = rndis_status, @@ -2541,7 +2543,8 @@ static const struct driver_info bcm4320a_info = { static const struct driver_info rndis_wlan_info = { .description = "Wireless RNDIS device", - .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT | + FLAG_AVOID_UNLINK_URBS, .bind = rndis_wlan_bind, .unbind = rndis_wlan_unbind, .status = rndis_status, diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 7c17b2efba86..c642f78dd9cf 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -86,6 +86,7 @@ struct driver_info { #define FLAG_FRAMING_AX 0x0040 /* AX88772/178 packets */ #define FLAG_WLAN 0x0080 /* use "wlan%d" names */ +#define FLAG_AVOID_UNLINK_URBS 0x0100 /* don't unlink urbs at usbnet_stop() */ /* init device ... can sleep, or cause probe() failure */ -- cgit v1.2.3 From 2a4901bcbe9c122bd56e1f6c337fcb4ad75fafb7 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 30 Jul 2009 19:41:52 +0300 Subject: rndis_host: allow rndis_wlan to see all indications Allow rndis_wlan to see all indications. Currently rndis_host lets rndis_wlan know about link state changes only, but there is a whole set of other 802.11-specific indications that rndis_wlan should handle properly. So rename link_change() to indication() and convert rndis_wlan to use it. Signed-off-by: Jussi Kivilinna Cc: David Brownell Signed-off-by: John W. Linville --- drivers/net/usb/rndis_host.c | 50 +++++++++++++++++++++++---------------- drivers/net/wireless/rndis_wlan.c | 31 +++++++++++++++++++----- include/linux/usb/usbnet.h | 5 ++-- 3 files changed, 56 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 2232232b7989..d032bba9bc4c 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -64,6 +64,32 @@ void rndis_status(struct usbnet *dev, struct urb *urb) } EXPORT_SYMBOL_GPL(rndis_status); +/* + * RNDIS indicate messages.
+ */ +static void rndis_msg_indicate(struct usbnet *dev, struct rndis_indicate *msg, + int buflen) +{ + struct cdc_state *info = (void *)&dev->data; + struct device *udev = &info->control->dev; + + if (dev->driver_info->indication) { + dev->driver_info->indication(dev, msg, buflen); + } else { + switch (msg->status) { + case RNDIS_STATUS_MEDIA_CONNECT: + dev_info(udev, "rndis media connect\n"); + break; + case RNDIS_STATUS_MEDIA_DISCONNECT: + dev_info(udev, "rndis media disconnect\n"); + break; + default: + dev_info(udev, "rndis indication: 0x%08x\n", + le32_to_cpu(msg->status)); + } + } +} + /* * RPC done RNDIS-style. Caller guarantees: * - message is properly byteswapped @@ -143,27 +169,9 @@ int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen) request_id, xid); /* then likely retry */ } else switch (buf->msg_type) { - case RNDIS_MSG_INDICATE: { /* fault/event */ - struct rndis_indicate *msg = (void *)buf; - int state = 0; - - switch (msg->status) { - case RNDIS_STATUS_MEDIA_CONNECT: - state = 1; - case RNDIS_STATUS_MEDIA_DISCONNECT: - dev_info(&info->control->dev, - "rndis media %sconnect\n", - !state?"dis":""); - if (dev->driver_info->link_change) - dev->driver_info->link_change( - dev, state); - break; - default: - dev_info(&info->control->dev, - "rndis indication: 0x%08x\n", - le32_to_cpu(msg->status)); - } - } + case RNDIS_MSG_INDICATE: /* fault/event */ + rndis_msg_indicate(dev, (void *)buf, buflen); + break; case RNDIS_MSG_KEEPALIVE: { /* ping */ struct rndis_keepalive_c *msg = (void *)buf; diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index f6dcbb168b78..6b6452b0e8c4 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -2211,13 +2211,32 @@ static void rndis_wlan_set_multicast_list(struct net_device *dev) queue_work(priv->workqueue, &priv->work); } -static void rndis_wlan_link_change(struct usbnet *usbdev, int state) +static void rndis_wlan_indication(struct usbnet *usbdev, void *ind, int buflen) { struct rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev); + struct rndis_indicate *msg = ind; /* queue work to avoid recursive calls into rndis_command */ - set_bit(state ? 
WORK_LINK_UP : WORK_LINK_DOWN, &priv->work_pending); - queue_work(priv->workqueue, &priv->work); + switch (msg->status) { + case RNDIS_STATUS_MEDIA_CONNECT: + devinfo(usbdev, "media connect"); + + set_bit(WORK_LINK_UP, &priv->work_pending); + queue_work(priv->workqueue, &priv->work); + break; + + case RNDIS_STATUS_MEDIA_DISCONNECT: + devinfo(usbdev, "media disconnect"); + + set_bit(WORK_LINK_DOWN, &priv->work_pending); + queue_work(priv->workqueue, &priv->work); + break; + + default: + devinfo(usbdev, "indication: 0x%08x", + le32_to_cpu(msg->status)); + break; + } } @@ -2666,7 +2685,7 @@ static const struct driver_info bcm4320b_info = { .reset = rndis_wlan_reset, .stop = rndis_wlan_stop, .early_init = bcm4320b_early_init, - .link_change = rndis_wlan_link_change, + .indication = rndis_wlan_indication, }; static const struct driver_info bcm4320a_info = { @@ -2681,7 +2700,7 @@ static const struct driver_info bcm4320a_info = { .reset = rndis_wlan_reset, .stop = rndis_wlan_stop, .early_init = bcm4320a_early_init, - .link_change = rndis_wlan_link_change, + .indication = rndis_wlan_indication, }; static const struct driver_info rndis_wlan_info = { @@ -2696,7 +2715,7 @@ static const struct driver_info rndis_wlan_info = { .reset = rndis_wlan_reset, .stop = rndis_wlan_stop, .early_init = bcm4320a_early_init, - .link_change = rndis_wlan_link_change, + .indication = rndis_wlan_indication, }; /*-------------------------------------------------------------------------*/ diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index c642f78dd9cf..de8b4b18961b 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -122,9 +122,8 @@ struct driver_info { * right after minidriver have initialized hardware. */ int (*early_init)(struct usbnet *dev); - /* called by minidriver when link state changes, state: 0=disconnect, - * 1=connect */ - void (*link_change)(struct usbnet *dev, int state); + /* called by minidriver when receiving indication */ + void (*indication)(struct usbnet *dev, void *ind, int indlen); /* for new devices, use the descriptor-reading code instead */ int in; /* rx endpoint */ -- cgit v1.2.3 From 030645aceb3d9f10b1c3d2231c50f5a8bb3a9667 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 30 Jul 2009 19:41:58 +0300 Subject: rndis_wlan: handle 802.11 indications from device Add handling for 802.11 specific rndis indications. Signed-off-by: Jussi Kivilinna Signed-off-by: John W. 
Linville --- drivers/net/wireless/rndis_wlan.c | 233 +++++++++++++++++++++++++++++++++++++- include/linux/usb/rndis_host.h | 13 ++- 2 files changed, 239 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 6b6452b0e8c4..7a50cfa18843 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -201,6 +201,24 @@ enum ndis_80211_priv_filter { NDIS_80211_PRIV_8021X_WEP }; +enum ndis_80211_status_type { + NDIS_80211_STATUSTYPE_AUTHENTICATION, + NDIS_80211_STATUSTYPE_MEDIASTREAMMODE, + NDIS_80211_STATUSTYPE_PMKID_CANDIDATELIST, + NDIS_80211_STATUSTYPE_RADIOSTATE, +}; + +enum ndis_80211_media_stream_mode { + NDIS_80211_MEDIA_STREAM_OFF, + NDIS_80211_MEDIA_STREAM_ON +}; + +enum ndis_80211_radio_status { + NDIS_80211_RADIO_STATUS_ON, + NDIS_80211_RADIO_STATUS_HARDWARE_OFF, + NDIS_80211_RADIO_STATUS_SOFTWARE_OFF, +}; + enum ndis_80211_addkey_bits { NDIS_80211_ADDKEY_8021X_AUTH = cpu_to_le32(1 << 28), NDIS_80211_ADDKEY_SET_INIT_RECV_SEQ = cpu_to_le32(1 << 29), @@ -213,6 +231,35 @@ enum ndis_80211_addwep_bits { NDIS_80211_ADDWEP_TRANSMIT_KEY = cpu_to_le32(1 << 31) }; +struct ndis_80211_auth_request { + __le32 length; + u8 bssid[6]; + u8 padding[2]; + __le32 flags; +} __attribute__((packed)); + +struct ndis_80211_pmkid_candidate { + u8 bssid[6]; + u8 padding[2]; + __le32 flags; +} __attribute__((packed)); + +struct ndis_80211_pmkid_cand_list { + __le32 version; + __le32 num_candidates; + struct ndis_80211_pmkid_candidate candidate_list[0]; +} __attribute__((packed)); + +struct ndis_80211_status_indication { + __le32 status_type; + union { + enum ndis_80211_media_stream_mode media_stream_mode; + enum ndis_80211_radio_status radio_status; + struct ndis_80211_auth_request auth_request[0]; + struct ndis_80211_pmkid_cand_list cand_list; + } u; +} __attribute__((packed)); + struct ndis_80211_ssid { __le32 length; u8 essid[NDIS_802_11_LENGTH_SSID]; @@ -2211,16 +2258,195 @@ static void rndis_wlan_set_multicast_list(struct net_device *dev) queue_work(priv->workqueue, &priv->work); } + +static void rndis_wlan_auth_indication(struct usbnet *usbdev, + struct ndis_80211_status_indication *indication, + int len) +{ + u8 *buf; + const char *type; + int flags, buflen; + bool pairwise_error, group_error; + struct ndis_80211_auth_request *auth_req; + + /* must have at least one array entry */ + if (len < offsetof(struct ndis_80211_status_indication, u) + + sizeof(struct ndis_80211_auth_request)) { + devinfo(usbdev, "authentication indication: " + "too short message (%i)", len); + return; + } + + buf = (void *)&indication->u.auth_request[0]; + buflen = len - offsetof(struct ndis_80211_status_indication, u); + + while (buflen >= sizeof(*auth_req)) { + auth_req = (void *)buf; + type = "unknown"; + flags = le32_to_cpu(auth_req->flags); + pairwise_error = false; + group_error = false; + + if (flags & 0x1) + type = "reauth request"; + if (flags & 0x2) + type = "key update request"; + if (flags & 0x6) { + pairwise_error = true; + type = "pairwise_error"; + } + if (flags & 0xe) { + group_error = true; + type = "group_error"; + } + + devinfo(usbdev, "authentication indication: %s (0x%08x)", type, + le32_to_cpu(auth_req->flags)); + + if (pairwise_error || group_error) { + union iwreq_data wrqu; + struct iw_michaelmicfailure micfailure; + + memset(&micfailure, 0, sizeof(micfailure)); + if (pairwise_error) + micfailure.flags |= IW_MICFAILURE_PAIRWISE; + if (group_error) + micfailure.flags |= 
IW_MICFAILURE_GROUP; + + memcpy(micfailure.src_addr.sa_data, auth_req->bssid, + ETH_ALEN); + + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = sizeof(micfailure); + wireless_send_event(usbdev->net, IWEVMICHAELMICFAILURE, + &wrqu, (u8 *)&micfailure); + } + + buflen -= le32_to_cpu(auth_req->length); + buf += le32_to_cpu(auth_req->length); + } +} + +static void rndis_wlan_pmkid_cand_list_indication(struct usbnet *usbdev, + struct ndis_80211_status_indication *indication, + int len) +{ + struct ndis_80211_pmkid_cand_list *cand_list; + int list_len, expected_len, i; + + if (len < offsetof(struct ndis_80211_status_indication, u) + + sizeof(struct ndis_80211_pmkid_cand_list)) { + devinfo(usbdev, "pmkid candidate list indication: " + "too short message (%i)", len); + return; + } + + list_len = le32_to_cpu(indication->u.cand_list.num_candidates) * + sizeof(struct ndis_80211_pmkid_candidate); + expected_len = sizeof(struct ndis_80211_pmkid_cand_list) + list_len + + offsetof(struct ndis_80211_status_indication, u); + + if (len < expected_len) { + devinfo(usbdev, "pmkid candidate list indication: " + "list larger than buffer (%i < %i)", + len, expected_len); + return; + } + + cand_list = &indication->u.cand_list; + + devinfo(usbdev, "pmkid candidate list indication: " + "version %i, candidates %i", + le32_to_cpu(cand_list->version), + le32_to_cpu(cand_list->num_candidates)); + + if (le32_to_cpu(cand_list->version) != 1) + return; + + for (i = 0; i < le32_to_cpu(cand_list->num_candidates); i++) { + struct iw_pmkid_cand pcand; + union iwreq_data wrqu; + struct ndis_80211_pmkid_candidate *cand = + &cand_list->candidate_list[i]; + + devdbg(usbdev, "cand[%i]: flags: 0x%08x, bssid: %pM", + i, le32_to_cpu(cand->flags), cand->bssid); + + memset(&pcand, 0, sizeof(pcand)); + if (le32_to_cpu(cand->flags) & 0x01) + pcand.flags |= IW_PMKID_CAND_PREAUTH; + pcand.index = i; + memcpy(pcand.bssid.sa_data, cand->bssid, ETH_ALEN); + + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = sizeof(pcand); + wireless_send_event(usbdev->net, IWEVPMKIDCAND, &wrqu, + (u8 *)&pcand); + } +} + +static void rndis_wlan_media_specific_indication(struct usbnet *usbdev, + struct rndis_indicate *msg, int buflen) +{ + struct ndis_80211_status_indication *indication; + int len, offset; + + offset = offsetof(struct rndis_indicate, status) + + le32_to_cpu(msg->offset); + len = le32_to_cpu(msg->length); + + if (len < 8) { + devinfo(usbdev, "media specific indication, " + "ignore too short message (%i < 8)", len); + return; + } + + if (offset + len > buflen) { + devinfo(usbdev, "media specific indication, " + "too large to fit to buffer (%i > %i)", + offset + len, buflen); + return; + } + + indication = (void *)((u8 *)msg + offset); + + switch (le32_to_cpu(indication->status_type)) { + case NDIS_80211_STATUSTYPE_RADIOSTATE: + devinfo(usbdev, "radio state indication: %i", + le32_to_cpu(indication->u.radio_status)); + return; + + case NDIS_80211_STATUSTYPE_MEDIASTREAMMODE: + devinfo(usbdev, "media stream mode indication: %i", + le32_to_cpu(indication->u.media_stream_mode)); + return; + + case NDIS_80211_STATUSTYPE_AUTHENTICATION: + rndis_wlan_auth_indication(usbdev, indication, len); + return; + + case NDIS_80211_STATUSTYPE_PMKID_CANDIDATELIST: + rndis_wlan_pmkid_cand_list_indication(usbdev, indication, len); + return; + + default: + devinfo(usbdev, "media specific indication: " + "unknown status type 0x%08x", + le32_to_cpu(indication->status_type)); + } +} + + static void rndis_wlan_indication(struct usbnet *usbdev, void *ind, int buflen) 
{ struct rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev); struct rndis_indicate *msg = ind; - /* queue work to avoid recursive calls into rndis_command */ switch (msg->status) { case RNDIS_STATUS_MEDIA_CONNECT: devinfo(usbdev, "media connect"); + /* queue work to avoid recursive calls into rndis_command */ set_bit(WORK_LINK_UP, &priv->work_pending); queue_work(priv->workqueue, &priv->work); break; @@ -2228,10 +2454,15 @@ static void rndis_wlan_indication(struct usbnet *usbdev, void *ind, int buflen) case RNDIS_STATUS_MEDIA_DISCONNECT: devinfo(usbdev, "media disconnect"); + /* queue work to avoid recursive calls into rndis_command */ set_bit(WORK_LINK_DOWN, &priv->work_pending); queue_work(priv->workqueue, &priv->work); break; + case RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION: + rndis_wlan_media_specific_indication(usbdev, msg, buflen); + break; + default: devinfo(usbdev, "indication: 0x%08x", le32_to_cpu(msg->status)); diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h index 37836b937d97..1ef1ebc2b04f 100644 --- a/include/linux/usb/rndis_host.h +++ b/include/linux/usb/rndis_host.h @@ -70,12 +70,13 @@ struct rndis_msg_hdr { #define RNDIS_MSG_KEEPALIVE_C (RNDIS_MSG_KEEPALIVE|RNDIS_MSG_COMPLETION) /* codes for "status" field of completion messages */ -#define RNDIS_STATUS_SUCCESS cpu_to_le32(0x00000000) -#define RNDIS_STATUS_FAILURE cpu_to_le32(0xc0000001) -#define RNDIS_STATUS_INVALID_DATA cpu_to_le32(0xc0010015) -#define RNDIS_STATUS_NOT_SUPPORTED cpu_to_le32(0xc00000bb) -#define RNDIS_STATUS_MEDIA_CONNECT cpu_to_le32(0x4001000b) -#define RNDIS_STATUS_MEDIA_DISCONNECT cpu_to_le32(0x4001000c) +#define RNDIS_STATUS_SUCCESS cpu_to_le32(0x00000000) +#define RNDIS_STATUS_FAILURE cpu_to_le32(0xc0000001) +#define RNDIS_STATUS_INVALID_DATA cpu_to_le32(0xc0010015) +#define RNDIS_STATUS_NOT_SUPPORTED cpu_to_le32(0xc00000bb) +#define RNDIS_STATUS_MEDIA_CONNECT cpu_to_le32(0x4001000b) +#define RNDIS_STATUS_MEDIA_DISCONNECT cpu_to_le32(0x4001000c) +#define RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION cpu_to_le32(0x40010012) /* codes for OID_GEN_PHYSICAL_MEDIUM */ #define RNDIS_PHYSICAL_MEDIUM_UNSPECIFIED cpu_to_le32(0x00000000) -- cgit v1.2.3 From 3ad201496badddd8e1cda87ee6d29e8b3b8e1279 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 2 Aug 2009 02:36:49 +0300 Subject: rfkill: add the GPS radio type Although GPS is a technology without a transmitting radio, and thus not a primary candidate for an rfkill switch, rfkill provides a unified interface for devices with wireless technology. The input key is not supplied as it is to be deprecated. Cc: johannes@sipsolutions.net Signed-off-by: Tomas Winkler Acked-by: Marcel Holtmann Signed-off-by: John W. 
Linville --- include/linux/rfkill.h | 1 + net/rfkill/core.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 10202903141a..21ca51bf4dd2 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -47,6 +47,7 @@ enum rfkill_type { RFKILL_TYPE_UWB, RFKILL_TYPE_WIMAX, RFKILL_TYPE_WWAN, + RFKILL_TYPE_GPS, NUM_RFKILL_TYPES, }; diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 044de1c6af3d..dbeaf2983822 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -589,11 +589,13 @@ static const char *rfkill_get_type_str(enum rfkill_type type) return "wimax"; case RFKILL_TYPE_WWAN: return "wwan"; + case RFKILL_TYPE_GPS: + return "gps"; default: BUG(); } - BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_WWAN + 1); + BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_GPS + 1); } static ssize_t rfkill_type_show(struct device *dev, -- cgit v1.2.3 From 7c73875e7dda627040b12c19b01db634fa7f0fd1 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 31 Jul 2009 12:53:58 -0400 Subject: Capabilities: move cap_file_mmap to commoncap.c Currently we duplicate the mmap_min_addr test in cap_file_mmap and in security_file_mmap if !CONFIG_SECURITY. This patch moves cap_file_mmap into commoncap.c and then calls that function directly from security_file_mmap when CONFIG_SECURITY is not set, as is done for all of the other capability checks. Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/security.h | 7 ++++--- security/capability.c | 9 --------- security/commoncap.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 145909165dbf..963a48fc3005 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -66,6 +66,9 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only); extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); @@ -2197,9 +2200,7 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { - if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) - return -EACCES; - return 0; + return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); } static inline int security_file_mprotect(struct vm_area_struct *vma, diff --git a/security/capability.c b/security/capability.c index f218dd361647..ec0573054024 100644 --- a/security/capability.c +++ b/security/capability.c @@ -330,15 +330,6 @@ static int cap_file_ioctl(struct file *file, unsigned int command, return 0; } -static int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) -{ - if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) - return -EACCES; - return 0; -} - static int cap_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { diff --git a/security/commoncap.c 
b/security/commoncap.c index aa97704564d4..3852e9432801 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -984,3 +984,33 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) cap_sys_admin = 1; return __vm_enough_memory(mm, pages, cap_sys_admin); } + +/* + * cap_file_mmap - check if able to map given addr + * @file: unused + * @reqprot: unused + * @prot: unused + * @flags: unused + * @addr: address attempting to be mapped + * @addr_only: unused + * + * If the process is attempting to map memory below mmap_min_addr they need + * CAP_SYS_RAWIO. The other parameters to this function are unused by the + * capability security module. Returns 0 if this mapping should be allowed + * -EPERM if not. + */ +int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) +{ + int ret = 0; + + if (addr < mmap_min_addr) { + ret = cap_capable(current, current_cred(), CAP_SYS_RAWIO, + SECURITY_CAP_AUDIT); + /* set PF_SUPERPRIV if it turns out we allow the low mmap */ + if (ret == 0) + current->flags |= PF_SUPERPRIV; + } + return ret; +} -- cgit v1.2.3 From a2551df7ec568d87793d2eea4ca744e86318f205 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 31 Jul 2009 12:54:11 -0400 Subject: Security/SELinux: separate lsm specific mmap_min_addr Currently SELinux enforcement of controls on the ability to map low memory is determined by the mmap_min_addr tunable. This patch causes SELinux to ignore the tunable and instead use a separate Kconfig option specific to how much space the LSM should protect. The tunable will now only control the need for CAP_SYS_RAWIO and SELinux permissions will always protect the amount of low memory designated by CONFIG_LSM_MMAP_MIN_ADDR. This allows users who need to disable the mmap_min_addr controls (the usual reason being that they run WINE as a non-root user) to do so and still have SELinux controls preventing confined domains (like a web server) from being able to map some area of low memory. 
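To illustrate the resulting split, here is a minimal userspace probe (a hypothetical sketch, not part of this patch): with dac_mmap_min_addr left at its default, an unprivileged process attempting a fixed low mapping should be refused by the DAC check, while a CAP_SYS_RAWIO holder passes it but a confined SELinux domain is still denied below CONFIG_LSM_MMAP_MIN_ADDR.

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* Try to map one page at 0x1000, below the usual 64 KiB floor. */
	void *p = mmap((void *)0x1000, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);

	if (p == MAP_FAILED)
		perror("mmap below mmap_min_addr");	/* expected: EACCES/EPERM */
	else
		puts("low mapping allowed (CAP_SYS_RAWIO or mmap_min_addr == 0)");
	return 0;
}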
Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/mm.h | 15 --------------- include/linux/security.h | 17 +++++++++++++++++ kernel/sysctl.c | 7 ++++--- mm/Kconfig | 6 +++--- mm/mmap.c | 3 --- mm/nommu.c | 3 --- security/Kconfig | 16 ++++++++++++++++ security/Makefile | 2 +- security/commoncap.c | 2 +- security/min_addr.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ security/selinux/hooks.c | 2 +- 11 files changed, 92 insertions(+), 30 deletions(-) create mode 100644 security/min_addr.c (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ba3a7cb1eaa0..9a72cc78e6b8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -34,8 +34,6 @@ extern int sysctl_legacy_va_layout; #define sysctl_legacy_va_layout 0 #endif -extern unsigned long mmap_min_addr; - #include #include #include @@ -574,19 +572,6 @@ static inline void set_page_links(struct page *page, enum zone_type zone, set_page_section(page, pfn_to_section_nr(pfn)); } -/* - * If a hint addr is less than mmap_min_addr change hint to be as - * low as possible but still greater than mmap_min_addr - */ -static inline unsigned long round_hint_to_min(unsigned long hint) -{ - hint &= PAGE_MASK; - if (((void *)hint != NULL) && - (hint < mmap_min_addr)) - return PAGE_ALIGN(mmap_min_addr); - return hint; -} - /* * Some inline functions in vmstat.h depend on page_zone() */ diff --git a/include/linux/security.h b/include/linux/security.h index 963a48fc3005..7b431155e392 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -28,6 +28,7 @@ #include #include #include +#include /* PAGE_ALIGN */ #include #include #include @@ -95,6 +96,7 @@ extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); extern unsigned long mmap_min_addr; +extern unsigned long dac_mmap_min_addr; /* * Values used in the task_security_ops calls */ @@ -147,6 +149,21 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) opts->num_mnt_opts = 0; } +/* + * If a hint addr is less than mmap_min_addr change hint to be as + * low as possible but still greater than mmap_min_addr + */ +static inline unsigned long round_hint_to_min(unsigned long hint) +{ + hint &= PAGE_MASK; + if (((void *)hint != NULL) && + (hint < mmap_min_addr)) + return PAGE_ALIGN(mmap_min_addr); + return hint; +} + +extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); /** * struct security_operations - main security structure * diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 98e02328c67d..58be76017fd0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1306,10 +1307,10 @@ static struct ctl_table vm_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "mmap_min_addr", - .data = &mmap_min_addr, - .maxlen = sizeof(unsigned long), + .data = &dac_mmap_min_addr, + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &mmap_min_addr_handler, }, #ifdef CONFIG_NUMA { diff --git a/mm/Kconfig b/mm/Kconfig index c948d4ca8bde..fe5f674d7a7d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -225,9 +225,9 @@ config DEFAULT_MMAP_MIN_ADDR For most ia64, ppc64 and x86 users with lots of address space a value of 65536 is reasonable and should cause no problems. On arm and other archs it should not be higher than 32768. 
- Programs which use vm86 functionality would either need additional - permissions from either the LSM or the capabilities module or have - this protection disabled. + Programs which use vm86 functionality or have some need to map + this low address space will need CAP_SYS_RAWIO or disable this + protection by setting the value to 0. This value can be changed after boot using the /proc/sys/vm/mmap_min_addr tunable. diff --git a/mm/mmap.c b/mm/mmap.c index 34579b23ebd5..8101de490c73 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -88,9 +88,6 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; struct percpu_counter vm_committed_as; -/* amount of vm to protect from userspace access */ -unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; - /* * Check that a process has enough memory to allocate a new virtual * mapping. 0 means there is enough memory for the allocation to diff --git a/mm/nommu.c b/mm/nommu.c index 53cab10fece4..28754c40be98 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -69,9 +69,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; -/* amount of vm to protect from userspace access */ -unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; - atomic_long_t mmap_pages_allocated; EXPORT_SYMBOL(mem_map); diff --git a/security/Kconfig b/security/Kconfig index d23c839038f0..9c60c346a91d 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -113,6 +113,22 @@ config SECURITY_ROOTPLUG If you are unsure how to answer this question, answer N. +config LSM_MMAP_MIN_ADDR + int "Low address space for LSM to protect from user allocation" + depends on SECURITY && SECURITY_SELINUX + default 65535 + help + This is the portion of low virtual memory which should be protected + from userspace allocation. Keeping a user from writing to low pages + can help reduce the impact of kernel NULL pointer bugs. + + For most ia64, ppc64 and x86 users with lots of address space + a value of 65536 is reasonable and should cause no problems. + On arm and other archs it should not be higher than 32768. + Programs which use vm86 functionality or have some need to map + this low address space will need the permission specific to the + systems running LSM. 
+ source security/selinux/Kconfig source security/smack/Kconfig source security/tomoyo/Kconfig diff --git a/security/Makefile b/security/Makefile index c67557cdaa85..b56e7f9ecbc2 100644 --- a/security/Makefile +++ b/security/Makefile @@ -8,7 +8,7 @@ subdir-$(CONFIG_SECURITY_SMACK) += smack subdir-$(CONFIG_SECURITY_TOMOYO) += tomoyo # always enable default capabilities -obj-y += commoncap.o +obj-y += commoncap.o min_addr.o # Object file lists obj-$(CONFIG_SECURITY) += security.o capability.o diff --git a/security/commoncap.c b/security/commoncap.c index 3852e9432801..fe30751a6cd9 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -1005,7 +1005,7 @@ int cap_file_mmap(struct file *file, unsigned long reqprot, { int ret = 0; - if (addr < mmap_min_addr) { + if (addr < dac_mmap_min_addr) { ret = cap_capable(current, current_cred(), CAP_SYS_RAWIO, SECURITY_CAP_AUDIT); /* set PF_SUPERPRIV if it turns out we allow the low mmap */ diff --git a/security/min_addr.c b/security/min_addr.c new file mode 100644 index 000000000000..14cc7b3b8d03 --- /dev/null +++ b/security/min_addr.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include + +/* amount of vm to protect from userspace access by both DAC and the LSM*/ +unsigned long mmap_min_addr; +/* amount of vm to protect from userspace using CAP_SYS_RAWIO (DAC) */ +unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; +/* amount of vm to protect from userspace using the LSM = CONFIG_LSM_MMAP_MIN_ADDR */ + +/* + * Update mmap_min_addr = max(dac_mmap_min_addr, CONFIG_LSM_MMAP_MIN_ADDR) + */ +static void update_mmap_min_addr(void) +{ +#ifdef CONFIG_LSM_MMAP_MIN_ADDR + if (dac_mmap_min_addr > CONFIG_LSM_MMAP_MIN_ADDR) + mmap_min_addr = dac_mmap_min_addr; + else + mmap_min_addr = CONFIG_LSM_MMAP_MIN_ADDR; +#else + mmap_min_addr = dac_mmap_min_addr; +#endif +} + +/* + * sysctl handler which just sets dac_mmap_min_addr = the new value and then + * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly + */ +int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + + update_mmap_min_addr(); + + return ret; +} + +int __init init_mmap_min_addr(void) +{ + update_mmap_min_addr(); + + return 0; +} +pure_initcall(init_mmap_min_addr); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 8a78f584f46e..5dee88362e71 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3040,7 +3040,7 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, * at bad behaviour/exploit that we always want to get the AVC, even * if DAC would have also denied the operation. 
*/ - if (addr < mmap_min_addr) { + if (addr < CONFIG_LSM_MMAP_MIN_ADDR) { rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, NULL); if (rc) -- cgit v1.2.3 From bbff2e433e80fae72c8d00d482927d52ec19ba33 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 6 Aug 2009 11:36:25 +0300 Subject: slab: remove duplicate kmem_cache_init_late() declarations kmem_cache_init_late() has been declared in slab.h CC: Nick Piggin CC: Matt Mackall CC: Christoph Lameter Signed-off-by: Wu Fengguang Signed-off-by: Pekka Enberg --- include/linux/slob_def.h | 5 ----- include/linux/slub_def.h | 2 -- mm/slob.c | 5 +++++ 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index bb5368df4be8..0ec00b39d006 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -34,9 +34,4 @@ static __always_inline void *__kmalloc(size_t size, gfp_t flags) return kmalloc(size, flags); } -static inline void kmem_cache_init_late(void) -{ - /* Nothing to do */ -} - #endif /* __LINUX_SLOB_DEF_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index c1c862b1d01a..76c831693616 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -304,6 +304,4 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) } #endif -void __init kmem_cache_init_late(void); - #endif /* _LINUX_SLUB_DEF_H */ diff --git a/mm/slob.c b/mm/slob.c index 9641da3d5e58..837ebd64cc34 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -692,3 +692,8 @@ void __init kmem_cache_init(void) { slob_ready = 1; } + +void __init kmem_cache_init_late(void) +{ + /* Nothing to do */ +} -- cgit v1.2.3 From b7b8f9bf0cd73b90561e8123fd5ec28f4539c419 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Thu, 6 Aug 2009 13:07:32 +0200 Subject: TTY/ASoC: Rename N_AMSDELTA line discipline to N_V253 The patch changes the line discipline name registered in include/linux/tty.h and updates the ams-delta machine driver to use it. 
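A line discipline registered under this number is normally attached to a tty from userspace via the TIOCSETD ioctl; the minimal sketch below (hypothetical, assuming a modem on /dev/ttyS0) shows the generic attach pattern that any V.253-style voice-modem setup can now share, since the discipline is no longer board-specific.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define N_V253 19	/* matches include/linux/tty.h after this rename */

int main(void)
{
	int ldisc = N_V253;
	int fd = open("/dev/ttyS0", O_RDWR | O_NOCTTY);	/* assumed device */

	if (fd < 0) {
		perror("open modem tty");
		return 1;
	}
	/* Attach the V.253 codec-control line discipline to this tty. */
	if (ioctl(fd, TIOCSETD, &ldisc) < 0) {
		perror("TIOCSETD N_V253");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}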
Signed-off-by: Janusz Krzysztofik Signed-off-by: Mark Brown --- include/linux/tty.h | 2 +- sound/soc/omap/ams-delta.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 26af9816391f..15bd3b065a9d 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -47,7 +47,7 @@ #define N_SLCAN 17 /* Serial / USB serial CAN Adaptors */ #define N_PPS 18 /* Pulse per Second */ -#define N_AMSDELTA 19 /* codec control over modem, board specific */ +#define N_V253 19 /* Codec control over voice modem */ /* * This character is the same as _POSIX_VDISABLE: it cannot be used as diff --git a/sound/soc/omap/ams-delta.c b/sound/soc/omap/ams-delta.c index 4f35b1f18cb9..5a5166ac7279 100644 --- a/sound/soc/omap/ams-delta.c +++ b/sound/soc/omap/ams-delta.c @@ -500,7 +500,7 @@ static int ams_delta_cx20442_init(struct snd_soc_codec *codec) } /* Register optional line discipline for over the modem control */ - ret = tty_register_ldisc(N_AMSDELTA, &cx81801_ops); + ret = tty_register_ldisc(N_V253, &cx81801_ops); if (ret) { dev_warn(card->dev, "Failed to register line discipline, " @@ -625,9 +625,9 @@ static void __exit ams_delta_module_exit(void) } } - if (tty_unregister_ldisc(N_AMSDELTA) != 0) + if (tty_unregister_ldisc(N_V253) != 0) dev_warn(&ams_delta_audio_platform_device->dev, - "failed to unregister AMSDELTA line discipline\n"); + "failed to unregister V253 line discipline\n"); snd_soc_jack_free_gpios(&ams_delta_hook_switch, ARRAY_SIZE(ams_delta_hook_switch_gpios), -- cgit v1.2.3 From fa56d4cb4022c8b313c3b99236e1b87effc3655b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 23 Jun 2009 11:29:11 +0000 Subject: ide: allow ide_dev_read_id() to be called from the IRQ context * Un-static __ide_wait_stat(). * Allow ide_dev_read_id() helper to be called from the IRQ context by adding irq_ctx flag and using mdelay()/__ide_wait_stat() when needed. * Switch ide_driveid_update() to set irq_ctx flag. This change is needed for the subsequent patch, which fixes races in the handling of user-space SET XFER commands, but for improved bisectability and clarity it is better to do it in a separate patch. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/ide-iops.c | 6 +++--- drivers/ide/ide-probe.c | 31 +++++++++++++++++++++---------- include/linux/ide.h | 3 ++- 3 files changed, 26 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 2892b242bbe1..b99873845d21 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -102,8 +102,8 @@ EXPORT_SYMBOL(ide_fixstring); * setting a timer to wake up at half second intervals thereafter, * until timeout is achieved, before timing out. 
*/ -static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, - unsigned long timeout, u8 *rstat) +int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, + unsigned long timeout, u8 *rstat) { ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; @@ -316,7 +316,7 @@ int ide_driveid_update(ide_drive_t *drive) return 0; SELECT_MASK(drive, 1); - rc = ide_dev_read_id(drive, ATA_CMD_ID_ATA, id); + rc = ide_dev_read_id(drive, ATA_CMD_ID_ATA, id, 1); SELECT_MASK(drive, 0); if (rc) diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 1bb106f6221a..8de442cbee94 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -238,6 +238,7 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) * @drive: drive to identify * @cmd: command to use * @id: buffer for IDENTIFY data + * @irq_ctx: flag set when called from the IRQ context * * Sends an ATA(PI) IDENTIFY request to a drive and waits for a response. * @@ -246,7 +247,7 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) * 2 device aborted the command (refused to identify itself) */ -int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) +int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id, int irq_ctx) { ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; @@ -263,7 +264,10 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) tp_ops->write_devctl(hwif, ATA_NIEN | ATA_DEVCTL_OBS); /* take a deep breath */ - msleep(50); + if (irq_ctx) + mdelay(50); + else + msleep(50); if (io_ports->ctl_addr && (hwif->host_flags & IDE_HFLAG_BROKEN_ALTSTATUS) == 0) { @@ -295,12 +299,19 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) timeout = ((cmd == ATA_CMD_ID_ATA) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; - if (ide_busy_sleep(drive, timeout, use_altstatus)) - return 1; - /* wait for IRQ and ATA_DRQ */ - msleep(50); - s = tp_ops->read_status(hwif); + if (irq_ctx) { + rc = __ide_wait_stat(drive, ATA_DRQ, BAD_R_STAT, timeout, &s); + if (rc) + return 1; + } else { + rc = ide_busy_sleep(drive, timeout, use_altstatus); + if (rc) + return 1; + + msleep(50); + s = tp_ops->read_status(hwif); + } if (OK_STAT(s, ATA_DRQ, BAD_R_STAT)) { /* drive returned ID */ @@ -406,10 +417,10 @@ static int do_probe (ide_drive_t *drive, u8 cmd) if (OK_STAT(stat, ATA_DRDY, ATA_BUSY) || present || cmd == ATA_CMD_ID_ATAPI) { - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); if (rc) /* failed: try again */ - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); stat = tp_ops->read_status(hwif); @@ -424,7 +435,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) msleep(50); tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); (void)ide_busy_sleep(drive, WAIT_WORSTCASE, 0); - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); } /* ensure drive IRQ is clear */ diff --git a/include/linux/ide.h b/include/linux/ide.h index edc93a6d931d..cb6cd0459a5e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1081,6 +1081,7 @@ extern void ide_fixstring(u8 *, const int, const int); int ide_busy_sleep(ide_drive_t *, unsigned long, int); +int __ide_wait_stat(ide_drive_t *, u8, u8, unsigned long, u8 *); int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); ide_startstop_t ide_do_park_unpark(ide_drive_t *, struct request *); @@ -1169,7 +1170,7 @@ int ide_no_data_taskfile(ide_drive_t *, struct ide_cmd *); int ide_taskfile_ioctl(ide_drive_t *, unsigned long); -int 
ide_dev_read_id(ide_drive_t *, u8, u16 *); +int ide_dev_read_id(ide_drive_t *, u8, u16 *, int); extern int ide_driveid_update(ide_drive_t *); extern int ide_config_drive_speed(ide_drive_t *, u8); -- cgit v1.2.3 From 665d66e8fad60a5a162c4615f27f916ad1a6d567 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 23 Jun 2009 11:35:51 +0000 Subject: ide: fix races in handling of user-space SET XFER commands * Make cmd->tf_flags field 'u16' and add IDE_TFLAG_SET_XFER taskfile flag. * Update ide_finish_cmd() to set xfer / re-read id if the new flag is set. * Convert set_xfer_rate() (write handler for /proc/ide/hd?/current_speed) and ide_cmd_ioctl() (HDIO_DRIVE_CMD ioctl handler) to use the new flag. * Remove no longer needed disable_irq_nosync() + enable_irq() from ide_config_drive_speed(). Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/ide-ioctls.c | 8 ++------ drivers/ide/ide-iops.c | 10 ---------- drivers/ide/ide-proc.c | 10 ++-------- drivers/ide/ide-taskfile.c | 9 ++++++++- include/linux/ide.h | 3 ++- 5 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index e246d3d3fbcc..d3440b5010a5 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -167,6 +167,8 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) err = -EINVAL; goto abort; } + + cmd.tf_flags |= IDE_TFLAG_SET_XFER; } err = ide_raw_taskfile(drive, &cmd, buf, args[3]); @@ -174,12 +176,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) args[0] = tf->status; args[1] = tf->error; args[2] = tf->nsect; - - if (!err && xfer_rate) { - /* active-retuning-calls future */ - ide_set_xfer_rate(drive, xfer_rate); - ide_driveid_update(drive); - } abort: if (copy_to_user((void __user *)arg, &args, 4)) err = -EFAULT; diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index b99873845d21..b14fa9a87c49 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -363,14 +363,6 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) * this point (lost interrupt). */ - /* - * FIXME: we race against the running IRQ here if - * this is called from non IRQ context. If we use - * disable_irq() we hang on the error path. Work - * is needed. 
- */ - disable_irq_nosync(hwif->irq); - udelay(1); tp_ops->dev_select(drive); SELECT_MASK(drive, 1); @@ -394,8 +386,6 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) SELECT_MASK(drive, 0); - enable_irq(hwif->irq); - if (error) { (void) ide_dump_status(drive, "set_drive_speed_status", stat); return error; diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 3242698832a4..021de41655e6 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -195,7 +195,6 @@ ide_devset_get(xfer_rate, current_speed); static int set_xfer_rate (ide_drive_t *drive, int arg) { struct ide_cmd cmd; - int err; if (arg < XFER_PIO_0 || arg > XFER_UDMA_6) return -EINVAL; @@ -206,14 +205,9 @@ static int set_xfer_rate (ide_drive_t *drive, int arg) cmd.tf.nsect = (u8)arg; cmd.valid.out.tf = IDE_VALID_FEATURE | IDE_VALID_NSECT; cmd.valid.in.tf = IDE_VALID_NSECT; + cmd.tf_flags = IDE_TFLAG_SET_XFER; - err = ide_no_data_taskfile(drive, &cmd); - - if (!err) { - ide_set_xfer_rate(drive, (u8) arg); - ide_driveid_update(drive); - } - return err; + return ide_no_data_taskfile(drive, &cmd); } ide_devset_rw(current_speed, xfer_rate); diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 50336d51eebc..cc8633cbe133 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -324,10 +324,17 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat) { struct request *rq = drive->hwif->rq; - u8 err = ide_read_error(drive); + u8 err = ide_read_error(drive), nsect = cmd->tf.nsect; + u8 set_xfer = !!(cmd->tf_flags & IDE_TFLAG_SET_XFER); ide_complete_cmd(drive, cmd, stat, err); rq->errors = err; + + if (err == 0 && set_xfer) { + ide_set_xfer_rate(drive, nsect); + ide_driveid_update(drive); + } + ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq)); } diff --git a/include/linux/ide.h b/include/linux/ide.h index cb6cd0459a5e..803c1ae31237 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -258,6 +258,7 @@ enum { IDE_TFLAG_DYN = (1 << 5), IDE_TFLAG_FS = (1 << 6), IDE_TFLAG_MULTI_PIO = (1 << 7), + IDE_TFLAG_SET_XFER = (1 << 8), }; enum { @@ -294,7 +295,7 @@ struct ide_cmd { } out, in; } valid; - u8 tf_flags; + u16 tf_flags; u8 ftf_flags; /* for TASKFILE ioctl */ int protocol; -- cgit v1.2.3 From 30dd568c912602b7dbd609a45d053e01b13422bb Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 21 Jul 2009 15:56:48 +0200 Subject: x86, perf_counter, bts: Add BTS support to perfcounters Implement a performance counter with: attr.type = PERF_TYPE_HARDWARE attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS attr.sample_period = 1 Using branch trace store (BTS) on x86 hardware, if available. 
The from and to address for each branch can be sampled using: PERF_SAMPLE_IP for the from address PERF_SAMPLE_ADDR for the to address [ v2: address review feedback, fix bugs ] Signed-off-by: Markus Metzger Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_counter.h | 10 ++ arch/x86/kernel/cpu/perf_counter.c | 325 +++++++++++++++++++++++++++++++++++- include/linux/perf_counter.h | 2 + kernel/perf_counter.c | 10 +- 4 files changed, 340 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index fa64e401589d..e7b7c938ae27 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,6 +84,16 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) +/* + * We model BTS tracing as another fixed-mode PMC. + * + * We choose a value in the middle of the fixed counter range, since lower + * values are used by actual fixed counters and higher values are used + * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. + */ +#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) + + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(void); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a7aa8f900954..b237c181aa41 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -6,6 +6,7 @@ * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2009 Intel Corporation, * * For licencing details see kernel-base/COPYING */ @@ -20,6 +21,7 @@ #include #include #include +#include #include #include @@ -27,12 +29,52 @@ static u64 perf_counter_mask __read_mostly; +/* The maximal number of PEBS counters: */ +#define MAX_PEBS_COUNTERS 4 + +/* The size of a BTS record in bytes: */ +#define BTS_RECORD_SIZE 24 + +/* The size of a per-cpu BTS buffer in bytes: */ +#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) + +/* The BTS overflow threshold in bytes from the end of the buffer: */ +#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) + + +/* + * Bits in the debugctlmsr controlling branch tracing. + */ +#define X86_DEBUGCTL_TR (1 << 6) +#define X86_DEBUGCTL_BTS (1 << 7) +#define X86_DEBUGCTL_BTINT (1 << 8) +#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) +#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. 
+ */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; +}; + struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; int enabled; + struct debug_store *ds; }; /* @@ -57,6 +99,8 @@ struct x86_pmu { u64 counter_mask; u64 max_period; u64 intel_ctrl; + void (*enable_bts)(u64 config); + void (*disable_bts)(void); }; static struct x86_pmu x86_pmu __read_mostly; @@ -576,6 +620,9 @@ x86_perf_counter_update(struct perf_counter *counter, u64 prev_raw_count, new_raw_count; s64 delta; + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + /* * Careful: an NMI might modify the previous counter value. * @@ -659,10 +706,109 @@ static void release_pmc_hardware(void) enable_lapic_nmi_watchdog(); } +static inline bool bts_available(void) +{ + return x86_pmu.enable_bts != NULL; +} + +static inline void init_debug_store_on_cpu(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + + if (!ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, + (u32)((u64)(long)ds), (u32)((u64)(long)ds >> 32)); +} + +static inline void fini_debug_store_on_cpu(int cpu) +{ + if (!per_cpu(cpu_hw_counters, cpu).ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); +} + +static void release_bts_hardware(void) +{ + int cpu; + + if (!bts_available()) + return; + + get_online_cpus(); + + for_each_online_cpu(cpu) + fini_debug_store_on_cpu(cpu); + + for_each_possible_cpu(cpu) { + struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + + if (!ds) + continue; + + per_cpu(cpu_hw_counters, cpu).ds = NULL; + + kfree((void *)(long)ds->bts_buffer_base); + kfree(ds); + } + + put_online_cpus(); +} + +static int reserve_bts_hardware(void) +{ + int cpu, err = 0; + + if (!bts_available()) + return -EOPNOTSUPP; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + struct debug_store *ds; + void *buffer; + + err = -ENOMEM; + buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + ds = kzalloc(sizeof(*ds), GFP_KERNEL); + if (unlikely(!ds)) { + kfree(buffer); + break; + } + + ds->bts_buffer_base = (u64)(long)buffer; + ds->bts_index = ds->bts_buffer_base; + ds->bts_absolute_maximum = + ds->bts_buffer_base + BTS_BUFFER_SIZE; + ds->bts_interrupt_threshold = + ds->bts_absolute_maximum - BTS_OVFL_TH; + + per_cpu(cpu_hw_counters, cpu).ds = ds; + err = 0; + } + + if (err) + release_bts_hardware(); + else { + for_each_online_cpu(cpu) + init_debug_store_on_cpu(cpu); + } + + put_online_cpus(); + + return err; +} + static void hw_perf_counter_destroy(struct perf_counter *counter) { if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { release_pmc_hardware(); + release_bts_hardware(); mutex_unlock(&pmc_reserve_mutex); } } @@ -705,6 +851,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) return 0; } +static void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr |= X86_DEBUGCTL_TR; + debugctlmsr |= X86_DEBUGCTL_BTS; + debugctlmsr |= X86_DEBUGCTL_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= 
X86_DEBUGCTL_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_bts(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | + X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + /* * Setup the hardware configuration for a given attr_type */ @@ -721,9 +903,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter) err = 0; if (!atomic_inc_not_zero(&active_counters)) { mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) - err = -EBUSY; - else + if (atomic_read(&active_counters) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + reserve_bts_hardware(); + } + if (!err) atomic_inc(&active_counters); mutex_unlock(&pmc_reserve_mutex); } @@ -801,7 +987,18 @@ static void p6_pmu_disable_all(void) static void intel_pmu_disable_all(void) { + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + intel_pmu_disable_bts(); } static void amd_pmu_disable_all(void) @@ -859,7 +1056,25 @@ static void p6_pmu_enable_all(void) static void intel_pmu_enable_all(void) { + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { + struct perf_counter *counter = + cpuc->counters[X86_PMC_IDX_FIXED_BTS]; + + if (WARN_ON_ONCE(!counter)) + return; + + intel_pmu_enable_bts(counter->hw.config); + } } static void amd_pmu_enable_all(void) @@ -946,6 +1161,11 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) static inline void intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + intel_pmu_disable_bts(); + return; + } + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc, idx); return; @@ -974,6 +1194,9 @@ x86_perf_counter_set_period(struct perf_counter *counter, s64 period = hwc->sample_period; int err, ret = 0; + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + /* * If we are way outside a reasoable range then just skip forward: */ @@ -1056,6 +1279,14 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + if (!__get_cpu_var(cpu_hw_counters).enabled) + return; + + intel_pmu_enable_bts(hwc->config); + return; + } + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_enable_fixed(hwc, idx); return; @@ -1077,11 +1308,16 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) { unsigned int event; + event = hwc->config & ARCH_PERFMON_EVENT_MASK; + + if (unlikely((event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (hwc->sample_period == 1))) + return X86_PMC_IDX_FIXED_BTS; + if (!x86_pmu.num_counters_fixed) return -1; - event = hwc->config & ARCH_PERFMON_EVENT_MASK; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; if (unlikely(event 
== x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) @@ -1102,7 +1338,25 @@ static int x86_pmu_enable(struct perf_counter *counter) int idx; idx = fixed_mode_idx(counter, hwc); - if (idx >= 0) { + if (idx == X86_PMC_IDX_FIXED_BTS) { + /* + * Try to use BTS for branch tracing. If that is not + * available, try to get a generic counter. + */ + if (unlikely(!cpuc->ds)) + goto try_generic; + + /* + * Try to get the fixed counter, if that is already taken + * then try to get a generic counter: + */ + if (test_and_set_bit(idx, cpuc->used_mask)) + goto try_generic; + + hwc->config_base = 0; + hwc->counter_base = 0; + hwc->idx = idx; + } else if (idx >= 0) { /* * Try to get the fixed counter, if that is already taken * then try to get a generic counter: @@ -1213,6 +1467,45 @@ void perf_counter_print_debug(void) local_irq_restore(flags); } +static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, + struct perf_sample_data *data) +{ + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; + unsigned long orig_ip = data->regs->ip; + u64 at; + + if (!counter) + return; + + if (!ds) + return; + + for (at = ds->bts_buffer_base; + at < ds->bts_index; + at += sizeof(struct bts_record)) { + struct bts_record *rec = (struct bts_record *)(long)at; + + data->regs->ip = rec->from; + data->addr = rec->to; + + perf_counter_output(counter, 1, data); + } + + ds->bts_index = ds->bts_buffer_base; + + data->regs->ip = orig_ip; + data->addr = 0; + + /* There's new data available. */ + counter->pending_kill = POLL_IN; +} + static void x86_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); @@ -1237,6 +1530,15 @@ static void x86_pmu_disable(struct perf_counter *counter) * that we are disabling: */ x86_perf_counter_update(counter, hwc, idx); + + /* Drain the remaining BTS records. 
*/ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + struct perf_sample_data data; + struct pt_regs regs; + + data.regs = ®s; + intel_pmu_drain_bts_buffer(cpuc, &data); + } cpuc->counters[idx] = NULL; clear_bit(idx, cpuc->used_mask); @@ -1264,6 +1566,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter) static void intel_pmu_reset(void) { + struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; unsigned long flags; int idx; @@ -1281,6 +1584,8 @@ static void intel_pmu_reset(void) for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); } + if (ds) + ds->bts_index = ds->bts_buffer_base; local_irq_restore(flags); } @@ -1346,6 +1651,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_counters); perf_disable(); + intel_pmu_drain_bts_buffer(cpuc, &data); status = intel_pmu_get_status(); if (!status) { perf_enable(); @@ -1547,6 +1853,8 @@ static struct x86_pmu intel_pmu = { * the generic counter period: */ .max_period = (1ULL << 31) - 1, + .enable_bts = intel_pmu_enable_bts, + .disable_bts = intel_pmu_disable_bts, }; static struct x86_pmu amd_pmu = { @@ -1936,3 +2244,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } + +void hw_perf_counter_setup_online(int cpu) +{ + init_debug_store_on_cpu(cpu); +} diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2aabe43c1d04..0a6f3209c9de 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -692,6 +692,8 @@ struct perf_sample_data { extern int perf_counter_overflow(struct perf_counter *counter, int nmi, struct perf_sample_data *data); +extern void perf_counter_output(struct perf_counter *counter, int nmi, + struct perf_sample_data *data); /* * Return 1 for a software counter, 0 for a hardware counter diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 546e62d62941..bf8110b35c51 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } void __weak hw_perf_counter_setup(int cpu) { barrier(); } +void __weak hw_perf_counter_setup_online(int cpu) { barrier(); } int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, @@ -2630,7 +2631,7 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) return task_pid_nr_ns(p, counter->ns); } -static void perf_counter_output(struct perf_counter *counter, int nmi, +void perf_counter_output(struct perf_counter *counter, int nmi, struct perf_sample_data *data) { int ret; @@ -4566,6 +4567,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) perf_counter_init_cpu(cpu); break; + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + hw_perf_counter_setup_online(cpu); + break; + case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: perf_counter_exit_cpu(cpu); @@ -4590,6 +4596,8 @@ void __init perf_counter_init(void) { perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); + perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, + (void *)(long)smp_processor_id()); register_cpu_notifier(&perf_cpu_nb); } -- cgit v1.2.3 From 8e5b59a2d728e6963b35dba8bb36e0b70267462e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Aug 2009 16:02:50 -0700 Subject: sched: Add default defines for PREEMPT_ACTIVE The PREEMPT_ACTIVE setting doesn't actually need to be arch-specific, so set up a sane default for 
all arches to (hopefully) migrate to. > if we look at linux/hardirq.h, it makes this claim: > * - bit 28 is the PREEMPT_ACTIVE flag > if that's true, then why are we letting any arch set this define ? a > quick survey shows that half the arches (11) are using 0x10000000 (bit > 28) while the other half (10) are using 0x4000000 (bit 26). and then > there is the ia64 oddity which uses bit 30. the exact value here > shouldnt really matter across arches though should it ? actually alpha, arm and avr32 also use bit 30 (0x40000000), there are only five (or eight, depending on how you count) architectures (blackfin, h8300, m68k, s390 and sparc) using bit 26. Signed-off-by: Arnd Bergmann Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 8246c697863d..0d885fd75111 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -64,6 +64,12 @@ #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) #define NMI_OFFSET (1UL << NMI_SHIFT) +#ifndef PREEMPT_ACTIVE +#define PREEMPT_ACTIVE_BITS 1 +#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) +#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) +#endif + #if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) #error PREEMPT_ACTIVE is too low! #endif -- cgit v1.2.3 From 62ab460cf5e450e1d207a98a9c6cf2e4a6a78fd1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:06:19 -0400 Subject: NFSv4: Add 'server capability' flags for NFSv4 recommended attributes If the NFSv4 server doesn't support a POSIX attribute, the generic NFS code needs to know that, so that it doesn't keep trying to poll for it. However, by the same token, if the NFSv4 server does support that attribute, then we should ensure that the inode metadata is appropriately labelled as being untrusted. For instance, if we don't know the correct value of the file's uid, we should certainly not be caching ACLs or ACCESS results. 
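Condensed, every attribute update in the hunks below follows the same shape (a simplified sketch of the pattern, using the owner attribute as the example):

	/* Trust what the server sent; if the server claims to support the
	 * attribute but did not send it, mark the cached metadata invalid
	 * instead of silently keeping a stale value. */
	if (fattr->valid & NFS_ATTR_FATTR_OWNER)
		inode->i_uid = fattr->uid;
	else if (nfs_server_capable(inode, NFS_CAP_OWNER))
		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR
					      | NFS_INO_INVALID_ACCESS
					      | NFS_INO_INVALID_ACL;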
Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 7 ++-- fs/nfs/inode.c | 92 +++++++++++++++++++++++++++++++++++++++-------- fs/nfs/nfs4proc.c | 22 ++++++++++++ include/linux/nfs_fs_sb.h | 9 +++++ 4 files changed, 114 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8d25ccb2d51d..8f34fd8028fc 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -809,6 +809,9 @@ static int nfs_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; server->options = data->options; + server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| + NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); @@ -1274,7 +1277,7 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; server->options = data->options; /* Get a client record */ @@ -1400,7 +1403,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, /* Initialise the client representation from the parent server */ nfs_server_copy_userdata(server, parent_server); - server->caps |= NFS_CAP_ATOMIC_OPEN; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; /* Get a client representation. * Note: NFSv4 always uses TCP, */ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bd7938eda6a8..fe5a8b45d867 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -286,6 +286,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* We can't support update_atime(), since the server will reset it */ inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; + if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 + && nfs_server_capable(inode, NFS_CAP_MODE)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. 
*/ @@ -330,20 +335,46 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode->i_atime = fattr->atime; + else if (nfs_server_capable(inode, NFS_CAP_ATIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode->i_mtime = fattr->mtime; + else if (nfs_server_capable(inode, NFS_CAP_MTIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA; if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode->i_ctime = fattr->ctime; + else if (nfs_server_capable(inode, NFS_CAP_CTIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_CHANGE) nfsi->change_attr = fattr->change_attr; + else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA; if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); + else + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_REVAL_PAGECACHE; if (fattr->valid & NFS_ATTR_FATTR_NLINK) inode->i_nlink = fattr->nlink; + else if (nfs_server_capable(inode, NFS_CAP_NLINK)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { @@ -1145,6 +1176,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) loff_t cur_isize, new_isize; unsigned long invalid = 0; unsigned long now = jiffies; + unsigned long save_cache_validity; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, @@ -1171,10 +1203,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfsi->read_cache_jiffies = fattr->time_start; - if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME))) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ATIME - | NFS_INO_REVAL_PAGECACHE); + save_cache_validity = nfsi->cache_validity; + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_FORCED + | NFS_INO_REVAL_PAGECACHE); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); @@ -1189,7 +1222,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); nfsi->change_attr = fattr->change_attr; } - } + } else if (server->caps & NFS_CAP_CHANGE_ATTR) + invalid |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { /* NFSv2/v3: Check if the mtime agrees */ @@ -1201,7 +1235,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } - } + } else if (server->caps & NFS_CAP_MTIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_REVAL_PAGECACHE + | 
NFS_INO_REVAL_FORCED); + if (fattr->valid & NFS_ATTR_FATTR_CTIME) { /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { @@ -1215,7 +1254,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); } - } + } else if (server->caps & NFS_CAP_CTIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1231,30 +1274,50 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dprintk("NFS: isize change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); } - } + } else + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_ATIME) memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + else if (server->caps & NFS_CAP_ATIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; } - } + } else if (server->caps & NFS_CAP_MODE) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (inode->i_uid != fattr->uid) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; } - } + } else if (server->caps & NFS_CAP_OWNER) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (inode->i_gid != fattr->gid) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; } - } + } else if (server->caps & NFS_CAP_OWNER_GROUP) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { @@ -1263,7 +1326,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_DATA; inode->i_nlink = fattr->nlink; } - } + } else if (server->caps & NFS_CAP_NLINK) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* @@ -1293,9 +1358,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; if (!nfs_have_delegation(inode, FMODE_READ) || - (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) + (save_cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; - nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED; return 0; out_changed: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d95f7f9e60c4..be6544aef41f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2003,12 +2003,34 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f status = nfs4_call_sync(server, &msg, &args, &res, 0); if (status == 0) { memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); + server->caps &= 
~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| + NFS_CAP_SYMLINKS|NFS_CAP_FILEID| + NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER| + NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME| + NFS_CAP_CTIME|NFS_CAP_MTIME); if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) server->caps |= NFS_CAP_ACLS; if (res.has_links != 0) server->caps |= NFS_CAP_HARDLINKS; if (res.has_symlinks != 0) server->caps |= NFS_CAP_SYMLINKS; + if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID) + server->caps |= NFS_CAP_FILEID; + if (res.attr_bitmask[1] & FATTR4_WORD1_MODE) + server->caps |= NFS_CAP_MODE; + if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS) + server->caps |= NFS_CAP_NLINK; + if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER) + server->caps |= NFS_CAP_OWNER; + if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP) + server->caps |= NFS_CAP_OWNER_GROUP; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS) + server->caps |= NFS_CAP_ATIME; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA) + server->caps |= NFS_CAP_CTIME; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) + server->caps |= NFS_CAP_MTIME; + memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 19fe15d12042..320569eabe3b 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -167,6 +167,15 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) +#define NFS_CAP_CHANGE_ATTR (1U << 5) +#define NFS_CAP_FILEID (1U << 6) +#define NFS_CAP_MODE (1U << 7) +#define NFS_CAP_NLINK (1U << 8) +#define NFS_CAP_OWNER (1U << 9) +#define NFS_CAP_OWNER_GROUP (1U << 10) +#define NFS_CAP_ATIME (1U << 11) +#define NFS_CAP_CTIME (1U << 12) +#define NFS_CAP_MTIME (1U << 13) /* maximum number of slots to use */ -- cgit v1.2.3 From 169026a61e6f436dfc12c9d10d95455c4e9f945b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Aug 2009 15:09:32 -0400 Subject: SUNRPC: Clean up RPCBIND_MAXUADDRLEN definitions Clean up: Replace the single-integer definition of RPCBIND_MAXUADDRLEN with a definition that is based on previously defined address string sizes, and document the way this maximum is calculated. Also provide a separate macro for the size of the port number extension. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/msg_prot.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 70df4f1d8847..77e624883393 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -189,7 +189,22 @@ typedef __be32 rpc_fraghdr; * Additionally, the two alternative forms specified in Section 2.2 of * [RFC2373] are also acceptable. */ -#define RPCBIND_MAXUADDRLEN (56u) + +#include + +/* Maximum size of the port number part of a universal address */ +#define RPCBIND_MAXUADDRPLEN sizeof(".255.255") + +/* Maximum size of an IPv4 universal address */ +#define RPCBIND_MAXUADDR4LEN \ + (INET_ADDRSTRLEN + RPCBIND_MAXUADDRPLEN) + +/* Maximum size of an IPv6 universal address */ +#define RPCBIND_MAXUADDR6LEN \ + (INET6_ADDRSTRLEN + RPCBIND_MAXUADDRPLEN) + +/* Assume INET6_ADDRSTRLEN will always be larger than INET_ADDRSTRLEN... 
*/ +#define RPCBIND_MAXUADDRLEN RPCBIND_MAXUADDR6LEN #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ -- cgit v1.2.3 From a02d692611348f11ee1bc37431a883c3ff2de23e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Aug 2009 15:09:34 -0400 Subject: SUNRPC: Provide functions for managing universal addresses Introduce a set of functions in the kernel's RPC implementation for converting between a socket address and either a standard presentation address string or an RPC universal address. The universal address functions will be used to encode and decode RPCB_FOO and NFSv4 SETCLIENTID arguments. The other functions are part of a previous promise to deliver shared functions that can be used by upper-layer protocols to display and manipulate IP addresses. The kernel's current address printf formatters were designed specifically for kernel to user-space APIs that require a particular string format for socket addresses, thus are somewhat limited for the purposes of sunrpc.ko. The formatter for IPv6 addresses, %pI6, does not support short-handing or scope IDs. Also, these printf formatters are unique per address family, so a separate formatter string is required for printing AF_INET and AF_INET6 addresses. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 - include/linux/sunrpc/clnt.h | 38 +++++ net/sunrpc/Makefile | 2 +- net/sunrpc/addr.c | 364 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 403 insertions(+), 3 deletions(-) create mode 100644 net/sunrpc/addr.c (limited to 'include/linux') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8d2b71d57d29..ff68397f9b19 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -370,8 +370,6 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) PAGE_SIZE - 1) >> PAGE_SHIFT; } -#define IPV6_SCOPE_DELIMITER '%' - /* * Set the port number in an address. Be agnostic about the address * family. 
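All of the new helpers declared in the clnt.h hunk that follows trade in the same string format, so it is worth seeing once: a universal address is the presentation address with the port appended as two decimal octets, high byte first. A standalone user-space sketch (plain C, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned short port = 2049;	/* NFS; 2049 == 0x0801 */

		/* 192.0.2.1 port 2049 => "192.0.2.1.8.1" */
		printf("192.0.2.1.%u.%u\n", port >> 8, port & 0xff);
		return 0;
	}

rpc_uaddr2sockaddr() below simply runs this in reverse: it strips the last two dot-separated fields, reassembles the port, and hands the remainder to rpc_pton().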
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 37881f1a0bd7..2636e8ad551c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -9,6 +9,10 @@ #ifndef _LINUX_SUNRPC_CLNT_H #define _LINUX_SUNRPC_CLNT_H +#include +#include +#include + #include #include #include @@ -151,5 +155,39 @@ void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); +size_t rpc_ntop(const struct sockaddr *, char *, const size_t); +size_t rpc_pton(const char *, const size_t, + struct sockaddr *, const size_t); +char * rpc_sockaddr2uaddr(const struct sockaddr *); +size_t rpc_uaddr2sockaddr(const char *, const size_t, + struct sockaddr *, const size_t); + +static inline unsigned short rpc_get_port(const struct sockaddr *sap) +{ + switch (sap->sa_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *)sap)->sin_port); + case AF_INET6: + return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + } + return 0; +} + +static inline void rpc_set_port(struct sockaddr *sap, + const unsigned short port) +{ + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + break; + } +} + +#define IPV6_SCOPE_DELIMITER '%' +#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index db73fd2a3f0e..9d2fca5ad14a 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o auth_generic.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ - rpcb_clnt.o timer.o xdr.o \ + addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c new file mode 100644 index 000000000000..22e8fd89477f --- /dev/null +++ b/net/sunrpc/addr.c @@ -0,0 +1,364 @@ +/* + * Copyright 2009, Oracle. All rights reserved. + * + * Convert socket addresses to presentation addresses and universal + * addresses, and vice versa. + * + * Universal addresses are introduced by RFC 1833 and further refined by + * recent RFCs describing NFSv4. The universal address format is part + * of the external (network) interface provided by rpcbind version 3 + * and 4, and by NFSv4. Such an address is a string containing a + * presentation format IP address followed by a port number in + * "hibyte.lobyte" format. + * + * IPv6 addresses can also include a scope ID, typically denoted by + * a '%' followed by a device name or a non-negative integer. Refer to + * RFC 4291, Section 2.2 for details on IPv6 presentation formats. 
+ */ + +#include +#include + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, + char *buf, const int buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + const struct in6_addr *addr = &sin6->sin6_addr; + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded ANY address + */ + if (ipv6_addr_any(addr)) + return snprintf(buf, buflen, "::"); + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded loopback address + */ + if (ipv6_addr_loopback(addr)) + return snprintf(buf, buflen, "::1"); + + /* + * RFC 4291, Section 2.2.3 + * + * Special presentation address format for mapped v4 + * addresses. + */ + if (ipv6_addr_v4mapped(addr)) + return snprintf(buf, buflen, "::ffff:%pI4", + &addr->s6_addr32[3]); + + /* + * RFC 4291, Section 2.2.1 + * + * To keep the result as short as possible, especially + * since we don't shorthand, we don't want leading zeros + * in each halfword, so avoid %pI6. + */ + return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x", + ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]), + ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]), + ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]), + ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7])); +} + +static size_t rpc_ntop6(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + char scopebuf[IPV6_SCOPE_ID_LEN]; + size_t len; + int rc; + + len = rpc_ntop6_noscopeid(sap, buf, buflen); + if (unlikely(len == 0)) + return len; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + return len; + + rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u", + IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id); + if (unlikely((size_t)rc > sizeof(scopebuf))) + return 0; + + len += rc; + if (unlikely(len > buflen)) + return 0; + + strcat(buf, scopebuf); + return len; +} + +#else /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ + +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, + char *buf, const int buflen) +{ + return 0; +} + +static size_t rpc_ntop6(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + return 0; +} + +#endif /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ + +static int rpc_ntop4(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + return snprintf(buf, buflen, "%pI4", &sin->sin_addr); +} + +/** + * rpc_ntop - construct a presentation address in @buf + * @sap: socket address + * @buf: construction area + * @buflen: size of @buf, in bytes + * + * Plants a %NUL-terminated string in @buf and returns the length + * of the string, excluding the %NUL. Otherwise zero is returned. 
+ */ +size_t rpc_ntop(const struct sockaddr *sap, char *buf, const size_t buflen) +{ + switch (sap->sa_family) { + case AF_INET: + return rpc_ntop4(sap, buf, buflen); + case AF_INET6: + return rpc_ntop6(sap, buf, buflen); + } + + return 0; +} +EXPORT_SYMBOL_GPL(rpc_ntop); + +static size_t rpc_pton4(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + u8 *addr = (u8 *)&sin->sin_addr.s_addr; + + if (buflen > INET_ADDRSTRLEN || salen < sizeof(struct sockaddr_in)) + return 0; + + memset(sap, 0, sizeof(struct sockaddr_in)); + + if (in4_pton(buf, buflen, addr, '\0', NULL) == 0) + return 0; + + sin->sin_family = AF_INET; + return sizeof(struct sockaddr_in);; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int rpc_parse_scope_id(const char *buf, const size_t buflen, + const char *delim, struct sockaddr_in6 *sin6) +{ + char *p; + size_t len; + + if ((buf + buflen) == delim) + return 1; + + if (*delim != IPV6_SCOPE_DELIMITER) + return 0; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + return 0; + + len = (buf + buflen) - delim - 1; + p = kstrndup(delim + 1, len, GFP_KERNEL); + if (p) { + unsigned long scope_id = 0; + struct net_device *dev; + + dev = dev_get_by_name(&init_net, p); + if (dev != NULL) { + scope_id = dev->ifindex; + dev_put(dev); + } else { + if (strict_strtoul(p, 10, &scope_id) == 0) { + kfree(p); + return 0; + } + } + + kfree(p); + + sin6->sin6_scope_id = scope_id; + return 1; + } + + return 0; +} + +static size_t rpc_pton6(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; + const char *delim; + + if (buflen > (INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN) || + salen < sizeof(struct sockaddr_in6)) + return 0; + + memset(sap, 0, sizeof(struct sockaddr_in6)); + + if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0) + return 0; + + if (!rpc_parse_scope_id(buf, buflen, delim, sin6)) + return 0; + + sin6->sin6_family = AF_INET6; + return sizeof(struct sockaddr_in6); +} +#else +static size_t rpc_pton6(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + return 0; +} +#endif + +/** + * rpc_pton - Construct a sockaddr in @sap + * @buf: C string containing presentation format IP address + * @buflen: length of presentation address in bytes + * @sap: buffer into which to plant socket address + * @salen: size of buffer in bytes + * + * Returns the size of the socket address if successful; otherwise + * zero is returned. + * + * Plants a socket address in @sap and returns the size of the + * socket address, if successful. Returns zero if an error + * occurred. + */ +size_t rpc_pton(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + unsigned int i; + + for (i = 0; i < buflen; i++) + if (buf[i] == ':') + return rpc_pton6(buf, buflen, sap, salen); + return rpc_pton4(buf, buflen, sap, salen); +} +EXPORT_SYMBOL_GPL(rpc_pton); + +/** + * rpc_sockaddr2uaddr - Construct a universal address string from @sap. + * @sap: socket address + * + * Returns a %NUL-terminated string in dynamically allocated memory; + * otherwise NULL is returned if an error occurred. Caller must + * free the returned string. 
+ */ +char *rpc_sockaddr2uaddr(const struct sockaddr *sap) +{ + char portbuf[RPCBIND_MAXUADDRPLEN]; + char addrbuf[RPCBIND_MAXUADDRLEN]; + unsigned short port; + + switch (sap->sa_family) { + case AF_INET: + if (rpc_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) + return NULL; + port = ntohs(((struct sockaddr_in *)sap)->sin_port); + break; + case AF_INET6: + if (rpc_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) + return NULL; + port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + break; + default: + return NULL; + } + + if (snprintf(portbuf, sizeof(portbuf), + ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf)) + return NULL; + + if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) + return NULL; + + return kstrdup(addrbuf, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); + +/** + * rpc_uaddr2sockaddr - convert a universal address to a socket address. + * @uaddr: C string containing universal address to convert + * @uaddr_len: length of universal address string + * @sap: buffer into which to plant socket address + * @salen: size of buffer + * + * Returns the size of the socket address if successful; otherwise + * zero is returned. + */ +size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, + struct sockaddr *sap, const size_t salen) +{ + char *c, buf[RPCBIND_MAXUADDRLEN]; + unsigned long portlo, porthi; + unsigned short port; + + if (uaddr_len > sizeof(buf)) + return 0; + + memcpy(buf, uaddr, uaddr_len); + + buf[uaddr_len] = '\n'; + buf[uaddr_len + 1] = '\0'; + + c = strrchr(buf, '.'); + if (unlikely(c == NULL)) + return 0; + if (unlikely(strict_strtoul(c + 1, 10, &portlo) != 0)) + return 0; + if (unlikely(portlo > 255)) + return 0; + + c[0] = '\n'; + c[1] = '\0'; + + c = strrchr(buf, '.'); + if (unlikely(c == NULL)) + return 0; + if (unlikely(strict_strtoul(c + 1, 10, &porthi) != 0)) + return 0; + if (unlikely(porthi > 255)) + return 0; + + port = (unsigned short)((porthi << 8) | portlo); + + c[0] = '\0'; + + if (rpc_pton(buf, strlen(buf), sap, salen) == 0) + return 0; + + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + return sizeof(struct sockaddr_in); + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + return sizeof(struct sockaddr_in6); + } + + return 0; +} +EXPORT_SYMBOL_GPL(rpc_uaddr2sockaddr); -- cgit v1.2.3 From ba809130bc260fce04141aca01ef9e068d32af2a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Aug 2009 15:09:35 -0400 Subject: SUNRPC: Remove duplicate universal address generation RPC universal address generation is currently done in several places: rpcb_clnt.c, nfs4proc.c xprtsock.c, and xprtrdma.c. Remove the redundant cases that convert a socket address to a universal address. The nfs4proc.c case takes a pre-formatted presentation address string, not a socket address, so we'll leave that one. Because the new uaddr constructor uses the recently introduced rpc_ntop(), it now supports proper "::" shorthanding for IPv6 addresses. This allows the kernel to register properly formed universal addresses with the local rpcbind service, in _all_ cases. The kernel can now also send properly formed universal addresses in RPCB_GETADDR requests, and support link-local properly when encoding and decoding IPv6 addresses. 
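The IPv6 improvement is easy to reproduce in user space, since glibc's inet_ntop() shorthands the same way rpc_ntop() now does. A sketch assuming only the POSIX sockets API (port 111, the rpcbind port):

	#include <arpa/inet.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		struct sockaddr_in6 sin6;
		char buf[INET6_ADDRSTRLEN];

		memset(&sin6, 0, sizeof(sin6));
		sin6.sin6_family = AF_INET6;
		inet_pton(AF_INET6, "::1", &sin6.sin6_addr);
		sin6.sin6_port = htons(111);

		inet_ntop(AF_INET6, &sin6.sin6_addr, buf, sizeof(buf));
		/* prints "::1.0.111" -- shorthanded, as rpcbind expects */
		printf("%s.%u.%u\n", buf, ntohs(sin6.sin6_port) >> 8,
		       ntohs(sin6.sin6_port) & 0xff);
		return 0;
	}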
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - net/sunrpc/rpcb_clnt.c | 48 +++++++++++++++++++++++++---------------- net/sunrpc/xprtrdma/transport.c | 8 ------- net/sunrpc/xprtsock.c | 18 ---------------- 4 files changed, 29 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 1175d58efc2e..65fad9534d93 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -41,7 +41,6 @@ enum rpc_display_format_t { RPC_DISPLAY_ALL, RPC_DISPLAY_HEX_ADDR, RPC_DISPLAY_HEX_PORT, - RPC_DISPLAY_UNIVERSAL_ADDR, RPC_DISPLAY_NETID, RPC_DISPLAY_MAX, }; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index ad1d7315c498..1fb1c070c19d 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -153,6 +153,7 @@ static void rpcb_map_release(void *data) rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status); xprt_put(map->r_xprt); + kfree(map->r_addr); kfree(map); } @@ -299,12 +300,9 @@ static int rpcb_register_inet4(const struct sockaddr *sap, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); - char buf[32]; + int result; - /* Construct AF_INET universal address */ - snprintf(buf, sizeof(buf), "%pI4.%u.%u", - &sin->sin_addr.s_addr, port >> 8, port & 0xff); - map->r_addr = buf; + map->r_addr = rpc_sockaddr2uaddr(sap); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -315,7 +313,9 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(RPCBVERS_4, msg); + kfree(map->r_addr); + return result; } /* @@ -327,16 +327,9 @@ static int rpcb_register_inet6(const struct sockaddr *sap, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); - char buf[64]; + int result; - /* Construct AF_INET6 universal address */ - if (ipv6_addr_any(&sin6->sin6_addr)) - snprintf(buf, sizeof(buf), "::.%u.%u", - port >> 8, port & 0xff); - else - snprintf(buf, sizeof(buf), "%pI6.%u.%u", - &sin6->sin6_addr, port >> 8, port & 0xff); - map->r_addr = buf; + map->r_addr = rpc_sockaddr2uaddr(sap); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? 
"" : "un"), @@ -347,7 +340,9 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(RPCBVERS_4, msg); + kfree(map->r_addr); + return result; } static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) @@ -570,6 +565,7 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } + /* Parent transport's destination address */ salen = rpc_peeraddr(clnt, sap, sizeof(addr)); /* Don't ever use rpcbind v2 for AF_INET6 requests */ @@ -620,11 +616,22 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); - map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); - map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); - map->r_owner = ""; map->r_status = -EIO; + switch (bind_version) { + case RPCBVERS_4: + case RPCBVERS_3: + map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); + map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_owner = ""; + break; + case RPCBVERS_2: + map->r_addr = NULL; + break; + default: + BUG(); + } + child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { @@ -722,6 +729,9 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { + if (rpcb->r_addr == NULL) + return -EIO; + dprintk("RPC: encoding rpcb request (%u, %u, %s)\n", rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); *p++ = htonl(rpcb->r_prog); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 1dd6123070e9..537c210a8b92 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -202,14 +202,6 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - buf = kzalloc(30, GFP_KERNEL); - if (buf) - snprintf(buf, 30, "%pI4.%u.%u", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - /* netid */ xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 83c73c4d017a..a42c2adda59f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -341,15 +341,6 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt, } xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - buf = kzalloc(30, GFP_KERNEL); - if (buf) { - snprintf(buf, 30, "%pI4.%u.%u", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); - } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - xprt->address_strings[RPC_DISPLAY_NETID] = netid; } @@ -397,15 +388,6 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, } xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - buf = kzalloc(50, GFP_KERNEL); - if (buf) { - snprintf(buf, 50, "%pI6.%u.%u", - &addr->sin6_addr, - ntohs(addr->sin6_port) >> 8, - ntohs(addr->sin6_port) & 0xff); - } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - xprt->address_strings[RPC_DISPLAY_NETID] = netid; } -- cgit v1.2.3 From c740eff84bcfd63c0497ef880e80171931cb8222 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Aug 2009 15:09:46 -0400 Subject: SUNRPC: Kill RPC_DISPLAY_ALL At some point, I recall that rpc_pipe_fs used RPC_DISPLAY_ALL. 
Currently there are no uses of RPC_DISPLAY_ALL outside the transport modules themselves, so we can safely get rid of it. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - net/sunrpc/xprtrdma/transport.c | 5 ----- net/sunrpc/xprtsock.c | 50 ++++++++++++++++++++++++++++------------- 3 files changed, 34 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 65fad9534d93..c090df442572 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -38,7 +38,6 @@ enum rpc_display_format_t { RPC_DISPLAY_ADDR = 0, RPC_DISPLAY_PORT, RPC_DISPLAY_PROTO, - RPC_DISPLAY_ALL, RPC_DISPLAY_HEX_ADDR, RPC_DISPLAY_HEX_PORT, RPC_DISPLAY_NETID, diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 5f9b8676b6bd..9a63f669ece4 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -181,11 +181,6 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - (void)snprintf(buf, sizeof(buf), "addr=%s port=%s proto=rdma", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT]); - xprt->address_strings[RPC_DISPLAY_ALL] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", NIPQUAD(sin->sin_addr.s_addr)); xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7bc3c178ccb3..eee5ac96e177 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -307,12 +307,6 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "addr=%s port=%s proto=%s", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT], - xprt->address_strings[RPC_DISPLAY_PROTO]); - xprt->address_strings[RPC_DISPLAY_ALL] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); } @@ -1721,8 +1715,11 @@ static void xs_udp_connect_worker4(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1763,8 +1760,11 @@ static void xs_udp_connect_worker6(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1889,8 +1889,11 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, goto out_eagain; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + 
xprt->address_strings[RPC_DISPLAY_PORT]); status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", @@ -2228,8 +2231,15 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) return ERR_PTR(-EAFNOSUPPORT); } - dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); if (try_module_get(THIS_MODULE)) return xprt; @@ -2293,8 +2303,16 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) return ERR_PTR(-EAFNOSUPPORT); } - dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); + if (try_module_get(THIS_MODULE)) return xprt; -- cgit v1.2.3 From b693ba4a338da15db1db4b5ebaa36e4ab9781c82 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:15 -0400 Subject: SUNRPC: Constify rpc_pipe_ops... Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 2 +- include/linux/sunrpc/rpc_pipe_fs.h | 5 +++-- net/sunrpc/auth_gss/auth_gss.c | 8 ++++---- net/sunrpc/rpc_pipe.c | 7 ++++--- 4 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 86147b0ab2cf..fae0d3e52b44 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -101,7 +101,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); static unsigned int fnvhash32(const void *, size_t); -static struct rpc_pipe_ops idmap_upcall_ops = { +static const struct rpc_pipe_ops idmap_upcall_ops = { .upcall = idmap_pipe_upcall, .downcall = idmap_pipe_downcall, .destroy_msg = idmap_pipe_destroy_msg, diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index cea764c2359f..91f5b13389c5 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -32,8 +32,8 @@ struct rpc_inode { wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; - struct rpc_pipe_ops *ops; struct delayed_work queue_timeout; + const struct rpc_pipe_ops *ops; }; static inline struct rpc_inode * @@ -46,7 +46,8 @@ extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); extern int rpc_rmdir(struct dentry *); -extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, struct rpc_pipe_ops *, int flags); +extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, + const struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); extern struct vfsmount *rpc_get_mount(void); extern void rpc_put_mount(void); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 66d458fc6920..23eb3864ffc0 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -89,8 +89,8 @@ static struct rpc_wait_queue pipe_version_rpc_waitqueue; static 
DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); static void gss_free_ctx(struct gss_cl_ctx *); -static struct rpc_pipe_ops gss_upcall_ops_v0; -static struct rpc_pipe_ops gss_upcall_ops_v1; +static const struct rpc_pipe_ops gss_upcall_ops_v0; +static const struct rpc_pipe_ops gss_upcall_ops_v1; static inline struct gss_cl_ctx * gss_get_ctx(struct gss_cl_ctx *ctx) @@ -1507,7 +1507,7 @@ static const struct rpc_credops gss_nullops = { .crunwrap_resp = gss_unwrap_resp, }; -static struct rpc_pipe_ops gss_upcall_ops_v0 = { +static const struct rpc_pipe_ops gss_upcall_ops_v0 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, @@ -1515,7 +1515,7 @@ static struct rpc_pipe_ops gss_upcall_ops_v0 = { .release_pipe = gss_pipe_release, }; -static struct rpc_pipe_ops gss_upcall_ops_v1 = { +static const struct rpc_pipe_ops gss_upcall_ops_v1 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9ced0628d69c..f6f60f625e3f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -125,7 +125,7 @@ static void rpc_close_pipes(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - struct rpc_pipe_ops *ops; + const struct rpc_pipe_ops *ops; int need_release; mutex_lock(&inode->i_mutex); @@ -776,8 +776,9 @@ rpc_rmdir(struct dentry *dentry) * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. */ -struct dentry * -rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) +struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, + void *private, const struct rpc_pipe_ops *ops, + int flags) { struct dentry *dentry; struct inode *dir, *inode; -- cgit v1.2.3 From 458adb8ba9b26bfc66593866013adbb62a1a3d2e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:22 -0400 Subject: SUNRPC: Rename rpc_mkdir to rpc_create_client_dir() This reflects the fact that rpc_mkdir(), as it stands today, can only create an RPC client type directory.
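For callers the rename is purely mechanical, as a typical call site shows (taken from the clnt.c hunk of this patch):

	/* before */
	clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt);
	/* after: the name now says what is actually created */
	clnt->cl_dentry = rpc_create_client_dir(clnt->cl_pathname, clnt);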
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 5 +++-- net/sunrpc/clnt.c | 6 +++--- net/sunrpc/rpc_pipe.c | 11 +++++------ 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 91f5b13389c5..8de0ac276499 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -44,8 +44,9 @@ RPC_I(struct inode *inode) extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); -extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); -extern int rpc_rmdir(struct dentry *); +struct rpc_clnt; +extern struct dentry *rpc_create_client_dir(const char *, struct rpc_clnt *); +extern int rpc_remove_client_dir(struct dentry *); extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, const struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ebfcf9b89909..6ec37701a165 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -113,7 +113,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) "%s/clnt%x", dir_name, (unsigned int)clntid++); clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; - clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt); + clnt->cl_dentry = rpc_create_client_dir(clnt->cl_pathname, clnt); if (!IS_ERR(clnt->cl_dentry)) return 0; error = PTR_ERR(clnt->cl_dentry); @@ -232,7 +232,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru out_no_auth: if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_dentry); + rpc_remove_client_dir(clnt->cl_dentry); rpc_put_mount(); } out_no_path: @@ -424,7 +424,7 @@ rpc_free_client(struct kref *kref) dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_dentry); + rpc_remove_client_dir(clnt->cl_dentry); rpc_put_mount(); } if (clnt->cl_parent != clnt) { diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 2a4e6eb0a3e9..08580bedc25f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -755,7 +755,7 @@ out_bad: } /** - * rpc_mkdir - Create a new directory in rpc_pipefs + * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs * @path: path from the rpc_pipefs root to the new directory * @rpc_client: rpc client to associate with this directory * @@ -764,8 +764,8 @@ out_bad: * information about the client, together with any "pipes" that may * later be created using rpc_mkpipe(). 
*/ -struct dentry * -rpc_mkdir(char *path, struct rpc_clnt *rpc_client) +struct dentry *rpc_create_client_dir(const char *path, + struct rpc_clnt *rpc_client) { struct nameidata nd; struct dentry *dentry; @@ -797,11 +797,10 @@ out_err: } /** - * rpc_rmdir - Remove a directory created with rpc_mkdir() + * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() * @dentry: directory to remove */ -int -rpc_rmdir(struct dentry *dentry) +int rpc_remove_client_dir(struct dentry *dentry) { struct dentry *parent; struct inode *dir; -- cgit v1.2.3 From 7d217caca5d704e48aa5e59aba0b3ad4c7af4fd2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:24 -0400 Subject: SUNRPC: Replace rpc_client->cl_dentry and cl_mnt, with a cl_path Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 4 ++-- include/linux/sunrpc/clnt.h | 4 ++-- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/clnt.c | 24 ++++++++++++------------ 4 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index fae0d3e52b44..21a84d45916f 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -119,8 +119,8 @@ nfs_idmap_new(struct nfs_client *clp) if (idmap == NULL) return -ENOMEM; - idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", - idmap, &idmap_upcall_ops, 0); + idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, + "idmap", idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { error = PTR_ERR(idmap->idmap_dentry); kfree(idmap); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 37881f1a0bd7..38ad162330a2 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -17,6 +17,7 @@ #include #include #include +#include struct rpc_inode; @@ -51,8 +52,7 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; char cl_pathname[30];/* Path in rpc_pipe_fs */ - struct vfsmount * cl_vfsmnt; - struct dentry * cl_dentry; /* inode */ + struct path cl_path; struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 23eb3864ffc0..fc6a43ccd950 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -777,7 +777,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) * that we supported only the old pipe. So we instead create * the new pipe first. 
*/ - gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_dentry, + gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_path.dentry, "gssd", clnt, &gss_upcall_ops_v1, RPC_PIPE_WAIT_FOR_OPEN); @@ -786,7 +786,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) goto err_put_mech; } - gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_dentry, + gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_path.dentry, gss_auth->mech->gm_name, clnt, &gss_upcall_ops_v0, RPC_PIPE_WAIT_FOR_OPEN); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6ec37701a165..b3f863346300 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -99,24 +99,24 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) static uint32_t clntid; int error; - clnt->cl_vfsmnt = ERR_PTR(-ENOENT); - clnt->cl_dentry = ERR_PTR(-ENOENT); + clnt->cl_path.mnt = ERR_PTR(-ENOENT); + clnt->cl_path.dentry = ERR_PTR(-ENOENT); if (dir_name == NULL) return 0; - clnt->cl_vfsmnt = rpc_get_mount(); - if (IS_ERR(clnt->cl_vfsmnt)) - return PTR_ERR(clnt->cl_vfsmnt); + clnt->cl_path.mnt = rpc_get_mount(); + if (IS_ERR(clnt->cl_path.mnt)) + return PTR_ERR(clnt->cl_path.mnt); for (;;) { snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), "%s/clnt%x", dir_name, (unsigned int)clntid++); clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; - clnt->cl_dentry = rpc_create_client_dir(clnt->cl_pathname, clnt); - if (!IS_ERR(clnt->cl_dentry)) + clnt->cl_path.dentry = rpc_create_client_dir(clnt->cl_pathname, clnt); + if (!IS_ERR(clnt->cl_path.dentry)) return 0; - error = PTR_ERR(clnt->cl_dentry); + error = PTR_ERR(clnt->cl_path.dentry); if (error != -EEXIST) { printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", clnt->cl_pathname, error); @@ -231,8 +231,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru return clnt; out_no_auth: - if (!IS_ERR(clnt->cl_dentry)) { - rpc_remove_client_dir(clnt->cl_dentry); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); rpc_put_mount(); } out_no_path: @@ -423,8 +423,8 @@ rpc_free_client(struct kref *kref) dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (!IS_ERR(clnt->cl_dentry)) { - rpc_remove_client_dir(clnt->cl_dentry); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); rpc_put_mount(); } if (clnt->cl_parent != clnt) { -- cgit v1.2.3 From 23ac6581702ac6d029643328a7e6ea3baf834c5e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:25 -0400 Subject: SUNRPC: clean up rpc_setup_pipedir() There is still a little wart or two there: Since we've already got a vfsmount, we might as well pass that in to rpc_create_client_dir. Another point is that if we open code __rpc_lookup_path() here, then we can avoid looking up the entire parent directory path over and over again: it doesn't change. Also get rid of rpc_clnt->cl_pathname, since it has no users... 
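The reworked rpc_setup_pipedir() thus becomes: resolve dir_name once with vfs_path_lookup(), then loop generating "clnt%x" names until the directory creation stops failing with -EEXIST. A condensed sketch of that flow (error paths elided; the diff below is authoritative):

	error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &nd);
	for (;;) {
		q.len = snprintf(name, sizeof(name), "clnt%x", clntid++);
		q.hash = full_name_hash(q.name, q.len);
		path.dentry = rpc_create_client_dir(nd.path.dentry, &q, clnt);
		if (!IS_ERR(path.dentry))
			break;			/* created it */
		if (PTR_ERR(path.dentry) != -EEXIST)
			break;			/* real error: bail out */
	}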
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 - include/linux/sunrpc/rpc_pipe_fs.h | 2 +- net/sunrpc/clnt.c | 48 +++++++++++++++++++++---------- net/sunrpc/rpc_pipe.c | 58 ++------------------------------------ 4 files changed, 37 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 38ad162330a2..1848d44922ef 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -51,7 +51,6 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; - char cl_pathname[30];/* Path in rpc_pipe_fs */ struct path cl_path; struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 8de0ac276499..88332ef1e959 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -45,7 +45,7 @@ RPC_I(struct inode *inode) extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); struct rpc_clnt; -extern struct dentry *rpc_create_client_dir(const char *, struct rpc_clnt *); +extern struct dentry *rpc_create_client_dir(struct dentry *, struct qstr *, struct rpc_clnt *); extern int rpc_remove_client_dir(struct dentry *); extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, const struct rpc_pipe_ops *, int flags); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b3f863346300..c1e467e1b07d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include @@ -97,6 +99,12 @@ static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) { static uint32_t clntid; + struct nameidata nd; + struct path path; + char name[15]; + struct qstr q = { + .name = name, + }; int error; clnt->cl_path.mnt = ERR_PTR(-ENOENT); @@ -104,26 +112,36 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) if (dir_name == NULL) return 0; - clnt->cl_path.mnt = rpc_get_mount(); - if (IS_ERR(clnt->cl_path.mnt)) - return PTR_ERR(clnt->cl_path.mnt); + path.mnt = rpc_get_mount(); + if (IS_ERR(path.mnt)) + return PTR_ERR(path.mnt); + error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &nd); + if (error) + goto err; for (;;) { - snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), - "%s/clnt%x", dir_name, - (unsigned int)clntid++); - clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; - clnt->cl_path.dentry = rpc_create_client_dir(clnt->cl_pathname, clnt); - if (!IS_ERR(clnt->cl_path.dentry)) - return 0; - error = PTR_ERR(clnt->cl_path.dentry); + q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); + name[sizeof(name) - 1] = '\0'; + q.hash = full_name_hash(q.name, q.len); + path.dentry = rpc_create_client_dir(nd.path.dentry, &q, clnt); + if (!IS_ERR(path.dentry)) + break; + error = PTR_ERR(path.dentry); if (error != -EEXIST) { - printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", - clnt->cl_pathname, error); - rpc_put_mount(); - return error; + printk(KERN_INFO "RPC: Couldn't create pipefs entry" + " %s/%s, error %d\n", + dir_name, name, error); + goto err_path_put; } } + path_put(&nd.path); + clnt->cl_path = path; + return 0; +err_path_put: + path_put(&nd.path); +err: + rpc_put_mount(); + return error; } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 
6d152f69587e..1613d858ba38 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -443,42 +443,6 @@ static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; -static int __rpc_lookup_path(const char *pathname, unsigned flags, - struct nameidata *nd) -{ - struct vfsmount *mnt; - - if (pathname[0] == '\0') - return -ENOENT; - - mnt = rpc_get_mount(); - if (IS_ERR(mnt)) { - printk(KERN_WARNING "%s: %s failed to mount " - "pseudofilesystem \n", __FILE__, __func__); - return PTR_ERR(mnt); - } - - if (vfs_path_lookup(mnt->mnt_root, mnt, pathname, flags, nd)) { - printk(KERN_WARNING "%s: %s failed to find path %s\n", - __FILE__, __func__, pathname); - rpc_put_mount(); - return -ENOENT; - } - return 0; -} - -static int rpc_lookup_parent(const char *pathname, struct nameidata *nd) -{ - return __rpc_lookup_path(pathname, LOOKUP_PARENT, nd); -} - -static void -rpc_release_path(struct nameidata *nd) -{ - path_put(&nd->path); - rpc_put_mount(); -} - static struct inode * rpc_get_inode(struct super_block *sb, umode_t mode) { @@ -889,27 +853,11 @@ EXPORT_SYMBOL_GPL(rpc_unlink); * information about the client, together with any "pipes" that may * later be created using rpc_mkpipe(). */ -struct dentry *rpc_create_client_dir(const char *path, +struct dentry *rpc_create_client_dir(struct dentry *dentry, + struct qstr *name, struct rpc_clnt *rpc_client) { - struct nameidata nd; - struct dentry *ret; - struct inode *dir; - - ret = ERR_PTR(rpc_lookup_parent(path, &nd)); - if (IS_ERR(ret)) - goto out_err; - dir = nd.path.dentry->d_inode; - - ret = rpc_mkdir_populate(nd.path.dentry, &nd.last, - S_IRUGO | S_IXUGO, rpc_client); - rpc_release_path(&nd); - if (!IS_ERR(ret)) - return ret; -out_err: - printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %ld)\n", - __FILE__, __func__, path, PTR_ERR(ret)); - return ret; + return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, rpc_client); } /* -- cgit v1.2.3 From 2da8ca26c6bfad685bfddf39728eac1c83906aa9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:26 -0400 Subject: NFSD: Clean up the idmapper warning... What part of 'internal use' is so hard to understand? Signed-off-by: Trond Myklebust --- fs/nfsd/nfs4idmap.c | 4 ++-- include/linux/sunrpc/cache.h | 3 ++- net/sunrpc/cache.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 5b398421b051..e9012ad36ac0 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -175,10 +175,10 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) } static void -warn_no_idmapd(struct cache_detail *detail) +warn_no_idmapd(struct cache_detail *detail, int has_died) { printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", - detail->last_close? "died" : "not been started"); + has_died ? 
"died" : "not been started"); } diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 2d8b211b9324..3d1fad22185a 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -79,6 +79,8 @@ struct cache_detail { int (*cache_show)(struct seq_file *m, struct cache_detail *cd, struct cache_head *h); + void (*warn_no_listener)(struct cache_detail *cd, + int has_died); struct cache_head * (*alloc)(void); int (*match)(struct cache_head *orig, struct cache_head *new); @@ -102,7 +104,6 @@ struct cache_detail { atomic_t readers; /* how many time is /chennel open */ time_t last_close; /* if no readers, when did last close */ time_t last_warn; /* when we last warned about no readers */ - void (*warn_no_listener)(struct cache_detail *cd); }; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ff0c23053d2f..8ede4a6f384b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1020,7 +1020,7 @@ static void warn_no_listener(struct cache_detail *detail) if (detail->last_warn != detail->last_close) { detail->last_warn = detail->last_close; if (detail->warn_no_listener) - detail->warn_no_listener(detail); + detail->warn_no_listener(detail, detail->last_close != 0); } } -- cgit v1.2.3 From bc74b4f5e63a09fb78e245794a0de1e5a2716bbe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:29 -0400 Subject: SUNRPC: Allow the cache_detail to specify alternative upcall mechanisms For events that are rare, such as referral DNS lookups, it makes limited sense to have a daemon constantly listening for upcalls on a channel. An alternative in those cases might simply be to run the app that fills the cache using call_usermodehelper_exec() and friends. The following patch allows the cache_detail to specify alternative upcall mechanisms for these particular cases. 
Signed-off-by: Trond Myklebust --- fs/nfsd/export.c | 14 ++++++++++++-- fs/nfsd/nfs4idmap.c | 16 ++++++++++++++-- include/linux/sunrpc/cache.h | 13 ++++++++++--- net/sunrpc/auth_gss/svcauth_gss.c | 7 ++++++- net/sunrpc/cache.c | 26 ++++++++++++++++++-------- net/sunrpc/svcauth_unix.c | 14 ++++++++++++-- 6 files changed, 72 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index b92a27629fb7..d9462643155c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -85,6 +85,11 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, expkey_request); +} + static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); static struct cache_detail svc_expkey_cache; @@ -259,7 +264,7 @@ static struct cache_detail svc_expkey_cache = { .hash_table = expkey_table, .name = "nfsd.fh", .cache_put = expkey_put, - .cache_request = expkey_request, + .cache_upcall = expkey_upcall, .cache_parse = expkey_parse, .cache_show = expkey_show, .match = expkey_match, @@ -355,6 +360,11 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); +} + static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); @@ -724,7 +734,7 @@ struct cache_detail svc_export_cache = { .hash_table = export_table, .name = "nfsd.export", .cache_put = svc_export_put, - .cache_request = svc_export_request, + .cache_upcall = svc_export_upcall, .cache_parse = svc_export_parse, .cache_show = svc_export_show, .match = svc_export_match, diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index e9012ad36ac0..cdfa86fa1471 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -145,6 +145,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, (*bpp)[-1] = '\n'; } +static int +idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) +{ + return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); +} + static int idtoname_match(struct cache_head *ca, struct cache_head *cb) { @@ -192,7 +198,7 @@ static struct cache_detail idtoname_cache = { .hash_table = idtoname_table, .name = "nfs4.idtoname", .cache_put = ent_put, - .cache_request = idtoname_request, + .cache_upcall = idtoname_upcall, .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, @@ -324,6 +330,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, (*bpp)[-1] = '\n'; } +static int +nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) +{ + return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); +} + static int nametoid_match(struct cache_head *ca, struct cache_head *cb) { @@ -363,7 +375,7 @@ static struct cache_detail nametoid_cache = { .hash_table = nametoid_table, .name = "nfs4.nametoid", .cache_put = ent_put, - .cache_request = nametoid_request, + .cache_upcall = nametoid_upcall, .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 3d1fad22185a..23ee25981a0c 100644 --- a/include/linux/sunrpc/cache.h +++ 
b/include/linux/sunrpc/cache.h @@ -70,9 +70,9 @@ struct cache_detail { char *name; void (*cache_put)(struct kref *); - void (*cache_request)(struct cache_detail *cd, - struct cache_head *h, - char **bpp, int *blen); + int (*cache_upcall)(struct cache_detail *, + struct cache_head *); + int (*cache_parse)(struct cache_detail *, char *buf, int len); @@ -135,6 +135,13 @@ extern struct cache_head * sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); +extern int +sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, + void (*cache_request)(struct cache_detail *, + struct cache_head *, + char **, + int *)); + extern void cache_clean_deferred(void *owner); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 2278a50c6444..2e6a148d277c 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -181,6 +181,11 @@ static void rsi_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, rsi_request); +} + static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -270,7 +275,7 @@ static struct cache_detail rsi_cache = { .hash_table = rsi_table, .name = "auth.rpcsec.init", .cache_put = rsi_put, - .cache_request = rsi_request, + .cache_upcall = rsi_upcall, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index c8e7d2d07260..e438352bed7b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -176,7 +176,13 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, } EXPORT_SYMBOL_GPL(sunrpc_cache_update); -static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); +static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) +{ + if (!cd->cache_upcall) + return -EINVAL; + return cd->cache_upcall(cd, h); +} + /* * This is the generic cache management routine for all * the authentication caches. @@ -322,7 +328,7 @@ static int create_cache_proc_entries(struct cache_detail *cd) if (p == NULL) goto out_nomem; - if (cd->cache_request || cd->cache_parse) { + if (cd->cache_upcall || cd->cache_parse) { p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, cd->proc_ent, &cache_file_operations, cd); cd->channel_ent = p; @@ -1080,10 +1086,16 @@ static void warn_no_listener(struct cache_detail *detail) } /* - * register an upcall request to user-space. + * register an upcall request to user-space and queue it up for read() by the + * upcall daemon. + * * Each request is at most one page long. 
*/ -static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) +int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, + void (*cache_request)(struct cache_detail *, + struct cache_head *, + char **, + int *)) { char *buf; @@ -1091,9 +1103,6 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) char *bp; int len; - if (detail->cache_request == NULL) - return -EINVAL; - if (atomic_read(&detail->readers) == 0 && detail->last_close < get_seconds() - 30) { warn_no_listener(detail); @@ -1112,7 +1121,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) bp = buf; len = PAGE_SIZE; - detail->cache_request(detail, h, &bp, &len); + cache_request(detail, h, &bp, &len); if (len < 0) { kfree(buf); @@ -1130,6 +1139,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) wake_up(&queue_wait); return 0; } +EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); /* * parse a message from user-space and pass it diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 5c865e2d299e..6caffa34ac01 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -171,6 +171,11 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); +} + static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); @@ -289,7 +294,7 @@ struct cache_detail ip_map_cache = { .hash_table = ip_table, .name = "auth.unix.ip", .cache_put = ip_map_put, - .cache_request = ip_map_request, + .cache_upcall = ip_map_upcall, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, @@ -523,6 +528,11 @@ static void unix_gid_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); +} + static struct unix_gid *unix_gid_lookup(uid_t uid); extern struct cache_detail unix_gid_cache; @@ -622,7 +632,7 @@ struct cache_detail unix_gid_cache = { .hash_table = gid_table, .name = "auth.unix.gid", .cache_put = unix_gid_put, - .cache_request = unix_gid_request, + .cache_upcall = unix_gid_upcall, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, -- cgit v1.2.3 From 173912a6add00f4715774dcecf9ee53274c5924c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:29 -0400 Subject: SUNRPC: Move procfs-specific stuff out of the generic sunrpc cache code Signed-off-by: Trond Myklebust --- include/linux/sunrpc/cache.h | 11 +- net/sunrpc/cache.c | 319 ++++++++++++++++++++++++++----------------- 2 files changed, 199 insertions(+), 131 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 23ee25981a0c..8e5bf3036652 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -59,6 +59,11 @@ struct cache_head { #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ +struct cache_detail_procfs { + struct proc_dir_entry *proc_ent; + struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; +}; + struct cache_detail { struct module * owner; int hash_size; @@ -98,12 +103,14 @@ struct cache_detail { /* fields for communication over channel */ struct 
list_head queue; - struct proc_dir_entry *proc_ent; - struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; atomic_t readers; /* how many time is /chennel open */ time_t last_close; /* if no readers, when did last close */ time_t last_warn; /* when we last warned about no readers */ + + union { + struct cache_detail_procfs procfs; + } u; }; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index e438352bed7b..1cd82eda56d0 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -291,69 +291,9 @@ static DEFINE_SPINLOCK(cache_list_lock); static struct cache_detail *current_detail; static int current_index; -static const struct file_operations cache_file_operations; -static const struct file_operations content_file_operations; -static const struct file_operations cache_flush_operations; - static void do_cache_clean(struct work_struct *work); static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); -static void remove_cache_proc_entries(struct cache_detail *cd) -{ - if (cd->proc_ent == NULL) - return; - if (cd->flush_ent) - remove_proc_entry("flush", cd->proc_ent); - if (cd->channel_ent) - remove_proc_entry("channel", cd->proc_ent); - if (cd->content_ent) - remove_proc_entry("content", cd->proc_ent); - cd->proc_ent = NULL; - remove_proc_entry(cd->name, proc_net_rpc); -} - -#ifdef CONFIG_PROC_FS -static int create_cache_proc_entries(struct cache_detail *cd) -{ - struct proc_dir_entry *p; - - cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); - if (cd->proc_ent == NULL) - goto out_nomem; - cd->channel_ent = cd->content_ent = NULL; - - p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_flush_operations, cd); - cd->flush_ent = p; - if (p == NULL) - goto out_nomem; - - if (cd->cache_upcall || cd->cache_parse) { - p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_file_operations, cd); - cd->channel_ent = p; - if (p == NULL) - goto out_nomem; - } - if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &content_file_operations, cd); - cd->content_ent = p; - if (p == NULL) - goto out_nomem; - } - return 0; -out_nomem: - remove_cache_proc_entries(cd); - return -ENOMEM; -} -#else /* CONFIG_PROC_FS */ -static int create_cache_proc_entries(struct cache_detail *cd) -{ - return 0; -} -#endif - static void sunrpc_init_cache_detail(struct cache_detail *cd) { rwlock_init(&cd->hash_lock); @@ -395,25 +335,6 @@ out: printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); } -int cache_register(struct cache_detail *cd) -{ - int ret; - - sunrpc_init_cache_detail(cd); - ret = create_cache_proc_entries(cd); - if (ret) - sunrpc_destroy_cache_detail(cd); - return ret; -} -EXPORT_SYMBOL_GPL(cache_register); - -void cache_unregister(struct cache_detail *cd) -{ - remove_cache_proc_entries(cd); - sunrpc_destroy_cache_detail(cd); -} -EXPORT_SYMBOL_GPL(cache_unregister); - /* clean cache tries to find something to clean * and cleans it. 
* It returns 1 if it cleaned something, @@ -704,13 +625,12 @@ struct cache_reader { int offset; /* if non-0, we have a refcnt on next request */ }; -static ssize_t -cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) +static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, + loff_t *ppos, struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; struct inode *inode = filp->f_path.dentry->d_inode; - struct cache_detail *cd = PDE(inode)->data; int err; if (count == 0) @@ -834,13 +754,12 @@ out_slow: return cache_slow_downcall(buf, count, cd); } -static ssize_t -cache_write(struct file *filp, const char __user *buf, size_t count, - loff_t *ppos) +static ssize_t cache_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos, + struct cache_detail *cd) { struct address_space *mapping = filp->f_mapping; struct inode *inode = filp->f_path.dentry->d_inode; - struct cache_detail *cd = PDE(inode)->data; ssize_t ret = -EINVAL; if (!cd->cache_parse) @@ -855,13 +774,12 @@ out: static DECLARE_WAIT_QUEUE_HEAD(queue_wait); -static unsigned int -cache_poll(struct file *filp, poll_table *wait) +static unsigned int cache_poll(struct file *filp, poll_table *wait, + struct cache_detail *cd) { unsigned int mask; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; poll_wait(filp, &queue_wait, wait); @@ -883,14 +801,13 @@ cache_poll(struct file *filp, poll_table *wait) return mask; } -static int -cache_ioctl(struct inode *ino, struct file *filp, - unsigned int cmd, unsigned long arg) +static int cache_ioctl(struct inode *ino, struct file *filp, + unsigned int cmd, unsigned long arg, + struct cache_detail *cd) { int len = 0; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(ino)->data; if (cmd != FIONREAD || !rp) return -EINVAL; @@ -913,15 +830,13 @@ cache_ioctl(struct inode *ino, struct file *filp, return put_user(len, (int __user *)arg); } -static int -cache_open(struct inode *inode, struct file *filp) +static int cache_open(struct inode *inode, struct file *filp, + struct cache_detail *cd) { struct cache_reader *rp = NULL; nonseekable_open(inode, filp); if (filp->f_mode & FMODE_READ) { - struct cache_detail *cd = PDE(inode)->data; - rp = kmalloc(sizeof(*rp), GFP_KERNEL); if (!rp) return -ENOMEM; @@ -936,11 +851,10 @@ cache_open(struct inode *inode, struct file *filp) return 0; } -static int -cache_release(struct inode *inode, struct file *filp) +static int cache_release(struct inode *inode, struct file *filp, + struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; - struct cache_detail *cd = PDE(inode)->data; if (rp) { spin_lock(&queue_lock); @@ -969,18 +883,6 @@ cache_release(struct inode *inode, struct file *filp) -static const struct file_operations cache_file_operations = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = cache_read, - .write = cache_write, - .poll = cache_poll, - .ioctl = cache_ioctl, /* for FIONREAD */ - .open = cache_open, - .release = cache_release, -}; - - static void queue_loose(struct cache_detail *detail, struct cache_head *ch) { struct cache_queue *cq; @@ -1307,10 +1209,10 @@ static const struct seq_operations cache_content_op = { .show = c_show, }; -static int content_open(struct inode *inode, struct file *file) +static int content_open(struct inode *inode, struct file *file, + struct cache_detail *cd) { struct 
handle *han; - struct cache_detail *cd = PDE(inode)->data; han = __seq_open_private(file, &cache_content_op, sizeof(*han)); if (han == NULL) @@ -1320,17 +1222,10 @@ static int content_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations content_file_operations = { - .open = content_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - static ssize_t read_flush(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos, + struct cache_detail *cd) { - struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; unsigned long p = *ppos; size_t len; @@ -1348,10 +1243,10 @@ static ssize_t read_flush(struct file *file, char __user *buf, return len; } -static ssize_t write_flush(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) +static ssize_t write_flush(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, + struct cache_detail *cd) { - struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; char *ep; long flushtime; @@ -1372,8 +1267,174 @@ static ssize_t write_flush(struct file * file, const char __user * buf, return count; } -static const struct file_operations cache_flush_operations = { +static ssize_t cache_read_procfs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_read(filp, buf, count, ppos, cd); +} + +static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_write(filp, buf, count, ppos, cd); +} + +static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_poll(filp, wait, cd); +} + +static int cache_ioctl_procfs(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_ioctl(inode, filp, cmd, arg, cd); +} + +static int cache_open_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_open(inode, filp, cd); +} + +static int cache_release_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_release(inode, filp, cd); +} + +static const struct file_operations cache_file_operations_procfs = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = cache_read_procfs, + .write = cache_write_procfs, + .poll = cache_poll_procfs, + .ioctl = cache_ioctl_procfs, /* for FIONREAD */ + .open = cache_open_procfs, + .release = cache_release_procfs, +}; + +static int content_open_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return content_open(inode, filp, cd); +} + +static const struct file_operations content_file_operations_procfs = { + .open = content_open_procfs, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static ssize_t read_flush_procfs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return read_flush(filp, buf, count, ppos, cd); +} + +static ssize_t write_flush_procfs(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct 
cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return write_flush(filp, buf, count, ppos, cd); +} + +static const struct file_operations cache_flush_operations_procfs = { .open = nonseekable_open, - .read = read_flush, - .write = write_flush, + .read = read_flush_procfs, + .write = write_flush_procfs, }; + +static void remove_cache_proc_entries(struct cache_detail *cd) +{ + if (cd->u.procfs.proc_ent == NULL) + return; + if (cd->u.procfs.flush_ent) + remove_proc_entry("flush", cd->u.procfs.proc_ent); + if (cd->u.procfs.channel_ent) + remove_proc_entry("channel", cd->u.procfs.proc_ent); + if (cd->u.procfs.content_ent) + remove_proc_entry("content", cd->u.procfs.proc_ent); + cd->u.procfs.proc_ent = NULL; + remove_proc_entry(cd->name, proc_net_rpc); +} + +#ifdef CONFIG_PROC_FS +static int create_cache_proc_entries(struct cache_detail *cd) +{ + struct proc_dir_entry *p; + + cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); + if (cd->u.procfs.proc_ent == NULL) + goto out_nomem; + cd->u.procfs.channel_ent = NULL; + cd->u.procfs.content_ent = NULL; + + p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &cache_flush_operations_procfs, cd); + cd->u.procfs.flush_ent = p; + if (p == NULL) + goto out_nomem; + + if (cd->cache_upcall || cd->cache_parse) { + p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &cache_file_operations_procfs, cd); + cd->u.procfs.channel_ent = p; + if (p == NULL) + goto out_nomem; + } + if (cd->cache_show) { + p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &content_file_operations_procfs, cd); + cd->u.procfs.content_ent = p; + if (p == NULL) + goto out_nomem; + } + return 0; +out_nomem: + remove_cache_proc_entries(cd); + return -ENOMEM; +} +#else /* CONFIG_PROC_FS */ +static int create_cache_proc_entries(struct cache_detail *cd) +{ + return 0; +} +#endif + +int cache_register(struct cache_detail *cd) +{ + int ret; + + sunrpc_init_cache_detail(cd); + ret = create_cache_proc_entries(cd); + if (ret) + sunrpc_destroy_cache_detail(cd); + return ret; +} +EXPORT_SYMBOL_GPL(cache_register); + +void cache_unregister(struct cache_detail *cd) +{ + remove_cache_proc_entries(cd); + sunrpc_destroy_cache_detail(cd); +} +EXPORT_SYMBOL_GPL(cache_unregister); -- cgit v1.2.3 From 8854e82d9accc80f43c0bc3ff06b5979ac858185 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:30 -0400 Subject: SUNRPC: Add an rpc_pipefs front end for the sunrpc cache code Signed-off-by: Trond Myklebust --- include/linux/sunrpc/cache.h | 13 ++++ include/linux/sunrpc/rpc_pipe_fs.h | 8 +++ net/sunrpc/cache.c | 126 +++++++++++++++++++++++++++++++++++++ net/sunrpc/rpc_pipe.c | 43 +++++++++++++ 4 files changed, 190 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 8e5bf3036652..6f52b4d7c447 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -64,6 +64,10 @@ struct cache_detail_procfs { struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; }; +struct cache_detail_pipefs { + struct dentry *dir; +}; + struct cache_detail { struct module * owner; int hash_size; @@ -110,6 +114,7 @@ struct cache_detail { union { struct cache_detail_procfs procfs; + struct cache_detail_pipefs pipefs; } u; }; @@ -135,6 +140,10 @@ struct cache_deferred_req { }; +extern const struct file_operations cache_file_operations_pipefs; +extern const struct file_operations content_file_operations_pipefs; 
+extern const struct file_operations cache_flush_operations_pipefs; + extern struct cache_head * sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash); @@ -186,6 +195,10 @@ extern void cache_purge(struct cache_detail *detail); extern int cache_register(struct cache_detail *cd); extern void cache_unregister(struct cache_detail *cd); +extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, + mode_t, struct cache_detail *); +extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); + extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); extern int qword_get(char **bpp, char *dest, int bufsize); diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 88332ef1e959..a92571a34556 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -47,6 +47,14 @@ extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); struct rpc_clnt; extern struct dentry *rpc_create_client_dir(struct dentry *, struct qstr *, struct rpc_clnt *); extern int rpc_remove_client_dir(struct dentry *); + +struct cache_detail; +extern struct dentry *rpc_create_cache_dir(struct dentry *, + struct qstr *, + mode_t umode, + struct cache_detail *); +extern void rpc_remove_cache_dir(struct dentry *); + extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, const struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 1cd82eda56d0..db7720e453c3 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -32,6 +32,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_CACHE @@ -1438,3 +1439,128 @@ void cache_unregister(struct cache_detail *cd) sunrpc_destroy_cache_detail(cd); } EXPORT_SYMBOL_GPL(cache_unregister); + +static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_read(filp, buf, count, ppos, cd); +} + +static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_write(filp, buf, count, ppos, cd); +} + +static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_poll(filp, wait, cd); +} + +static int cache_ioctl_pipefs(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_ioctl(inode, filp, cmd, arg, cd); +} + +static int cache_open_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_open(inode, filp, cd); +} + +static int cache_release_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_release(inode, filp, cd); +} + +const struct file_operations cache_file_operations_pipefs = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = cache_read_pipefs, + .write = cache_write_pipefs, + .poll = cache_poll_pipefs, + .ioctl = cache_ioctl_pipefs, /* for FIONREAD */ + .open = cache_open_pipefs, + .release = cache_release_pipefs, +}; + +static int content_open_pipefs(struct inode *inode, struct file *filp) +{ + 
struct cache_detail *cd = RPC_I(inode)->private; + + return content_open(inode, filp, cd); +} + +const struct file_operations content_file_operations_pipefs = { + .open = content_open_pipefs, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static ssize_t read_flush_pipefs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return read_flush(filp, buf, count, ppos, cd); +} + +static ssize_t write_flush_pipefs(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return write_flush(filp, buf, count, ppos, cd); +} + +const struct file_operations cache_flush_operations_pipefs = { + .open = nonseekable_open, + .read = read_flush_pipefs, + .write = write_flush_pipefs, +}; + +int sunrpc_cache_register_pipefs(struct dentry *parent, + const char *name, mode_t umode, + struct cache_detail *cd) +{ + struct qstr q; + struct dentry *dir; + int ret = 0; + + sunrpc_init_cache_detail(cd); + q.name = name; + q.len = strlen(name); + q.hash = full_name_hash(q.name, q.len); + dir = rpc_create_cache_dir(parent, &q, umode, cd); + if (!IS_ERR(dir)) + cd->u.pipefs.dir = dir; + else { + sunrpc_destroy_cache_detail(cd); + ret = PTR_ERR(dir); + } + return ret; +} +EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); + +void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) +{ + rpc_remove_cache_dir(cd->u.pipefs.dir); + cd->u.pipefs.dir = NULL; + sunrpc_destroy_cache_detail(cd); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); + diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 57e9cd3c49b6..8dd81535e08f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -26,6 +26,7 @@ #include #include #include +#include static struct vfsmount *rpc_mount __read_mostly; static int rpc_mount_count; @@ -882,6 +883,48 @@ int rpc_remove_client_dir(struct dentry *dentry) return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); } +static const struct rpc_filelist cache_pipefs_files[3] = { + [0] = { + .name = "channel", + .i_fop = &cache_file_operations_pipefs, + .mode = S_IFIFO|S_IRUSR|S_IWUSR, + }, + [1] = { + .name = "content", + .i_fop = &content_file_operations_pipefs, + .mode = S_IFREG|S_IRUSR, + }, + [2] = { + .name = "flush", + .i_fop = &cache_flush_operations_pipefs, + .mode = S_IFREG|S_IRUSR|S_IWUSR, + }, +}; + +static int rpc_cachedir_populate(struct dentry *dentry, void *private) +{ + return rpc_populate(dentry, + cache_pipefs_files, 0, 3, + private); +} + +static void rpc_cachedir_depopulate(struct dentry *dentry) +{ + rpc_depopulate(dentry, cache_pipefs_files, 0, 3); +} + +struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name, + mode_t umode, struct cache_detail *cd) +{ + return rpc_mkdir_populate(parent, name, umode, NULL, + rpc_cachedir_populate, cd); +} + +void rpc_remove_cache_dir(struct dentry *dentry) +{ + rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate); +} + /* * populate the filesystem */ -- cgit v1.2.3 From 47d439e9fb8a81a90022cfa785bf1c36c4e2aff6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 7 Aug 2009 14:53:57 -0400 Subject: security: define round_hint_to_min in !CONFIG_SECURITY Fix the header files to define round_hint_to_min() and to define mmap_min_addr_handler() in the !CONFIG_SECURITY case. 
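For reference, the effect of the relocated helper on a few representative hints (a worked example assuming mmap_min_addr == 0x10000 and a 4 KiB PAGE_SIZE):

	/*
	 * round_hint_to_min(0x0)     == 0x0      NULL hint passes through
	 * round_hint_to_min(0x1234)  == 0x10000  masked to 0x1000, below the
	 *                                        floor, raised to PAGE_ALIGN()
	 * round_hint_to_min(0x20000) == 0x20000  already above mmap_min_addr
	 */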
Built and tested with !CONFIG_SECURITY Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/security.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 7b431155e392..57ead99d2593 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -121,6 +121,21 @@ struct request_sock; #define LSM_UNSAFE_PTRACE 2 #define LSM_UNSAFE_PTRACE_CAP 4 +/* + * If a hint addr is less than mmap_min_addr change hint to be as + * low as possible but still greater than mmap_min_addr + */ +static inline unsigned long round_hint_to_min(unsigned long hint) +{ + hint &= PAGE_MASK; + if (((void *)hint != NULL) && + (hint < mmap_min_addr)) + return PAGE_ALIGN(mmap_min_addr); + return hint; +} +extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + #ifdef CONFIG_SECURITY struct security_mnt_opts { @@ -149,21 +164,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) opts->num_mnt_opts = 0; } -/* - * If a hint addr is less than mmap_min_addr change hint to be as - * low as possible but still greater than mmap_min_addr - */ -static inline unsigned long round_hint_to_min(unsigned long hint) -{ - hint &= PAGE_MASK; - if (((void *)hint != NULL) && - (hint < mmap_min_addr)) - return PAGE_ALIGN(mmap_min_addr); - return hint; -} - -extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos); /** * struct security_operations - main security structure * -- cgit v1.2.3 From 7cd1837b5d24417eca667d674a97bea936849785 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:36:33 +0200 Subject: netfilter: xtables: remove xt_TOS v0 Superseded by xt_TOS v1 (v2.6.24-2396-g5c350e5). 
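For configurations that relied on the removed revision, v1 expresses the same operation through a value/mask pair, applying tos = (tos & ~tos_mask) ^ tos_value. A sketch of the v0 equivalent (struct and field names assumed from xt_DSCP.h):

	struct xt_tos_target_info v1_equiv = {
		.tos_value = IPTOS_LOWDELAY,  /* one of the five values v0 accepted */
		.tos_mask  = IPTOS_TOS_MASK,  /* keep the precedence bits, as v0 did */
	};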
Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 -- include/linux/netfilter_ipv4/Kbuild | 2 -- include/linux/netfilter_ipv4/ipt_TOS.h | 12 -------- include/linux/netfilter_ipv4/ipt_tos.h | 13 --------- net/netfilter/xt_DSCP.c | 46 ------------------------------ net/netfilter/xt_dscp.c | 17 ----------- 6 files changed, 93 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/ipt_TOS.h delete mode 100644 include/linux/netfilter_ipv4/ipt_tos.h (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index f8cd450be9aa..3aa4a779092b 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -235,9 +235,6 @@ Who: Thomas Gleixner --------------------------- What (Why): - - include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files - (superseded by xt_TOS/xt_tos target & match) - - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 3a7105bb8f33..86d81a285f9c 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -9,7 +9,6 @@ header-y += ipt_NFQUEUE.h header-y += ipt_REJECT.h header-y += ipt_SAME.h header-y += ipt_TCPMSS.h -header-y += ipt_TOS.h header-y += ipt_TTL.h header-y += ipt_ULOG.h header-y += ipt_addrtype.h @@ -40,7 +39,6 @@ header-y += ipt_sctp.h header-y += ipt_state.h header-y += ipt_string.h header-y += ipt_tcpmss.h -header-y += ipt_tos.h header-y += ipt_ttl.h unifdef-y += ip_queue.h diff --git a/include/linux/netfilter_ipv4/ipt_TOS.h b/include/linux/netfilter_ipv4/ipt_TOS.h deleted file mode 100644 index 6bf9e1fdfd88..000000000000 --- a/include/linux/netfilter_ipv4/ipt_TOS.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _IPT_TOS_H_target -#define _IPT_TOS_H_target - -#ifndef IPTOS_NORMALSVC -#define IPTOS_NORMALSVC 0 -#endif - -struct ipt_tos_target_info { - u_int8_t tos; -}; - -#endif /*_IPT_TOS_H_target*/ diff --git a/include/linux/netfilter_ipv4/ipt_tos.h b/include/linux/netfilter_ipv4/ipt_tos.h deleted file mode 100644 index a21f5df23c50..000000000000 --- a/include/linux/netfilter_ipv4/ipt_tos.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _IPT_TOS_H -#define _IPT_TOS_H - -struct ipt_tos_info { - u_int8_t tos; - u_int8_t invert; -}; - -#ifndef IPTOS_NORMALSVC -#define IPTOS_NORMALSVC 0 -#endif - -#endif /*_IPT_TOS_H*/ diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 6a347e768f86..74ce89260056 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -18,7 +18,6 @@ #include #include -#include MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification"); @@ -72,41 +71,6 @@ static bool dscp_tg_check(const struct xt_tgchk_param *par) return true; } -static unsigned int -tos_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct ipt_tos_target_info *info = par->targinfo; - struct iphdr *iph = ip_hdr(skb); - u_int8_t oldtos; - - if ((iph->tos & IPTOS_TOS_MASK) != info->tos) { - if (!skb_make_writable(skb, sizeof(struct iphdr))) - return NF_DROP; - - iph = ip_hdr(skb); - oldtos = iph->tos; - iph->tos = (iph->tos & IPTOS_PREC_MASK) | info->tos; - csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); - } - - return XT_CONTINUE; -} - -static bool tos_tg_check_v0(const struct xt_tgchk_param *par) -{ - const struct ipt_tos_target_info *info = par->targinfo; - const 
uint8_t tos = info->tos; - - if (tos != IPTOS_LOWDELAY && tos != IPTOS_THROUGHPUT && - tos != IPTOS_RELIABILITY && tos != IPTOS_MINCOST && - tos != IPTOS_NORMALSVC) { - printk(KERN_WARNING "TOS: bad tos value %#x\n", tos); - return false; - } - - return true; -} - static unsigned int tos_tg(struct sk_buff *skb, const struct xt_target_param *par) { @@ -166,16 +130,6 @@ static struct xt_target dscp_tg_reg[] __read_mostly = { .table = "mangle", .me = THIS_MODULE, }, - { - .name = "TOS", - .revision = 0, - .family = NFPROTO_IPV4, - .table = "mangle", - .target = tos_tg_v0, - .targetsize = sizeof(struct ipt_tos_target_info), - .checkentry = tos_tg_check_v0, - .me = THIS_MODULE, - }, { .name = "TOS", .revision = 1, diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index c3f8085460d7..0280d3a8c161 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -15,7 +15,6 @@ #include #include -#include MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("Xtables: DSCP/TOS field match"); @@ -55,14 +54,6 @@ static bool dscp_mt_check(const struct xt_mtchk_param *par) return true; } -static bool -tos_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ipt_tos_info *info = par->matchinfo; - - return (ip_hdr(skb)->tos == info->tos) ^ info->invert; -} - static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct xt_tos_match_info *info = par->matchinfo; @@ -92,14 +83,6 @@ static struct xt_match dscp_mt_reg[] __read_mostly = { .matchsize = sizeof(struct xt_dscp_info), .me = THIS_MODULE, }, - { - .name = "tos", - .revision = 0, - .family = NFPROTO_IPV4, - .match = tos_mt_v0, - .matchsize = sizeof(struct ipt_tos_info), - .me = THIS_MODULE, - }, { .name = "tos", .revision = 1, -- cgit v1.2.3 From e973a70ca033bfcd4d8b59d1f66bfc1e782e1276 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:42:12 +0200 Subject: netfilter: xtables: remove xt_CONNMARK v0 Superseded by xt_CONNMARK v1 (v2.6.24-2917-g0dc8c76). 
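Assuming the usual case where the mark value lies within the mask, the removed v0 modes map onto v1's xt_connmark_tginfo1 as follows (a sketch; the v1 semantics are not shown in this hunk):

	/*
	 * v0 SET  (mark, mask) -> { .mode = XT_CONNMARK_SET,
	 *                           .ctmark = mark, .ctmask = mask }
	 * v0 SAVE (mask)       -> { .mode = XT_CONNMARK_SAVE,
	 *                           .ctmask = mask, .nfmask = mask }
	 * v0 RESTORE (mask)    -> { .mode = XT_CONNMARK_RESTORE,
	 *                           .ctmask = mask, .nfmask = mask }
	 */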
Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 - include/linux/netfilter/xt_CONNMARK.h | 6 -- net/netfilter/xt_CONNMARK.c | 134 +++-------------------------- 3 files changed, 11 insertions(+), 132 deletions(-) (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 3aa4a779092b..7eccf945d4e0 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,9 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_CONNMARK match revision 0 - (superseded by xt_CONNMARK match revision 1) - - xt_MARK target revisions 0 and 1 (superseded by xt_MARK match revision 2) diff --git a/include/linux/netfilter/xt_CONNMARK.h b/include/linux/netfilter/xt_CONNMARK.h index 7635c8ffdadb..0a8545866752 100644 --- a/include/linux/netfilter/xt_CONNMARK.h +++ b/include/linux/netfilter/xt_CONNMARK.h @@ -18,12 +18,6 @@ enum { XT_CONNMARK_RESTORE }; -struct xt_connmark_target_info { - unsigned long mark; - unsigned long mask; - __u8 mode; -}; - struct xt_connmark_tginfo1 { __u32 ctmark, ctmask, nfmask; __u8 mode; diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index d6e5ab463277..593457068ae1 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -35,45 +35,6 @@ MODULE_ALIAS("ip6t_CONNMARK"); #include #include -static unsigned int -connmark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_connmark_target_info *markinfo = par->targinfo; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - u_int32_t diff; - u_int32_t mark; - u_int32_t newmark; - - ct = nf_ct_get(skb, &ctinfo); - if (ct) { - switch(markinfo->mode) { - case XT_CONNMARK_SET: - newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; - if (newmark != ct->mark) { - ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, ct); - } - break; - case XT_CONNMARK_SAVE: - newmark = (ct->mark & ~markinfo->mask) | - (skb->mark & markinfo->mask); - if (ct->mark != newmark) { - ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, ct); - } - break; - case XT_CONNMARK_RESTORE: - mark = skb->mark; - diff = (ct->mark ^ mark) & markinfo->mask; - skb->mark = mark ^ diff; - break; - } - } - - return XT_CONTINUE; -} - static unsigned int connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) { @@ -112,30 +73,6 @@ connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool connmark_tg_check_v0(const struct xt_tgchk_param *par) -{ - const struct xt_connmark_target_info *matchinfo = par->targinfo; - - if (matchinfo->mode == XT_CONNMARK_RESTORE) { - if (strcmp(par->table, "mangle") != 0) { - printk(KERN_WARNING "CONNMARK: restore can only be " - "called from \"mangle\" table, not \"%s\"\n", - par->table); - return false; - } - } - if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { - printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); - return false; - } - if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->family); - return false; - } - return true; -} - static bool connmark_tg_check(const struct xt_tgchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { @@ -151,74 +88,25 @@ static void connmark_tg_destroy(const struct xt_tgdtor_param *par) 
nf_ct_l3proto_module_put(par->family); } -#ifdef CONFIG_COMPAT -struct compat_xt_connmark_target_info { - compat_ulong_t mark, mask; - u_int8_t mode; - u_int8_t __pad1; - u_int16_t __pad2; -}; - -static void connmark_tg_compat_from_user_v0(void *dst, void *src) -{ - const struct compat_xt_connmark_target_info *cm = src; - struct xt_connmark_target_info m = { - .mark = cm->mark, - .mask = cm->mask, - .mode = cm->mode, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int connmark_tg_compat_to_user_v0(void __user *dst, void *src) -{ - const struct xt_connmark_target_info *m = src; - struct compat_xt_connmark_target_info cm = { - .mark = m->mark, - .mask = m->mask, - .mode = m->mode, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_target connmark_tg_reg[] __read_mostly = { - { - .name = "CONNMARK", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_tg_check_v0, - .destroy = connmark_tg_destroy, - .target = connmark_tg_v0, - .targetsize = sizeof(struct xt_connmark_target_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_connmark_target_info), - .compat_from_user = connmark_tg_compat_from_user_v0, - .compat_to_user = connmark_tg_compat_to_user_v0, -#endif - .me = THIS_MODULE - }, - { - .name = "CONNMARK", - .revision = 1, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_tg_check, - .target = connmark_tg, - .targetsize = sizeof(struct xt_connmark_tginfo1), - .destroy = connmark_tg_destroy, - .me = THIS_MODULE, - }, +static struct xt_target connmark_tg_reg __read_mostly = { + .name = "CONNMARK", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = connmark_tg_check, + .target = connmark_tg, + .targetsize = sizeof(struct xt_connmark_tginfo1), + .destroy = connmark_tg_destroy, + .me = THIS_MODULE, }; static int __init connmark_tg_init(void) { - return xt_register_targets(connmark_tg_reg, - ARRAY_SIZE(connmark_tg_reg)); + return xt_register_target(&connmark_tg_reg); } static void __exit connmark_tg_exit(void) { - xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg)); + xt_unregister_target(&connmark_tg_reg); } module_init(connmark_tg_init); -- cgit v1.2.3 From c8001f7fd5a4684280fddceed9fae9ea2e4fb521 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:47:32 +0200 Subject: netfilter: xtables: remove xt_MARK v0, v1 Superseded by xt_MARK v2 (v2.6.24-2918-ge0a812a). 
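Both removed revisions are special cases of v2's single update rule, skb->mark = (skb->mark & ~mask) ^ mark. The equivalences, as a sketch:

	/*
	 * XT_MARK_SET v  -> { .mark = v, .mask = 0xffffffff }   mark := v
	 * XT_MARK_AND v  -> { .mark = 0, .mask = ~v }           mark &= v
	 * XT_MARK_OR  v  -> { .mark = v, .mask = v }            mark |= v
	 */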
Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 - include/linux/netfilter/xt_MARK.h | 17 --- net/netfilter/xt_MARK.c | 163 ++--------------------------- 3 files changed, 9 insertions(+), 174 deletions(-) (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 7eccf945d4e0..121e19c9eee6 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,9 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_MARK target revisions 0 and 1 - (superseded by xt_MARK match revision 2) - - xt_connmark match revision 0 (superseded by xt_connmark match revision 1) diff --git a/include/linux/netfilter/xt_MARK.h b/include/linux/netfilter/xt_MARK.h index 028304bcc0b1..bc9561bdef79 100644 --- a/include/linux/netfilter/xt_MARK.h +++ b/include/linux/netfilter/xt_MARK.h @@ -3,23 +3,6 @@ #include -/* Version 0 */ -struct xt_mark_target_info { - unsigned long mark; -}; - -/* Version 1 */ -enum { - XT_MARK_SET=0, - XT_MARK_AND, - XT_MARK_OR, -}; - -struct xt_mark_target_info_v1 { - unsigned long mark; - __u8 mode; -}; - struct xt_mark_tginfo2 { __u32 mark, mask; }; diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 67574bcfb8ac..225f8d11e173 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -24,39 +24,6 @@ MODULE_DESCRIPTION("Xtables: packet mark modification"); MODULE_ALIAS("ipt_MARK"); MODULE_ALIAS("ip6t_MARK"); -static unsigned int -mark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_mark_target_info *markinfo = par->targinfo; - - skb->mark = markinfo->mark; - return XT_CONTINUE; -} - -static unsigned int -mark_tg_v1(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_mark_target_info_v1 *markinfo = par->targinfo; - int mark = 0; - - switch (markinfo->mode) { - case XT_MARK_SET: - mark = markinfo->mark; - break; - - case XT_MARK_AND: - mark = skb->mark & markinfo->mark; - break; - - case XT_MARK_OR: - mark = skb->mark | markinfo->mark; - break; - } - - skb->mark = mark; - return XT_CONTINUE; -} - static unsigned int mark_tg(struct sk_buff *skb, const struct xt_target_param *par) { @@ -66,135 +33,23 @@ mark_tg(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool mark_tg_check_v0(const struct xt_tgchk_param *par) -{ - const struct xt_mark_target_info *markinfo = par->targinfo; - - if (markinfo->mark > 0xffffffff) { - printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); - return false; - } - return true; -} - -static bool mark_tg_check_v1(const struct xt_tgchk_param *par) -{ - const struct xt_mark_target_info_v1 *markinfo = par->targinfo; - - if (markinfo->mode != XT_MARK_SET - && markinfo->mode != XT_MARK_AND - && markinfo->mode != XT_MARK_OR) { - printk(KERN_WARNING "MARK: unknown mode %u\n", - markinfo->mode); - return false; - } - if (markinfo->mark > 0xffffffff) { - printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); - return false; - } - return true; -} - -#ifdef CONFIG_COMPAT -struct compat_xt_mark_target_info { - compat_ulong_t mark; -}; - -static void mark_tg_compat_from_user_v0(void *dst, void *src) -{ - const struct compat_xt_mark_target_info *cm = src; - struct xt_mark_target_info m = { - .mark = cm->mark, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int mark_tg_compat_to_user_v0(void __user *dst, void 
*src) -{ - const struct xt_mark_target_info *m = src; - struct compat_xt_mark_target_info cm = { - .mark = m->mark, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} - -struct compat_xt_mark_target_info_v1 { - compat_ulong_t mark; - u_int8_t mode; - u_int8_t __pad1; - u_int16_t __pad2; -}; - -static void mark_tg_compat_from_user_v1(void *dst, void *src) -{ - const struct compat_xt_mark_target_info_v1 *cm = src; - struct xt_mark_target_info_v1 m = { - .mark = cm->mark, - .mode = cm->mode, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int mark_tg_compat_to_user_v1(void __user *dst, void *src) -{ - const struct xt_mark_target_info_v1 *m = src; - struct compat_xt_mark_target_info_v1 cm = { - .mark = m->mark, - .mode = m->mode, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_target mark_tg_reg[] __read_mostly = { - { - .name = "MARK", - .family = NFPROTO_UNSPEC, - .revision = 0, - .checkentry = mark_tg_check_v0, - .target = mark_tg_v0, - .targetsize = sizeof(struct xt_mark_target_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_target_info), - .compat_from_user = mark_tg_compat_from_user_v0, - .compat_to_user = mark_tg_compat_to_user_v0, -#endif - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .family = NFPROTO_UNSPEC, - .revision = 1, - .checkentry = mark_tg_check_v1, - .target = mark_tg_v1, - .targetsize = sizeof(struct xt_mark_target_info_v1), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_target_info_v1), - .compat_from_user = mark_tg_compat_from_user_v1, - .compat_to_user = mark_tg_compat_to_user_v1, -#endif - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .revision = 2, - .family = NFPROTO_UNSPEC, - .target = mark_tg, - .targetsize = sizeof(struct xt_mark_tginfo2), - .me = THIS_MODULE, - }, +static struct xt_target mark_tg_reg __read_mostly = { + .name = "MARK", + .revision = 2, + .family = NFPROTO_UNSPEC, + .target = mark_tg, + .targetsize = sizeof(struct xt_mark_tginfo2), + .me = THIS_MODULE, }; static int __init mark_tg_init(void) { - return xt_register_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg)); + return xt_register_target(&mark_tg_reg); } static void __exit mark_tg_exit(void) { - xt_unregister_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg)); + xt_unregister_target(&mark_tg_reg); } module_init(mark_tg_init); -- cgit v1.2.3 From 84899a2b9adaf6c2e20d198d7c24562ce6b391d8 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:50:33 +0200 Subject: netfilter: xtables: remove xt_connmark v0 Superseded by xt_connmark v1 (v2.6.24-2919-g96e3227). 
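The match logic is unchanged between revisions; what v1 buys is a fixed-width ABI. v0's unsigned long fields change size between 32- and 64-bit kernels, which is exactly why the CONFIG_COMPAT translation below existed, while v1's __u32 layout is identical on both. A sketch of the v1 structure in use (values illustrative):

	struct xt_connmark_mtinfo1 info = {
		.mark   = 0x1,   /* __u32: same size on every ABI */
		.mask   = 0xff,
		.invert = 0,
	};
	/* matched as ((ct->mark & info.mask) == info.mark) ^ info.invert */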
Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 - include/linux/netfilter/xt_connmark.h | 5 -- net/netfilter/xt_connmark.c | 101 ++++------------------------- 3 files changed, 11 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 121e19c9eee6..54f935794922 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,9 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_connmark match revision 0 - (superseded by xt_connmark match revision 1) - - xt_conntrack match revision 0 (superseded by xt_conntrack match revision 1) diff --git a/include/linux/netfilter/xt_connmark.h b/include/linux/netfilter/xt_connmark.h index 571e266d004c..619e47cde01a 100644 --- a/include/linux/netfilter/xt_connmark.h +++ b/include/linux/netfilter/xt_connmark.h @@ -12,11 +12,6 @@ * (at your option) any later version. */ -struct xt_connmark_info { - unsigned long mark, mask; - __u8 invert; -}; - struct xt_connmark_mtinfo1 { __u32 mark, mask; __u8 invert; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 86cacab7a4a3..122aa8b0147b 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -47,36 +47,6 @@ connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ((ct->mark & info->mask) == info->mark) ^ info->invert; } -static bool -connmark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct xt_connmark_info *info = par->matchinfo; - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - - ct = nf_ct_get(skb, &ctinfo); - if (!ct) - return false; - - return ((ct->mark & info->mask) == info->mark) ^ info->invert; -} - -static bool connmark_mt_check_v0(const struct xt_mtchk_param *par) -{ - const struct xt_connmark_info *cm = par->matchinfo; - - if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { - printk(KERN_WARNING "connmark: only support 32bit mark\n"); - return false; - } - if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->family); - return false; - } - return true; -} - static bool connmark_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { @@ -92,74 +62,25 @@ static void connmark_mt_destroy(const struct xt_mtdtor_param *par) nf_ct_l3proto_module_put(par->family); } -#ifdef CONFIG_COMPAT -struct compat_xt_connmark_info { - compat_ulong_t mark, mask; - u_int8_t invert; - u_int8_t __pad1; - u_int16_t __pad2; -}; - -static void connmark_mt_compat_from_user_v0(void *dst, void *src) -{ - const struct compat_xt_connmark_info *cm = src; - struct xt_connmark_info m = { - .mark = cm->mark, - .mask = cm->mask, - .invert = cm->invert, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int connmark_mt_compat_to_user_v0(void __user *dst, void *src) -{ - const struct xt_connmark_info *m = src; - struct compat_xt_connmark_info cm = { - .mark = m->mark, - .mask = m->mask, - .invert = m->invert, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? 
-EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_match connmark_mt_reg[] __read_mostly = { - { - .name = "connmark", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_mt_check_v0, - .match = connmark_mt_v0, - .destroy = connmark_mt_destroy, - .matchsize = sizeof(struct xt_connmark_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_connmark_info), - .compat_from_user = connmark_mt_compat_from_user_v0, - .compat_to_user = connmark_mt_compat_to_user_v0, -#endif - .me = THIS_MODULE - }, - { - .name = "connmark", - .revision = 1, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_mt_check, - .match = connmark_mt, - .matchsize = sizeof(struct xt_connmark_mtinfo1), - .destroy = connmark_mt_destroy, - .me = THIS_MODULE, - }, +static struct xt_match connmark_mt_reg __read_mostly = { + .name = "connmark", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = connmark_mt_check, + .match = connmark_mt, + .matchsize = sizeof(struct xt_connmark_mtinfo1), + .destroy = connmark_mt_destroy, + .me = THIS_MODULE, }; static int __init connmark_mt_init(void) { - return xt_register_matches(connmark_mt_reg, - ARRAY_SIZE(connmark_mt_reg)); + return xt_register_match(&connmark_mt_reg); } static void __exit connmark_mt_exit(void) { - xt_unregister_matches(connmark_mt_reg, ARRAY_SIZE(connmark_mt_reg)); + xt_unregister_match(&connmark_mt_reg); } module_init(connmark_mt_init); -- cgit v1.2.3 From 9e05ec4b1804a1ba51f61fe169aef9b86edcd3f7 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:56:14 +0200 Subject: netfilter: xtables: remove xt_conntrack v0 Superseded by xt_conntrack v1 (v2.6.24-2921-g64eb12f). Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 - include/linux/netfilter/xt_conntrack.h | 36 ------- net/netfilter/xt_conntrack.c | 155 +---------------------------- 3 files changed, 1 insertion(+), 193 deletions(-) (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 54f935794922..6746473ef033 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,9 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_conntrack match revision 0 - (superseded by xt_conntrack match revision 1) - - xt_iprange match revision 0, include/linux/netfilter_ipv4/ipt_iprange.h (superseded by xt_iprange match revision 1) diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h index 7ae05338e94c..54f47a2f6152 100644 --- a/include/linux/netfilter/xt_conntrack.h +++ b/include/linux/netfilter/xt_conntrack.h @@ -32,42 +32,6 @@ enum { XT_CONNTRACK_DIRECTION = 1 << 12, }; -/* This is exposed to userspace, so remains frozen in time. */ -struct ip_conntrack_old_tuple -{ - struct { - __be32 ip; - union { - __u16 all; - } u; - } src; - - struct { - __be32 ip; - union { - __u16 all; - } u; - - /* The protocol. 
*/ - __u16 protonum; - } dst; -}; - -struct xt_conntrack_info -{ - unsigned int statemask, statusmask; - - struct ip_conntrack_old_tuple tuple[IP_CT_DIR_MAX]; - struct in_addr sipmsk[IP_CT_DIR_MAX], dipmsk[IP_CT_DIR_MAX]; - - unsigned long expires_min, expires_max; - - /* Flags word */ - __u8 flags; - /* Inverse flags */ - __u8 invflags; -}; - struct xt_conntrack_mtinfo1 { union nf_inet_addr origsrc_addr, origsrc_mask; union nf_inet_addr origdst_addr, origdst_mask; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index fc581800698e..6dc4652f2fe8 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -19,100 +19,11 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: connection tracking state match"); MODULE_ALIAS("ipt_conntrack"); MODULE_ALIAS("ip6t_conntrack"); -static bool -conntrack_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct xt_conntrack_info *sinfo = par->matchinfo; - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - unsigned int statebit; - - ct = nf_ct_get(skb, &ctinfo); - -#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & (invflg))) - - if (ct == &nf_conntrack_untracked) - statebit = XT_CONNTRACK_STATE_UNTRACKED; - else if (ct) - statebit = XT_CONNTRACK_STATE_BIT(ctinfo); - else - statebit = XT_CONNTRACK_STATE_INVALID; - - if (sinfo->flags & XT_CONNTRACK_STATE) { - if (ct) { - if (test_bit(IPS_SRC_NAT_BIT, &ct->status)) - statebit |= XT_CONNTRACK_STATE_SNAT; - if (test_bit(IPS_DST_NAT_BIT, &ct->status)) - statebit |= XT_CONNTRACK_STATE_DNAT; - } - if (FWINV((statebit & sinfo->statemask) == 0, - XT_CONNTRACK_STATE)) - return false; - } - - if (ct == NULL) { - if (sinfo->flags & ~XT_CONNTRACK_STATE) - return false; - return true; - } - - if (sinfo->flags & XT_CONNTRACK_PROTO && - FWINV(nf_ct_protonum(ct) != - sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, - XT_CONNTRACK_PROTO)) - return false; - - if (sinfo->flags & XT_CONNTRACK_ORIGSRC && - FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip & - sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != - sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, - XT_CONNTRACK_ORIGSRC)) - return false; - - if (sinfo->flags & XT_CONNTRACK_ORIGDST && - FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip & - sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != - sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, - XT_CONNTRACK_ORIGDST)) - return false; - - if (sinfo->flags & XT_CONNTRACK_REPLSRC && - FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip & - sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != - sinfo->tuple[IP_CT_DIR_REPLY].src.ip, - XT_CONNTRACK_REPLSRC)) - return false; - - if (sinfo->flags & XT_CONNTRACK_REPLDST && - FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip & - sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != - sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, - XT_CONNTRACK_REPLDST)) - return false; - - if (sinfo->flags & XT_CONNTRACK_STATUS && - FWINV((ct->status & sinfo->statusmask) == 0, - XT_CONNTRACK_STATUS)) - return false; - - if(sinfo->flags & XT_CONNTRACK_EXPIRES) { - unsigned long expires = timer_pending(&ct->timeout) ? 
- (ct->timeout.expires - jiffies)/HZ : 0; - - if (FWINV(!(expires >= sinfo->expires_min && - expires <= sinfo->expires_max), - XT_CONNTRACK_EXPIRES)) - return false; - } - return true; -#undef FWINV -} - static bool conntrack_addrcmp(const union nf_inet_addr *kaddr, const union nf_inet_addr *uaddr, @@ -337,71 +248,7 @@ static void conntrack_mt_destroy_v1(const struct xt_mtdtor_param *par) conntrack_mt_destroy(par); } -#ifdef CONFIG_COMPAT -struct compat_xt_conntrack_info -{ - compat_uint_t statemask; - compat_uint_t statusmask; - struct ip_conntrack_old_tuple tuple[IP_CT_DIR_MAX]; - struct in_addr sipmsk[IP_CT_DIR_MAX]; - struct in_addr dipmsk[IP_CT_DIR_MAX]; - compat_ulong_t expires_min; - compat_ulong_t expires_max; - u_int8_t flags; - u_int8_t invflags; -}; - -static void conntrack_mt_compat_from_user_v0(void *dst, void *src) -{ - const struct compat_xt_conntrack_info *cm = src; - struct xt_conntrack_info m = { - .statemask = cm->statemask, - .statusmask = cm->statusmask, - .expires_min = cm->expires_min, - .expires_max = cm->expires_max, - .flags = cm->flags, - .invflags = cm->invflags, - }; - memcpy(m.tuple, cm->tuple, sizeof(m.tuple)); - memcpy(m.sipmsk, cm->sipmsk, sizeof(m.sipmsk)); - memcpy(m.dipmsk, cm->dipmsk, sizeof(m.dipmsk)); - memcpy(dst, &m, sizeof(m)); -} - -static int conntrack_mt_compat_to_user_v0(void __user *dst, void *src) -{ - const struct xt_conntrack_info *m = src; - struct compat_xt_conntrack_info cm = { - .statemask = m->statemask, - .statusmask = m->statusmask, - .expires_min = m->expires_min, - .expires_max = m->expires_max, - .flags = m->flags, - .invflags = m->invflags, - }; - memcpy(cm.tuple, m->tuple, sizeof(cm.tuple)); - memcpy(cm.sipmsk, m->sipmsk, sizeof(cm.sipmsk)); - memcpy(cm.dipmsk, m->dipmsk, sizeof(cm.dipmsk)); - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif - static struct xt_match conntrack_mt_reg[] __read_mostly = { - { - .name = "conntrack", - .revision = 0, - .family = NFPROTO_IPV4, - .match = conntrack_mt_v0, - .checkentry = conntrack_mt_check, - .destroy = conntrack_mt_destroy, - .matchsize = sizeof(struct xt_conntrack_info), - .me = THIS_MODULE, -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_conntrack_info), - .compat_from_user = conntrack_mt_compat_from_user_v0, - .compat_to_user = conntrack_mt_compat_to_user_v0, -#endif - }, { .name = "conntrack", .revision = 1, -- cgit v1.2.3 From 36d4084dc8eb7a9a3655a2041097a46aff3061e9 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:58:19 +0200 Subject: netfilter: xtables: remove xt_iprange v0 Superseded by xt_iprange v1 (v2.6.24-2928-g1a50c5a1). 
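The v1 match keeps its bounds in union nf_inet_addr, covering IPv6 as well as the IPv4-only struct removed here. A sketch of a v1 equivalent of a v0 source-range rule (field names assumed from xt_iprange.h):

	struct xt_iprange_mtinfo info = {
		.src_min.ip = htonl(0x0a000001), /* 10.0.0.1, inclusive */
		.src_max.ip = htonl(0x0a000063), /* 10.0.0.99, inclusive */
		.flags      = IPRANGE_SRC,
	};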
Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 4 --- include/linux/netfilter_ipv4/Kbuild | 1 - include/linux/netfilter_ipv4/ipt_iprange.h | 21 -------------- net/netfilter/xt_iprange.c | 45 ++---------------------------- 4 files changed, 2 insertions(+), 69 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/ipt_iprange.h (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 6746473ef033..8862b037f0ac 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,10 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_iprange match revision 0, - include/linux/netfilter_ipv4/ipt_iprange.h - (superseded by xt_iprange match revision 1) - - xt_mark match revision 0 (superseded by xt_mark match revision 1) diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 86d81a285f9c..5e361ef44e87 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -23,7 +23,6 @@ header-y += ipt_ecn.h header-y += ipt_esp.h header-y += ipt_hashlimit.h header-y += ipt_helper.h -header-y += ipt_iprange.h header-y += ipt_length.h header-y += ipt_limit.h header-y += ipt_mac.h diff --git a/include/linux/netfilter_ipv4/ipt_iprange.h b/include/linux/netfilter_ipv4/ipt_iprange.h deleted file mode 100644 index 5f1aebde4d2f..000000000000 --- a/include/linux/netfilter_ipv4/ipt_iprange.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _IPT_IPRANGE_H -#define _IPT_IPRANGE_H - -#include -#include - -struct ipt_iprange { - /* Inclusive: network order. */ - __be32 min_ip, max_ip; -}; - -struct ipt_iprange_info -{ - struct ipt_iprange src; - struct ipt_iprange dst; - - /* Flags from above */ - u_int8_t flags; -}; - -#endif /* _IPT_IPRANGE_H */ diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 501f9b623188..ffc96387d556 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -14,40 +14,6 @@ #include #include #include -#include - -static bool -iprange_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ipt_iprange_info *info = par->matchinfo; - const struct iphdr *iph = ip_hdr(skb); - - if (info->flags & IPRANGE_SRC) { - if ((ntohl(iph->saddr) < ntohl(info->src.min_ip) - || ntohl(iph->saddr) > ntohl(info->src.max_ip)) - ^ !!(info->flags & IPRANGE_SRC_INV)) { - pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n", - &iph->saddr, - info->flags & IPRANGE_SRC_INV ? "(INV) " : "", - &info->src.min_ip, - &info->src.max_ip); - return false; - } - } - if (info->flags & IPRANGE_DST) { - if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip) - || ntohl(iph->daddr) > ntohl(info->dst.max_ip)) - ^ !!(info->flags & IPRANGE_DST_INV)) { - pr_debug("dst IP %pI4 NOT in range %s%pI4-%pI4\n", - &iph->daddr, - info->flags & IPRANGE_DST_INV ? 
"(INV) " : "", - &info->dst.min_ip, - &info->dst.max_ip); - return false; - } - } - return true; -} static bool iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par) @@ -125,14 +91,6 @@ iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par) } static struct xt_match iprange_mt_reg[] __read_mostly = { - { - .name = "iprange", - .revision = 0, - .family = NFPROTO_IPV4, - .match = iprange_mt_v0, - .matchsize = sizeof(struct ipt_iprange_info), - .me = THIS_MODULE, - }, { .name = "iprange", .revision = 1, @@ -164,7 +122,8 @@ static void __exit iprange_mt_exit(void) module_init(iprange_mt_init); module_exit(iprange_mt_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik , Jan Engelhardt "); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching"); MODULE_ALIAS("ipt_iprange"); MODULE_ALIAS("ip6t_iprange"); -- cgit v1.2.3 From 4725c7287ef2c4340cb433f59e40d143c1f43c22 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 19:02:27 +0200 Subject: netfilter: xtables: remove xt_mark v0 Superseded by xt_mark v1 (v2.6.24-2922-g17b0d7e). Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 -- include/linux/netfilter/xt_mark.h | 5 -- net/netfilter/xt_mark.c | 86 ++++-------------------------- 3 files changed, 10 insertions(+), 84 deletions(-) (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 8862b037f0ac..5556d2300bea 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,9 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_mark match revision 0 - (superseded by xt_mark match revision 1) - - xt_recent: the old ipt_recent proc dir (superseded by /proc/net/xt_recent) diff --git a/include/linux/netfilter/xt_mark.h b/include/linux/netfilter/xt_mark.h index 6fa460a3cc29..6607c8f38ea5 100644 --- a/include/linux/netfilter/xt_mark.h +++ b/include/linux/netfilter/xt_mark.h @@ -3,11 +3,6 @@ #include -struct xt_mark_info { - unsigned long mark, mask; - __u8 invert; -}; - struct xt_mark_mtinfo1 { __u32 mark, mask; __u8 invert; diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 10b9e34bbc5b..1db07d8125f8 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -3,7 +3,7 @@ * * (C) 1999-2001 Marc Boucher * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * Jan Engelhardt + * Jan Engelhardt * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -22,14 +22,6 @@ MODULE_DESCRIPTION("Xtables: packet mark match"); MODULE_ALIAS("ipt_mark"); MODULE_ALIAS("ip6t_mark"); -static bool -mark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct xt_mark_info *info = par->matchinfo; - - return ((skb->mark & info->mask) == info->mark) ^ info->invert; -} - static bool mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) { @@ -38,81 +30,23 @@ mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static bool mark_mt_check_v0(const struct xt_mtchk_param *par) -{ - const struct xt_mark_info *minfo = par->matchinfo; - - if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { - printk(KERN_WARNING 
"mark: only supports 32bit mark\n"); - return false; - } - return true; -} - -#ifdef CONFIG_COMPAT -struct compat_xt_mark_info { - compat_ulong_t mark, mask; - u_int8_t invert; - u_int8_t __pad1; - u_int16_t __pad2; -}; - -static void mark_mt_compat_from_user_v0(void *dst, void *src) -{ - const struct compat_xt_mark_info *cm = src; - struct xt_mark_info m = { - .mark = cm->mark, - .mask = cm->mask, - .invert = cm->invert, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int mark_mt_compat_to_user_v0(void __user *dst, void *src) -{ - const struct xt_mark_info *m = src; - struct compat_xt_mark_info cm = { - .mark = m->mark, - .mask = m->mask, - .invert = m->invert, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_match mark_mt_reg[] __read_mostly = { - { - .name = "mark", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = mark_mt_check_v0, - .match = mark_mt_v0, - .matchsize = sizeof(struct xt_mark_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_info), - .compat_from_user = mark_mt_compat_from_user_v0, - .compat_to_user = mark_mt_compat_to_user_v0, -#endif - .me = THIS_MODULE, - }, - { - .name = "mark", - .revision = 1, - .family = NFPROTO_UNSPEC, - .match = mark_mt, - .matchsize = sizeof(struct xt_mark_mtinfo1), - .me = THIS_MODULE, - }, +static struct xt_match mark_mt_reg __read_mostly = { + .name = "mark", + .revision = 1, + .family = NFPROTO_UNSPEC, + .match = mark_mt, + .matchsize = sizeof(struct xt_mark_mtinfo1), + .me = THIS_MODULE, }; static int __init mark_mt_init(void) { - return xt_register_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg)); + return xt_register_match(&mark_mt_reg); } static void __exit mark_mt_exit(void) { - xt_unregister_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg)); + xt_unregister_match(&mark_mt_reg); } module_init(mark_mt_init); -- cgit v1.2.3 From 6461caed83412ae3e9a16785ffa64396fb66c6a6 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 19:46:26 +0200 Subject: netfilter: xtables: remove xt_owner v0 Superseded by xt_owner v1 (v2.6.24-2388-g0265ab4). 
Signed-off-by: Jan Engelhardt --- include/linux/netfilter_ipv4/Kbuild | 1 - include/linux/netfilter_ipv4/ipt_owner.h | 20 ----- include/linux/netfilter_ipv6/Kbuild | 1 - include/linux/netfilter_ipv6/ip6t_owner.h | 18 ----- net/netfilter/xt_owner.c | 130 +++--------------------------- 5 files changed, 12 insertions(+), 158 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/ipt_owner.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_owner.h (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 5e361ef44e87..541300531cb3 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -28,7 +28,6 @@ header-y += ipt_limit.h header-y += ipt_mac.h header-y += ipt_mark.h header-y += ipt_multiport.h -header-y += ipt_owner.h header-y += ipt_physdev.h header-y += ipt_pkttype.h header-y += ipt_policy.h diff --git a/include/linux/netfilter_ipv4/ipt_owner.h b/include/linux/netfilter_ipv4/ipt_owner.h deleted file mode 100644 index a78445be9992..000000000000 --- a/include/linux/netfilter_ipv4/ipt_owner.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _IPT_OWNER_H -#define _IPT_OWNER_H - -/* match and invert flags */ -#define IPT_OWNER_UID 0x01 -#define IPT_OWNER_GID 0x02 -#define IPT_OWNER_PID 0x04 -#define IPT_OWNER_SID 0x08 -#define IPT_OWNER_COMM 0x10 - -struct ipt_owner_info { - __kernel_uid32_t uid; - __kernel_gid32_t gid; - __kernel_pid_t pid; - __kernel_pid_t sid; - char comm[16]; - u_int8_t match, invert; /* flags */ -}; - -#endif /*_IPT_OWNER_H*/ diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild index aca4bd1f6d7c..4610a16da0ab 100644 --- a/include/linux/netfilter_ipv6/Kbuild +++ b/include/linux/netfilter_ipv6/Kbuild @@ -14,7 +14,6 @@ header-y += ip6t_mark.h header-y += ip6t_mh.h header-y += ip6t_multiport.h header-y += ip6t_opts.h -header-y += ip6t_owner.h header-y += ip6t_physdev.h header-y += ip6t_policy.h header-y += ip6t_rt.h diff --git a/include/linux/netfilter_ipv6/ip6t_owner.h b/include/linux/netfilter_ipv6/ip6t_owner.h deleted file mode 100644 index ec5cc7a38c42..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_owner.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _IP6T_OWNER_H -#define _IP6T_OWNER_H - -/* match and invert flags */ -#define IP6T_OWNER_UID 0x01 -#define IP6T_OWNER_GID 0x02 -#define IP6T_OWNER_PID 0x04 -#define IP6T_OWNER_SID 0x08 - -struct ip6t_owner_info { - __kernel_uid32_t uid; - __kernel_gid32_t gid; - __kernel_pid_t pid; - __kernel_pid_t sid; - u_int8_t match, invert; /* flags */ -}; - -#endif /*_IPT_OWNER_H*/ diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 22b2a5e881ea..d24c76dffee2 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -5,7 +5,6 @@ * (C) 2000 Marc Boucher * * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,60 +16,6 @@ #include #include #include -#include -#include - -static bool -owner_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ipt_owner_info *info = par->matchinfo; - const struct file *filp; - - if (skb->sk == NULL || skb->sk->sk_socket == NULL) - return false; - - filp = skb->sk->sk_socket->file; - if (filp == NULL) - return false; - - if (info->match & IPT_OWNER_UID) - if ((filp->f_cred->fsuid != info->uid) ^ - !!(info->invert & IPT_OWNER_UID)) - return false; - - if 
(info->match & IPT_OWNER_GID) - if ((filp->f_cred->fsgid != info->gid) ^ - !!(info->invert & IPT_OWNER_GID)) - return false; - - return true; -} - -static bool -owner_mt6_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ip6t_owner_info *info = par->matchinfo; - const struct file *filp; - - if (skb->sk == NULL || skb->sk->sk_socket == NULL) - return false; - - filp = skb->sk->sk_socket->file; - if (filp == NULL) - return false; - - if (info->match & IP6T_OWNER_UID) - if ((filp->f_cred->fsuid != info->uid) ^ - !!(info->invert & IP6T_OWNER_UID)) - return false; - - if (info->match & IP6T_OWNER_GID) - if ((filp->f_cred->fsgid != info->gid) ^ - !!(info->invert & IP6T_OWNER_GID)) - return false; - - return true; -} static bool owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) @@ -107,81 +52,30 @@ owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool owner_mt_check_v0(const struct xt_mtchk_param *par) -{ - const struct ipt_owner_info *info = par->matchinfo; - - if (info->match & (IPT_OWNER_PID | IPT_OWNER_SID | IPT_OWNER_COMM)) { - printk(KERN_WARNING KBUILD_MODNAME - ": PID, SID and command matching is not " - "supported anymore\n"); - return false; - } - - return true; -} - -static bool owner_mt6_check_v0(const struct xt_mtchk_param *par) -{ - const struct ip6t_owner_info *info = par->matchinfo; - - if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) { - printk(KERN_WARNING KBUILD_MODNAME - ": PID and SID matching is not supported anymore\n"); - return false; - } - - return true; -} - -static struct xt_match owner_mt_reg[] __read_mostly = { - { - .name = "owner", - .revision = 0, - .family = NFPROTO_IPV4, - .match = owner_mt_v0, - .matchsize = sizeof(struct ipt_owner_info), - .checkentry = owner_mt_check_v0, - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, - { - .name = "owner", - .revision = 0, - .family = NFPROTO_IPV6, - .match = owner_mt6_v0, - .matchsize = sizeof(struct ip6t_owner_info), - .checkentry = owner_mt6_check_v0, - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, - { - .name = "owner", - .revision = 1, - .family = NFPROTO_UNSPEC, - .match = owner_mt, - .matchsize = sizeof(struct xt_owner_match_info), - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, +static struct xt_match owner_mt_reg __read_mostly = { + .name = "owner", + .revision = 1, + .family = NFPROTO_UNSPEC, + .match = owner_mt, + .matchsize = sizeof(struct xt_owner_match_info), + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), + .me = THIS_MODULE, }; static int __init owner_mt_init(void) { - return xt_register_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg)); + return xt_register_match(&owner_mt_reg); } static void __exit owner_mt_exit(void) { - xt_unregister_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg)); + xt_unregister_match(&owner_mt_reg); } module_init(owner_mt_init); module_exit(owner_mt_exit); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: socket owner matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_owner"); -- cgit v1.2.3 From 93bb1e9d117bfc60306b2b8bd9e0fa7ba3c88636 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 19:47:21 +0200 Subject: netfilter: xtables: remove redirecting header files When IPv4 and IPv6 matches were unified approx. 3.5 years ago, they received new header filenames (e.g. 
xt_CLASSIFY.h). Let's remove the old ones now. Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 - include/linux/netfilter_ipv4/Kbuild | 28 ------- include/linux/netfilter_ipv4/ipt_CLASSIFY.h | 7 -- include/linux/netfilter_ipv4/ipt_CONNMARK.h | 19 ----- include/linux/netfilter_ipv4/ipt_DSCP.h | 18 ----- include/linux/netfilter_ipv4/ipt_ECN.h | 4 +- include/linux/netfilter_ipv4/ipt_MARK.h | 18 ----- include/linux/netfilter_ipv4/ipt_NFQUEUE.h | 16 ---- include/linux/netfilter_ipv4/ipt_TCPMSS.h | 9 --- include/linux/netfilter_ipv4/ipt_comment.h | 10 --- include/linux/netfilter_ipv4/ipt_connbytes.h | 18 ----- include/linux/netfilter_ipv4/ipt_connmark.h | 7 -- include/linux/netfilter_ipv4/ipt_conntrack.h | 28 ------- include/linux/netfilter_ipv4/ipt_dccp.h | 15 ---- include/linux/netfilter_ipv4/ipt_dscp.h | 21 ------ include/linux/netfilter_ipv4/ipt_ecn.h | 4 +- include/linux/netfilter_ipv4/ipt_esp.h | 10 --- include/linux/netfilter_ipv4/ipt_hashlimit.h | 14 ---- include/linux/netfilter_ipv4/ipt_helper.h | 7 -- include/linux/netfilter_ipv4/ipt_length.h | 7 -- include/linux/netfilter_ipv4/ipt_limit.h | 8 -- include/linux/netfilter_ipv4/ipt_mac.h | 7 -- include/linux/netfilter_ipv4/ipt_mark.h | 9 --- include/linux/netfilter_ipv4/ipt_multiport.h | 15 ---- include/linux/netfilter_ipv4/ipt_physdev.h | 17 ----- include/linux/netfilter_ipv4/ipt_pkttype.h | 7 -- include/linux/netfilter_ipv4/ipt_policy.h | 23 ------ include/linux/netfilter_ipv4/ipt_recent.h | 21 ------ include/linux/netfilter_ipv4/ipt_sctp.h | 105 -------------------------- include/linux/netfilter_ipv4/ipt_state.h | 15 ---- include/linux/netfilter_ipv4/ipt_string.h | 10 --- include/linux/netfilter_ipv4/ipt_tcpmss.h | 7 -- include/linux/netfilter_ipv6/Kbuild | 11 +-- include/linux/netfilter_ipv6/ip6t_MARK.h | 9 --- include/linux/netfilter_ipv6/ip6t_esp.h | 10 --- include/linux/netfilter_ipv6/ip6t_length.h | 8 -- include/linux/netfilter_ipv6/ip6t_limit.h | 8 -- include/linux/netfilter_ipv6/ip6t_mac.h | 7 -- include/linux/netfilter_ipv6/ip6t_mark.h | 9 --- include/linux/netfilter_ipv6/ip6t_multiport.h | 14 ---- include/linux/netfilter_ipv6/ip6t_physdev.h | 17 ----- include/linux/netfilter_ipv6/ip6t_policy.h | 23 ------ 42 files changed, 5 insertions(+), 618 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/ipt_CLASSIFY.h delete mode 100644 include/linux/netfilter_ipv4/ipt_CONNMARK.h delete mode 100644 include/linux/netfilter_ipv4/ipt_DSCP.h delete mode 100644 include/linux/netfilter_ipv4/ipt_MARK.h delete mode 100644 include/linux/netfilter_ipv4/ipt_NFQUEUE.h delete mode 100644 include/linux/netfilter_ipv4/ipt_TCPMSS.h delete mode 100644 include/linux/netfilter_ipv4/ipt_comment.h delete mode 100644 include/linux/netfilter_ipv4/ipt_connbytes.h delete mode 100644 include/linux/netfilter_ipv4/ipt_connmark.h delete mode 100644 include/linux/netfilter_ipv4/ipt_conntrack.h delete mode 100644 include/linux/netfilter_ipv4/ipt_dccp.h delete mode 100644 include/linux/netfilter_ipv4/ipt_dscp.h delete mode 100644 include/linux/netfilter_ipv4/ipt_esp.h delete mode 100644 include/linux/netfilter_ipv4/ipt_hashlimit.h delete mode 100644 include/linux/netfilter_ipv4/ipt_helper.h delete mode 100644 include/linux/netfilter_ipv4/ipt_length.h delete mode 100644 include/linux/netfilter_ipv4/ipt_limit.h delete mode 100644 include/linux/netfilter_ipv4/ipt_mac.h delete mode 100644 include/linux/netfilter_ipv4/ipt_mark.h delete mode 100644 include/linux/netfilter_ipv4/ipt_multiport.h delete mode 100644 
include/linux/netfilter_ipv4/ipt_physdev.h delete mode 100644 include/linux/netfilter_ipv4/ipt_pkttype.h delete mode 100644 include/linux/netfilter_ipv4/ipt_policy.h delete mode 100644 include/linux/netfilter_ipv4/ipt_recent.h delete mode 100644 include/linux/netfilter_ipv4/ipt_sctp.h delete mode 100644 include/linux/netfilter_ipv4/ipt_state.h delete mode 100644 include/linux/netfilter_ipv4/ipt_string.h delete mode 100644 include/linux/netfilter_ipv4/ipt_tcpmss.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_MARK.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_esp.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_length.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_limit.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_mac.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_mark.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_multiport.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_physdev.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_policy.h (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 5556d2300bea..698e1e8b3042 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -235,9 +235,6 @@ Who: Thomas Gleixner --------------------------- What (Why): - - "forwarding" header files like ipt_mac.h in - include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_recent: the old ipt_recent proc dir (superseded by /proc/net/xt_recent) diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 541300531cb3..431b40761920 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -1,42 +1,14 @@ -header-y += ipt_CLASSIFY.h header-y += ipt_CLUSTERIP.h -header-y += ipt_CONNMARK.h -header-y += ipt_DSCP.h header-y += ipt_ECN.h header-y += ipt_LOG.h -header-y += ipt_MARK.h -header-y += ipt_NFQUEUE.h header-y += ipt_REJECT.h header-y += ipt_SAME.h -header-y += ipt_TCPMSS.h header-y += ipt_TTL.h header-y += ipt_ULOG.h header-y += ipt_addrtype.h header-y += ipt_ah.h -header-y += ipt_comment.h -header-y += ipt_connbytes.h -header-y += ipt_connmark.h -header-y += ipt_conntrack.h -header-y += ipt_dccp.h -header-y += ipt_dscp.h header-y += ipt_ecn.h -header-y += ipt_esp.h -header-y += ipt_hashlimit.h -header-y += ipt_helper.h -header-y += ipt_length.h -header-y += ipt_limit.h -header-y += ipt_mac.h -header-y += ipt_mark.h -header-y += ipt_multiport.h -header-y += ipt_physdev.h -header-y += ipt_pkttype.h -header-y += ipt_policy.h header-y += ipt_realm.h -header-y += ipt_recent.h -header-y += ipt_sctp.h -header-y += ipt_state.h -header-y += ipt_string.h -header-y += ipt_tcpmss.h header-y += ipt_ttl.h unifdef-y += ip_queue.h diff --git a/include/linux/netfilter_ipv4/ipt_CLASSIFY.h b/include/linux/netfilter_ipv4/ipt_CLASSIFY.h deleted file mode 100644 index a46d511b5c36..000000000000 --- a/include/linux/netfilter_ipv4/ipt_CLASSIFY.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_CLASSIFY_H -#define _IPT_CLASSIFY_H - -#include -#define ipt_classify_target_info xt_classify_target_info - -#endif /*_IPT_CLASSIFY_H */ diff --git a/include/linux/netfilter_ipv4/ipt_CONNMARK.h b/include/linux/netfilter_ipv4/ipt_CONNMARK.h deleted file mode 100644 index 9ecfee0a9e33..000000000000 --- a/include/linux/netfilter_ipv4/ipt_CONNMARK.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _IPT_CONNMARK_H_target -#define _IPT_CONNMARK_H_target - -/* Copyright (C) 2002,2004 
MARA Systems AB - * by Henrik Nordstrom - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ -#include -#define IPT_CONNMARK_SET XT_CONNMARK_SET -#define IPT_CONNMARK_SAVE XT_CONNMARK_SAVE -#define IPT_CONNMARK_RESTORE XT_CONNMARK_RESTORE - -#define ipt_connmark_target_info xt_connmark_target_info - -#endif /*_IPT_CONNMARK_H_target*/ diff --git a/include/linux/netfilter_ipv4/ipt_DSCP.h b/include/linux/netfilter_ipv4/ipt_DSCP.h deleted file mode 100644 index 3491e524d5ea..000000000000 --- a/include/linux/netfilter_ipv4/ipt_DSCP.h +++ /dev/null @@ -1,18 +0,0 @@ -/* iptables module for setting the IPv4 DSCP field - * - * (C) 2002 Harald Welte - * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh - * This software is distributed under GNU GPL v2, 1991 - * - * See RFC2474 for a description of the DSCP field within the IP Header. - * - * ipt_DSCP.h,v 1.7 2002/03/14 12:03:13 laforge Exp -*/ -#ifndef _IPT_DSCP_TARGET_H -#define _IPT_DSCP_TARGET_H -#include -#include - -#define ipt_DSCP_info xt_DSCP_info - -#endif /* _IPT_DSCP_TARGET_H */ diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/linux/netfilter_ipv4/ipt_ECN.h index 94e0d9866469..7ca45918ab8e 100644 --- a/include/linux/netfilter_ipv4/ipt_ECN.h +++ b/include/linux/netfilter_ipv4/ipt_ECN.h @@ -8,9 +8,9 @@ */ #ifndef _IPT_ECN_TARGET_H #define _IPT_ECN_TARGET_H -#include +#include -#define IPT_ECN_IP_MASK (~IPT_DSCP_MASK) +#define IPT_ECN_IP_MASK (~XT_DSCP_MASK) #define IPT_ECN_OP_SET_IP 0x01 /* set ECN bits of IPv4 header */ #define IPT_ECN_OP_SET_ECE 0x10 /* set ECE bit of TCP header */ diff --git a/include/linux/netfilter_ipv4/ipt_MARK.h b/include/linux/netfilter_ipv4/ipt_MARK.h deleted file mode 100644 index 697a486a96d3..000000000000 --- a/include/linux/netfilter_ipv4/ipt_MARK.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _IPT_MARK_H_target -#define _IPT_MARK_H_target - -/* Backwards compatibility for old userspace */ - -#include - -/* Version 0 */ -#define ipt_mark_target_info xt_mark_target_info - -/* Version 1 */ -#define IPT_MARK_SET XT_MARK_SET -#define IPT_MARK_AND XT_MARK_AND -#define IPT_MARK_OR XT_MARK_OR - -#define ipt_mark_target_info_v1 xt_mark_target_info_v1 - -#endif /*_IPT_MARK_H_target*/ diff --git a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h deleted file mode 100644 index 97a2a7557cb9..000000000000 --- a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h +++ /dev/null @@ -1,16 +0,0 @@ -/* iptables module for using NFQUEUE mechanism - * - * (C) 2005 Harald Welte - * - * This software is distributed under GNU GPL v2, 1991 - * -*/ -#ifndef _IPT_NFQ_TARGET_H -#define _IPT_NFQ_TARGET_H - -/* Backwards compatibility for old userspace */ -#include - -#define ipt_NFQ_info xt_NFQ_info - -#endif /* _IPT_DSCP_TARGET_H */ diff --git a/include/linux/netfilter_ipv4/ipt_TCPMSS.h b/include/linux/netfilter_ipv4/ipt_TCPMSS.h deleted file mode 100644 index 7a850f945824..000000000000 --- a/include/linux/netfilter_ipv4/ipt_TCPMSS.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _IPT_TCPMSS_H -#define _IPT_TCPMSS_H - -#include - -#define ipt_tcpmss_info xt_tcpmss_info -#define IPT_TCPMSS_CLAMP_PMTU XT_TCPMSS_CLAMP_PMTU - -#endif /*_IPT_TCPMSS_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_comment.h b/include/linux/netfilter_ipv4/ipt_comment.h deleted file mode 100644 index 
ae2afc2f7481..000000000000 --- a/include/linux/netfilter_ipv4/ipt_comment.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _IPT_COMMENT_H -#define _IPT_COMMENT_H - -#include - -#define IPT_MAX_COMMENT_LEN XT_MAX_COMMENT_LEN - -#define ipt_comment_info xt_comment_info - -#endif /* _IPT_COMMENT_H */ diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h deleted file mode 100644 index f63e6ee91113..000000000000 --- a/include/linux/netfilter_ipv4/ipt_connbytes.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _IPT_CONNBYTES_H -#define _IPT_CONNBYTES_H - -#include -#define ipt_connbytes_what xt_connbytes_what - -#define IPT_CONNBYTES_PKTS XT_CONNBYTES_PKTS -#define IPT_CONNBYTES_BYTES XT_CONNBYTES_BYTES -#define IPT_CONNBYTES_AVGPKT XT_CONNBYTES_AVGPKT - -#define ipt_connbytes_direction xt_connbytes_direction -#define IPT_CONNBYTES_DIR_ORIGINAL XT_CONNBYTES_DIR_ORIGINAL -#define IPT_CONNBYTES_DIR_REPLY XT_CONNBYTES_DIR_REPLY -#define IPT_CONNBYTES_DIR_BOTH XT_CONNBYTES_DIR_BOTH - -#define ipt_connbytes_info xt_connbytes_info - -#endif diff --git a/include/linux/netfilter_ipv4/ipt_connmark.h b/include/linux/netfilter_ipv4/ipt_connmark.h deleted file mode 100644 index c7ba6560d44c..000000000000 --- a/include/linux/netfilter_ipv4/ipt_connmark.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_CONNMARK_H -#define _IPT_CONNMARK_H - -#include -#define ipt_connmark_info xt_connmark_info - -#endif /*_IPT_CONNMARK_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_conntrack.h b/include/linux/netfilter_ipv4/ipt_conntrack.h deleted file mode 100644 index cde6762949c5..000000000000 --- a/include/linux/netfilter_ipv4/ipt_conntrack.h +++ /dev/null @@ -1,28 +0,0 @@ -/* Header file for kernel module to match connection tracking information. - * GPL (C) 2001 Marc Boucher (marc@mbsi.ca). 
- */ - -#ifndef _IPT_CONNTRACK_H -#define _IPT_CONNTRACK_H - -#include - -#define IPT_CONNTRACK_STATE_BIT(ctinfo) XT_CONNTRACK_STATE_BIT(ctinfo) -#define IPT_CONNTRACK_STATE_INVALID XT_CONNTRACK_STATE_INVALID - -#define IPT_CONNTRACK_STATE_SNAT XT_CONNTRACK_STATE_SNAT -#define IPT_CONNTRACK_STATE_DNAT XT_CONNTRACK_STATE_DNAT -#define IPT_CONNTRACK_STATE_UNTRACKED XT_CONNTRACK_STATE_UNTRACKED - -/* flags, invflags: */ -#define IPT_CONNTRACK_STATE XT_CONNTRACK_STATE -#define IPT_CONNTRACK_PROTO XT_CONNTRACK_PROTO -#define IPT_CONNTRACK_ORIGSRC XT_CONNTRACK_ORIGSRC -#define IPT_CONNTRACK_ORIGDST XT_CONNTRACK_ORIGDST -#define IPT_CONNTRACK_REPLSRC XT_CONNTRACK_REPLSRC -#define IPT_CONNTRACK_REPLDST XT_CONNTRACK_REPLDST -#define IPT_CONNTRACK_STATUS XT_CONNTRACK_STATUS -#define IPT_CONNTRACK_EXPIRES XT_CONNTRACK_EXPIRES - -#define ipt_conntrack_info xt_conntrack_info -#endif /*_IPT_CONNTRACK_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_dccp.h b/include/linux/netfilter_ipv4/ipt_dccp.h deleted file mode 100644 index e70d11e1f53c..000000000000 --- a/include/linux/netfilter_ipv4/ipt_dccp.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _IPT_DCCP_H_ -#define _IPT_DCCP_H_ - -#include -#define IPT_DCCP_SRC_PORTS XT_DCCP_SRC_PORTS -#define IPT_DCCP_DEST_PORTS XT_DCCP_DEST_PORTS -#define IPT_DCCP_TYPE XT_DCCP_TYPE -#define IPT_DCCP_OPTION XT_DCCP_OPTION - -#define IPT_DCCP_VALID_FLAGS XT_DCCP_VALID_FLAGS - -#define ipt_dccp_info xt_dccp_info - -#endif /* _IPT_DCCP_H_ */ - diff --git a/include/linux/netfilter_ipv4/ipt_dscp.h b/include/linux/netfilter_ipv4/ipt_dscp.h deleted file mode 100644 index 4b82ca912b0e..000000000000 --- a/include/linux/netfilter_ipv4/ipt_dscp.h +++ /dev/null @@ -1,21 +0,0 @@ -/* iptables module for matching the IPv4 DSCP field - * - * (C) 2002 Harald Welte - * This software is distributed under GNU GPL v2, 1991 - * - * See RFC2474 for a description of the DSCP field within the IP Header. 
- * - * ipt_dscp.h,v 1.3 2002/08/05 19:00:21 laforge Exp -*/ -#ifndef _IPT_DSCP_H -#define _IPT_DSCP_H - -#include - -#define IPT_DSCP_MASK XT_DSCP_MASK -#define IPT_DSCP_SHIFT XT_DSCP_SHIFT -#define IPT_DSCP_MAX XT_DSCP_MAX - -#define ipt_dscp_info xt_dscp_info - -#endif /* _IPT_DSCP_H */ diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h index 1f0d9a4d3378..9945baa4ccd7 100644 --- a/include/linux/netfilter_ipv4/ipt_ecn.h +++ b/include/linux/netfilter_ipv4/ipt_ecn.h @@ -8,9 +8,9 @@ */ #ifndef _IPT_ECN_H #define _IPT_ECN_H -#include +#include -#define IPT_ECN_IP_MASK (~IPT_DSCP_MASK) +#define IPT_ECN_IP_MASK (~XT_DSCP_MASK) #define IPT_ECN_OP_MATCH_IP 0x01 #define IPT_ECN_OP_MATCH_ECE 0x10 diff --git a/include/linux/netfilter_ipv4/ipt_esp.h b/include/linux/netfilter_ipv4/ipt_esp.h deleted file mode 100644 index 78296e7eeff9..000000000000 --- a/include/linux/netfilter_ipv4/ipt_esp.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _IPT_ESP_H -#define _IPT_ESP_H - -#include - -#define ipt_esp xt_esp -#define IPT_ESP_INV_SPI XT_ESP_INV_SPI -#define IPT_ESP_INV_MASK XT_ESP_INV_MASK - -#endif /*_IPT_ESP_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_hashlimit.h b/include/linux/netfilter_ipv4/ipt_hashlimit.h deleted file mode 100644 index 5662120a3d7b..000000000000 --- a/include/linux/netfilter_ipv4/ipt_hashlimit.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _IPT_HASHLIMIT_H -#define _IPT_HASHLIMIT_H - -#include - -#define IPT_HASHLIMIT_SCALE XT_HASHLIMIT_SCALE -#define IPT_HASHLIMIT_HASH_DIP XT_HASHLIMIT_HASH_DIP -#define IPT_HASHLIMIT_HASH_DPT XT_HASHLIMIT_HASH_DPT -#define IPT_HASHLIMIT_HASH_SIP XT_HASHLIMIT_HASH_SIP -#define IPT_HASHLIMIT_HASH_SPT XT_HASHLIMIT_HASH_SPT - -#define ipt_hashlimit_info xt_hashlimit_info - -#endif /* _IPT_HASHLIMIT_H */ diff --git a/include/linux/netfilter_ipv4/ipt_helper.h b/include/linux/netfilter_ipv4/ipt_helper.h deleted file mode 100644 index 80452c218551..000000000000 --- a/include/linux/netfilter_ipv4/ipt_helper.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_HELPER_H -#define _IPT_HELPER_H - -#include -#define ipt_helper_info xt_helper_info - -#endif /* _IPT_HELPER_H */ diff --git a/include/linux/netfilter_ipv4/ipt_length.h b/include/linux/netfilter_ipv4/ipt_length.h deleted file mode 100644 index 9b45206ffcef..000000000000 --- a/include/linux/netfilter_ipv4/ipt_length.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_LENGTH_H -#define _IPT_LENGTH_H - -#include -#define ipt_length_info xt_length_info - -#endif /*_IPT_LENGTH_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_limit.h b/include/linux/netfilter_ipv4/ipt_limit.h deleted file mode 100644 index 92f5cd07bbc4..000000000000 --- a/include/linux/netfilter_ipv4/ipt_limit.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _IPT_RATE_H -#define _IPT_RATE_H - -#include -#define IPT_LIMIT_SCALE XT_LIMIT_SCALE -#define ipt_rateinfo xt_rateinfo - -#endif /*_IPT_RATE_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_mac.h b/include/linux/netfilter_ipv4/ipt_mac.h deleted file mode 100644 index b186008a3c47..000000000000 --- a/include/linux/netfilter_ipv4/ipt_mac.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_MAC_H -#define _IPT_MAC_H - -#include -#define ipt_mac_info xt_mac_info - -#endif /*_IPT_MAC_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_mark.h b/include/linux/netfilter_ipv4/ipt_mark.h deleted file mode 100644 index bfde67c61224..000000000000 --- a/include/linux/netfilter_ipv4/ipt_mark.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _IPT_MARK_H -#define _IPT_MARK_H - -/* Backwards 
compatibility for old userspace */ -#include - -#define ipt_mark_info xt_mark_info - -#endif /*_IPT_MARK_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_multiport.h b/include/linux/netfilter_ipv4/ipt_multiport.h deleted file mode 100644 index 55fe85eca88c..000000000000 --- a/include/linux/netfilter_ipv4/ipt_multiport.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _IPT_MULTIPORT_H -#define _IPT_MULTIPORT_H - -#include - -#define IPT_MULTIPORT_SOURCE XT_MULTIPORT_SOURCE -#define IPT_MULTIPORT_DESTINATION XT_MULTIPORT_DESTINATION -#define IPT_MULTIPORT_EITHER XT_MULTIPORT_EITHER - -#define IPT_MULTI_PORTS XT_MULTI_PORTS - -#define ipt_multiport xt_multiport -#define ipt_multiport_v1 xt_multiport_v1 - -#endif /*_IPT_MULTIPORT_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_physdev.h b/include/linux/netfilter_ipv4/ipt_physdev.h deleted file mode 100644 index 2400e7140f26..000000000000 --- a/include/linux/netfilter_ipv4/ipt_physdev.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _IPT_PHYSDEV_H -#define _IPT_PHYSDEV_H - -/* Backwards compatibility for old userspace */ - -#include - -#define IPT_PHYSDEV_OP_IN XT_PHYSDEV_OP_IN -#define IPT_PHYSDEV_OP_OUT XT_PHYSDEV_OP_OUT -#define IPT_PHYSDEV_OP_BRIDGED XT_PHYSDEV_OP_BRIDGED -#define IPT_PHYSDEV_OP_ISIN XT_PHYSDEV_OP_ISIN -#define IPT_PHYSDEV_OP_ISOUT XT_PHYSDEV_OP_ISOUT -#define IPT_PHYSDEV_OP_MASK XT_PHYSDEV_OP_MASK - -#define ipt_physdev_info xt_physdev_info - -#endif /*_IPT_PHYSDEV_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_pkttype.h b/include/linux/netfilter_ipv4/ipt_pkttype.h deleted file mode 100644 index ff1fbc949a0c..000000000000 --- a/include/linux/netfilter_ipv4/ipt_pkttype.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_PKTTYPE_H -#define _IPT_PKTTYPE_H - -#include -#define ipt_pkttype_info xt_pkttype_info - -#endif /*_IPT_PKTTYPE_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_policy.h b/include/linux/netfilter_ipv4/ipt_policy.h deleted file mode 100644 index 1037fb2cd206..000000000000 --- a/include/linux/netfilter_ipv4/ipt_policy.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _IPT_POLICY_H -#define _IPT_POLICY_H - -#include - -#define IPT_POLICY_MAX_ELEM XT_POLICY_MAX_ELEM - -/* ipt_policy_flags */ -#define IPT_POLICY_MATCH_IN XT_POLICY_MATCH_IN -#define IPT_POLICY_MATCH_OUT XT_POLICY_MATCH_OUT -#define IPT_POLICY_MATCH_NONE XT_POLICY_MATCH_NONE -#define IPT_POLICY_MATCH_STRICT XT_POLICY_MATCH_STRICT - -/* ipt_policy_modes */ -#define IPT_POLICY_MODE_TRANSPORT XT_POLICY_MODE_TRANSPORT -#define IPT_POLICY_MODE_TUNNEL XT_POLICY_MODE_TUNNEL - -#define ipt_policy_spec xt_policy_spec -#define ipt_policy_addr xt_policy_addr -#define ipt_policy_elem xt_policy_elem -#define ipt_policy_info xt_policy_info - -#endif /* _IPT_POLICY_H */ diff --git a/include/linux/netfilter_ipv4/ipt_recent.h b/include/linux/netfilter_ipv4/ipt_recent.h deleted file mode 100644 index d636cca133c2..000000000000 --- a/include/linux/netfilter_ipv4/ipt_recent.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _IPT_RECENT_H -#define _IPT_RECENT_H - -#include - -#define ipt_recent_info xt_recent_mtinfo - -enum { - IPT_RECENT_CHECK = XT_RECENT_CHECK, - IPT_RECENT_SET = XT_RECENT_SET, - IPT_RECENT_UPDATE = XT_RECENT_UPDATE, - IPT_RECENT_REMOVE = XT_RECENT_REMOVE, - IPT_RECENT_TTL = XT_RECENT_TTL, - - IPT_RECENT_SOURCE = XT_RECENT_SOURCE, - IPT_RECENT_DEST = XT_RECENT_DEST, - - IPT_RECENT_NAME_LEN = XT_RECENT_NAME_LEN, -}; - -#endif /*_IPT_RECENT_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_sctp.h b/include/linux/netfilter_ipv4/ipt_sctp.h deleted file mode 100644 index 
80b3dbacd193..000000000000 --- a/include/linux/netfilter_ipv4/ipt_sctp.h +++ /dev/null @@ -1,105 +0,0 @@ -#ifndef _IPT_SCTP_H_ -#define _IPT_SCTP_H_ - -#define IPT_SCTP_SRC_PORTS 0x01 -#define IPT_SCTP_DEST_PORTS 0x02 -#define IPT_SCTP_CHUNK_TYPES 0x04 - -#define IPT_SCTP_VALID_FLAGS 0x07 - - -struct ipt_sctp_flag_info { - u_int8_t chunktype; - u_int8_t flag; - u_int8_t flag_mask; -}; - -#define IPT_NUM_SCTP_FLAGS 4 - -struct ipt_sctp_info { - u_int16_t dpts[2]; /* Min, Max */ - u_int16_t spts[2]; /* Min, Max */ - - u_int32_t chunkmap[256 / sizeof (u_int32_t)]; /* Bit mask of chunks to be matched according to RFC 2960 */ - -#define SCTP_CHUNK_MATCH_ANY 0x01 /* Match if any of the chunk types are present */ -#define SCTP_CHUNK_MATCH_ALL 0x02 /* Match if all of the chunk types are present */ -#define SCTP_CHUNK_MATCH_ONLY 0x04 /* Match if these are the only chunk types present */ - - u_int32_t chunk_match_type; - struct ipt_sctp_flag_info flag_info[IPT_NUM_SCTP_FLAGS]; - int flag_count; - - u_int32_t flags; - u_int32_t invflags; -}; - -#define bytes(type) (sizeof(type) * 8) - -#define SCTP_CHUNKMAP_SET(chunkmap, type) \ - do { \ - chunkmap[type / bytes(u_int32_t)] |= \ - 1 << (type % bytes(u_int32_t)); \ - } while (0) - -#define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \ - do { \ - chunkmap[type / bytes(u_int32_t)] &= \ - ~(1 << (type % bytes(u_int32_t))); \ - } while (0) - -#define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \ -({ \ - (chunkmap[type / bytes (u_int32_t)] & \ - (1 << (type % bytes (u_int32_t)))) ? 1: 0; \ -}) - -#define SCTP_CHUNKMAP_RESET(chunkmap) \ - do { \ - int i; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ - chunkmap[i] = 0; \ - } while (0) - -#define SCTP_CHUNKMAP_SET_ALL(chunkmap) \ - do { \ - int i; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ - chunkmap[i] = ~0; \ - } while (0) - -#define SCTP_CHUNKMAP_COPY(destmap, srcmap) \ - do { \ - int i; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ - destmap[i] = srcmap[i]; \ - } while (0) - -#define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \ -({ \ - int i; \ - int flag = 1; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) { \ - if (chunkmap[i]) { \ - flag = 0; \ - break; \ - } \ - } \ - flag; \ -}) - -#define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \ -({ \ - int i; \ - int flag = 1; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) { \ - if (chunkmap[i] != ~0) { \ - flag = 0; \ - break; \ - } \ - } \ - flag; \ -}) - -#endif /* _IPT_SCTP_H_ */ - diff --git a/include/linux/netfilter_ipv4/ipt_state.h b/include/linux/netfilter_ipv4/ipt_state.h deleted file mode 100644 index a44a99cc28cc..000000000000 --- a/include/linux/netfilter_ipv4/ipt_state.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _IPT_STATE_H -#define _IPT_STATE_H - -/* Backwards compatibility for old userspace */ - -#include - -#define IPT_STATE_BIT XT_STATE_BIT -#define IPT_STATE_INVALID XT_STATE_INVALID - -#define IPT_STATE_UNTRACKED XT_STATE_UNTRACKED - -#define ipt_state_info xt_state_info - -#endif /*_IPT_STATE_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_string.h b/include/linux/netfilter_ipv4/ipt_string.h deleted file mode 100644 index c26de3059903..000000000000 --- a/include/linux/netfilter_ipv4/ipt_string.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _IPT_STRING_H -#define _IPT_STRING_H - -#include - -#define IPT_STRING_MAX_PATTERN_SIZE XT_STRING_MAX_PATTERN_SIZE -#define IPT_STRING_MAX_ALGO_NAME_SIZE XT_STRING_MAX_ALGO_NAME_SIZE -#define ipt_string_info xt_string_info - -#endif /*_IPT_STRING_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_tcpmss.h 
b/include/linux/netfilter_ipv4/ipt_tcpmss.h deleted file mode 100644 index 18bbc8e8e009..000000000000 --- a/include/linux/netfilter_ipv4/ipt_tcpmss.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_TCPMSS_MATCH_H -#define _IPT_TCPMSS_MATCH_H - -#include -#define ipt_tcpmss_match_info xt_tcpmss_match_info - -#endif /*_IPT_TCPMSS_MATCH_H*/ diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild index 4610a16da0ab..e864eaee9e5e 100644 --- a/include/linux/netfilter_ipv6/Kbuild +++ b/include/linux/netfilter_ipv6/Kbuild @@ -1,21 +1,12 @@ header-y += ip6t_HL.h header-y += ip6t_LOG.h -header-y += ip6t_MARK.h header-y += ip6t_REJECT.h header-y += ip6t_ah.h -header-y += ip6t_esp.h header-y += ip6t_frag.h -header-y += ip6t_hl.h header-y += ip6t_ipv6header.h -header-y += ip6t_length.h -header-y += ip6t_limit.h -header-y += ip6t_mac.h -header-y += ip6t_mark.h +header-y += ip6t_hl.h header-y += ip6t_mh.h -header-y += ip6t_multiport.h header-y += ip6t_opts.h -header-y += ip6t_physdev.h -header-y += ip6t_policy.h header-y += ip6t_rt.h unifdef-y += ip6_tables.h diff --git a/include/linux/netfilter_ipv6/ip6t_MARK.h b/include/linux/netfilter_ipv6/ip6t_MARK.h deleted file mode 100644 index 7cf629a8ab92..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_MARK.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _IP6T_MARK_H_target -#define _IP6T_MARK_H_target - -/* Backwards compatibility for old userspace */ -#include - -#define ip6t_mark_target_info xt_mark_target_info - -#endif /*_IP6T_MARK_H_target*/ diff --git a/include/linux/netfilter_ipv6/ip6t_esp.h b/include/linux/netfilter_ipv6/ip6t_esp.h deleted file mode 100644 index f62eaf53c16c..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_esp.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _IP6T_ESP_H -#define _IP6T_ESP_H - -#include - -#define ip6t_esp xt_esp -#define IP6T_ESP_INV_SPI XT_ESP_INV_SPI -#define IP6T_ESP_INV_MASK XT_ESP_INV_MASK - -#endif /*_IP6T_ESP_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_length.h b/include/linux/netfilter_ipv6/ip6t_length.h deleted file mode 100644 index 9e9689d03ed7..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_length.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _IP6T_LENGTH_H -#define _IP6T_LENGTH_H - -#include -#define ip6t_length_info xt_length_info - -#endif /*_IP6T_LENGTH_H*/ - diff --git a/include/linux/netfilter_ipv6/ip6t_limit.h b/include/linux/netfilter_ipv6/ip6t_limit.h deleted file mode 100644 index 487e5ea342c6..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_limit.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _IP6T_RATE_H -#define _IP6T_RATE_H - -#include -#define IP6T_LIMIT_SCALE XT_LIMIT_SCALE -#define ip6t_rateinfo xt_rateinfo - -#endif /*_IP6T_RATE_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_mac.h b/include/linux/netfilter_ipv6/ip6t_mac.h deleted file mode 100644 index ac58e83e9423..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_mac.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IP6T_MAC_H -#define _IP6T_MAC_H - -#include -#define ip6t_mac_info xt_mac_info - -#endif /*_IP6T_MAC_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_mark.h b/include/linux/netfilter_ipv6/ip6t_mark.h deleted file mode 100644 index ff204951ddc3..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_mark.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _IP6T_MARK_H -#define _IP6T_MARK_H - -/* Backwards compatibility for old userspace */ -#include - -#define ip6t_mark_info xt_mark_info - -#endif /*_IPT_MARK_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_multiport.h 
b/include/linux/netfilter_ipv6/ip6t_multiport.h deleted file mode 100644 index 042c92661cee..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_multiport.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _IP6T_MULTIPORT_H -#define _IP6T_MULTIPORT_H - -#include - -#define IP6T_MULTIPORT_SOURCE XT_MULTIPORT_SOURCE -#define IP6T_MULTIPORT_DESTINATION XT_MULTIPORT_DESTINATION -#define IP6T_MULTIPORT_EITHER XT_MULTIPORT_EITHER - -#define IP6T_MULTI_PORTS XT_MULTI_PORTS - -#define ip6t_multiport xt_multiport - -#endif /*_IP6T_MULTIPORT_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_physdev.h b/include/linux/netfilter_ipv6/ip6t_physdev.h deleted file mode 100644 index c161c0a81b55..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_physdev.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _IP6T_PHYSDEV_H -#define _IP6T_PHYSDEV_H - -/* Backwards compatibility for old userspace */ - -#include - -#define IP6T_PHYSDEV_OP_IN XT_PHYSDEV_OP_IN -#define IP6T_PHYSDEV_OP_OUT XT_PHYSDEV_OP_OUT -#define IP6T_PHYSDEV_OP_BRIDGED XT_PHYSDEV_OP_BRIDGED -#define IP6T_PHYSDEV_OP_ISIN XT_PHYSDEV_OP_ISIN -#define IP6T_PHYSDEV_OP_ISOUT XT_PHYSDEV_OP_ISOUT -#define IP6T_PHYSDEV_OP_MASK XT_PHYSDEV_OP_MASK - -#define ip6t_physdev_info xt_physdev_info - -#endif /*_IP6T_PHYSDEV_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_policy.h b/include/linux/netfilter_ipv6/ip6t_policy.h deleted file mode 100644 index b1c449d7ec89..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_policy.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _IP6T_POLICY_H -#define _IP6T_POLICY_H - -#include - -#define IP6T_POLICY_MAX_ELEM XT_POLICY_MAX_ELEM - -/* ip6t_policy_flags */ -#define IP6T_POLICY_MATCH_IN XT_POLICY_MATCH_IN -#define IP6T_POLICY_MATCH_OUT XT_POLICY_MATCH_OUT -#define IP6T_POLICY_MATCH_NONE XT_POLICY_MATCH_NONE -#define IP6T_POLICY_MATCH_STRICT XT_POLICY_MATCH_STRICT - -/* ip6t_policy_modes */ -#define IP6T_POLICY_MODE_TRANSPORT XT_POLICY_MODE_TRANSPORT -#define IP6T_POLICY_MODE_TUNNEL XT_POLICY_MODE_TUNNEL - -#define ip6t_policy_spec xt_policy_spec -#define ip6t_policy_addr xt_policy_addr -#define ip6t_policy_elem xt_policy_elem -#define ip6t_policy_info xt_policy_info - -#endif /* _IP6T_POLICY_H */ -- cgit v1.2.3 From 98d89b4198cf7273968e9217a62ec7ccfd760171 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 5 Jul 2009 15:55:06 +0200 Subject: netfilter: xtables: realign struct xt_target_param This commit gets rid of a padding hole as reported by pahole(1). Saves 8 bytes on x86_64. Signed-off-by: Jan Engelhardt --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1030b7593898..4fa6e4c263e0 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -238,9 +238,9 @@ struct xt_mtdtor_param { */ struct xt_target_param { const struct net_device *in, *out; - unsigned int hooknum; const struct xt_target *target; const void *targinfo; + unsigned int hooknum; u_int8_t family; }; -- cgit v1.2.3 From 1905b1bfc0de6f69a61dc03cac0d86a04b3216bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 Aug 2009 18:13:23 +0900 Subject: chrdev: implement __[un]register_chrdev() [un]register_chrdev() assume minor range 0-255. This patch adds __ prefixed versions which take @minorbase and @count explicitly. 
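A usage sketch of the new interface, under assumed placeholder names (MY_MAJOR, my_fops and "mydev" are not objects from this patch): a driver that needs only minors 16..19 can now claim exactly that range and release it symmetrically, instead of tying up all 256 minors:

#include <linux/fs.h>
#include <linux/init.h>

#define MY_MAJOR 240			/* assumed: an example fixed major */

static const struct file_operations my_fops;	/* placeholder fops */

static int __init my_init(void)
{
	/* Claim minors 16..19 under MY_MAJOR; with a fixed nonzero
	 * major this returns 0 on success or a negative errno. */
	return __register_chrdev(MY_MAJOR, 16, 4, "mydev", &my_fops);
}

static void __exit my_exit(void)
{
	/* Release exactly the range claimed above. */
	__unregister_chrdev(MY_MAJOR, 16, 4, "mydev");
}

The static-inline wrappers added to fs.h keep existing register_chrdev()/unregister_chrdev() callers source-compatible by forwarding (major, 0, 256, ...).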
Signed-off-by: Tejun Heo Cc: Al Viro Cc: Greg Kroah-Hartman Signed-off-by: Takashi Iwai --- fs/char_dev.c | 39 ++++++++++++++++++++++++++------------- include/linux/fs.h | 19 ++++++++++++++++--- 2 files changed, 42 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/char_dev.c b/fs/char_dev.c index a173551e19d7..2f18c1e4e301 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -237,8 +237,10 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, } /** - * register_chrdev() - Register a major number for character devices. + * __register_chrdev() - create and register a cdev occupying a range of minors * @major: major device number or 0 for dynamic allocation + * @baseminor: first of the requested range of minor numbers + * @count: the number of minor numbers required * @name: name of this range of devices * @fops: file operations associated with this devices * @@ -254,19 +256,17 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, * /dev. It only helps to keep track of the different owners of devices. If * your module name has only one type of devices it's ok to use e.g. the name * of the module here. - * - * This function registers a range of 256 minor numbers. The first minor number - * is 0. */ -int register_chrdev(unsigned int major, const char *name, - const struct file_operations *fops) +int __register_chrdev(unsigned int major, unsigned int baseminor, + unsigned int count, const char *name, + const struct file_operations *fops) { struct char_device_struct *cd; struct cdev *cdev; char *s; int err = -ENOMEM; - cd = __register_chrdev_region(major, 0, 256, name); + cd = __register_chrdev_region(major, baseminor, count, name); if (IS_ERR(cd)) return PTR_ERR(cd); @@ -280,7 +280,7 @@ int register_chrdev(unsigned int major, const char *name, for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) *s = '!'; - err = cdev_add(cdev, MKDEV(cd->major, 0), 256); + err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); if (err) goto out; @@ -290,7 +290,7 @@ int register_chrdev(unsigned int major, const char *name, out: kobject_put(&cdev->kobj); out2: - kfree(__unregister_chrdev_region(cd->major, 0, 256)); + kfree(__unregister_chrdev_region(cd->major, baseminor, count)); return err; } @@ -316,10 +316,23 @@ void unregister_chrdev_region(dev_t from, unsigned count) } } -void unregister_chrdev(unsigned int major, const char *name) +/** + * __unregister_chrdev - unregister and destroy a cdev + * @major: major device number + * @baseminor: first of the range of minor numbers + * @count: the number of minor numbers this cdev is occupying + * @name: name of this range of devices + * + * Unregister and destroy the cdev occupying the region described by + * @major, @baseminor and @count. This function undoes what + * __register_chrdev() did. 
+ */ +void __unregister_chrdev(unsigned int major, unsigned int baseminor, + unsigned int count, const char *name) { struct char_device_struct *cd; - cd = __unregister_chrdev_region(major, 0, 256); + + cd = __unregister_chrdev_region(major, baseminor, count); if (cd && cd->cdev) cdev_del(cd->cdev); kfree(cd); @@ -568,6 +581,6 @@ EXPORT_SYMBOL(cdev_alloc); EXPORT_SYMBOL(cdev_del); EXPORT_SYMBOL(cdev_add); EXPORT_SYMBOL(cdev_index); -EXPORT_SYMBOL(register_chrdev); -EXPORT_SYMBOL(unregister_chrdev); +EXPORT_SYMBOL(__register_chrdev); +EXPORT_SYMBOL(__unregister_chrdev); EXPORT_SYMBOL(directly_mappable_cdev_bdi); diff --git a/include/linux/fs.h b/include/linux/fs.h index a36ffa5a77a4..6c36ab788854 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1998,12 +1998,25 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *); #define CHRDEV_MAJOR_HASH_SIZE 255 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); -extern int register_chrdev(unsigned int, const char *, - const struct file_operations *); -extern void unregister_chrdev(unsigned int, const char *); +extern int __register_chrdev(unsigned int major, unsigned int baseminor, + unsigned int count, const char *name, + const struct file_operations *fops); +extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, + unsigned int count, const char *name); extern void unregister_chrdev_region(dev_t, unsigned); extern void chrdev_show(struct seq_file *,off_t); +static inline int register_chrdev(unsigned int major, const char *name, + const struct file_operations *fops) +{ + return __register_chrdev(major, 0, 256, name, fops); +} + +static inline void unregister_chrdev(unsigned int major, const char *name) +{ + __unregister_chrdev(major, 0, 256, name); +} + /* fs/block_dev.c */ #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ -- cgit v1.2.3 From 25a70e38cd57952b09a013bf070e03705ee4f2ff Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 12 Aug 2009 00:50:09 -0700 Subject: Input: eeti_ts - allow active high irq lines This adds a struct eeti_ts_platform_data which currently holds only one value to specify the interrupt polarity. The driver has a fallback if no platform data is passed in via the i2c_board_info, so no regression is caused. 
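A board-code usage sketch (the I2C address, IRQ number and array name are assumed values, not taken from this patch): the new platform data reaches the driver through i2c_board_info's platform_data pointer, assuming the device binds by the "eeti_ts" id:

#include <linux/i2c.h>
#include <linux/input/eeti_ts.h>

#define BOARD_TS_IRQ	42	/* assumed: board-specific IRQ number */

static struct eeti_ts_platform_data eeti_pdata = {
	.irq_active_high = 1,	/* this board wires the IRQ active high */
};

static struct i2c_board_info board_i2c_devs[] __initdata = {
	{
		I2C_BOARD_INFO("eeti_ts", 0x0a),	/* illustrative address */
		.platform_data	= &eeti_pdata,
		.irq		= BOARD_TS_IRQ,
	},
};

Boards that pass no platform data keep the old falling-edge behaviour, since irq_active_high stays zero and the driver then requests IRQF_TRIGGER_FALLING as before.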
Signed-off-by: Daniel Mack Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/eeti_ts.c | 22 +++++++++++++++++++--- include/linux/input/eeti_ts.h | 9 +++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 include/linux/input/eeti_ts.h (limited to 'include/linux') diff --git a/drivers/input/touchscreen/eeti_ts.c b/drivers/input/touchscreen/eeti_ts.c index 3ab92222a525..9029bd3f34e5 100644 --- a/drivers/input/touchscreen/eeti_ts.c +++ b/drivers/input/touchscreen/eeti_ts.c @@ -32,6 +32,7 @@ #include #include #include +#include static int flip_x; module_param(flip_x, bool, 0644); @@ -46,7 +47,7 @@ struct eeti_ts_priv { struct input_dev *input; struct work_struct work; struct mutex mutex; - int irq; + int irq, irq_active_high; }; #define EETI_TS_BITDEPTH (11) @@ -58,6 +59,11 @@ struct eeti_ts_priv { #define REPORT_BIT_HAS_PRESSURE (1 << 6) #define REPORT_RES_BITS(v) (((v) >> 1) + EETI_TS_BITDEPTH) +static inline int eeti_ts_irq_active(struct eeti_ts_priv *priv) +{ + return gpio_get_value(irq_to_gpio(priv->irq)) == priv->irq_active_high; +} + static void eeti_ts_read(struct work_struct *work) { char buf[6]; @@ -67,7 +73,7 @@ static void eeti_ts_read(struct work_struct *work) mutex_lock(&priv->mutex); - while (!gpio_get_value(irq_to_gpio(priv->irq)) && --to) + while (eeti_ts_irq_active(priv) && --to) i2c_master_recv(priv->client, buf, sizeof(buf)); if (!to) { @@ -140,8 +146,10 @@ static void eeti_ts_close(struct input_dev *dev) static int __devinit eeti_ts_probe(struct i2c_client *client, const struct i2c_device_id *idp) { + struct eeti_ts_platform_data *pdata; struct eeti_ts_priv *priv; struct input_dev *input; + unsigned int irq_flags; int err = -ENOMEM; /* In contrast to what's described in the datasheet, there seems @@ -180,6 +188,14 @@ static int __devinit eeti_ts_probe(struct i2c_client *client, priv->input = input; priv->irq = client->irq; + pdata = client->dev.platform_data; + + if (pdata) + priv->irq_active_high = pdata->irq_active_high; + + irq_flags = priv->irq_active_high ? + IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING; + INIT_WORK(&priv->work, eeti_ts_read); i2c_set_clientdata(client, priv); input_set_drvdata(input, priv); @@ -188,7 +204,7 @@ static int __devinit eeti_ts_probe(struct i2c_client *client, if (err) goto err1; - err = request_irq(priv->irq, eeti_ts_isr, IRQF_TRIGGER_FALLING, + err = request_irq(priv->irq, eeti_ts_isr, irq_flags, client->name, priv); if (err) { dev_err(&client->dev, "Unable to request touchscreen IRQ.\n"); diff --git a/include/linux/input/eeti_ts.h b/include/linux/input/eeti_ts.h new file mode 100644 index 000000000000..f875b316249d --- /dev/null +++ b/include/linux/input/eeti_ts.h @@ -0,0 +1,9 @@ +#ifndef LINUX_INPUT_EETI_TS_H +#define LINUX_INPUT_EETI_TS_H + +struct eeti_ts_platform_data { + unsigned int irq_active_high; +}; + +#endif /* LINUX_INPUT_EETI_TS_H */ + -- cgit v1.2.3 From 78090a58c49f2f6213d0bb1b3b4c4df73e26865f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Aug 2009 02:58:38 +0000 Subject: nl802154: make ieee802154_policy constant Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. 
Miller --- include/linux/nl802154.h | 2 +- net/ieee802154/nl_policy.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 2cda00ccfcca..266dd96ced96 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -69,7 +69,7 @@ enum { #define IEEE802154_ATTR_MAX (__IEEE802154_ATTR_MAX - 1) -extern struct nla_policy ieee802154_policy[]; +extern const struct nla_policy ieee802154_policy[]; /* commands */ /* REQ should be responded with CONF diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index c7d71d1adcac..83cb4ccef90d 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -24,7 +24,7 @@ #define NLA_HW_ADDR NLA_U64 -struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { +const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_DEV_NAME] = { .type = NLA_STRING, }, [IEEE802154_ATTR_DEV_INDEX] = { .type = NLA_U32, }, @@ -50,3 +50,4 @@ struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_DURATION] = { .type = NLA_U8, }, [IEEE802154_ATTR_ED_LIST] = { .len = 27 }, }; + -- cgit v1.2.3 From 8e753dd0a82bd266256c20a20b98dfa48f98d21e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Aug 2009 02:58:40 +0000 Subject: nl802154: add support for dumping WPAN interface information Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- include/linux/nl802154.h | 2 + net/ieee802154/netlink.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 266dd96ced96..9a1af5f871a3 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -111,6 +111,8 @@ enum { IEEE802154_RX_ENABLE_REQ, /* Not supported yet */ IEEE802154_RX_ENABLE_CONF, /* Not supported yet */ + IEEE802154_LIST_IFACE, + __IEEE802154_CMD_MAX, }; diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index a615b9d13212..3fe02b394ad6 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -19,6 +19,7 @@ * Written by: * Sergey Lapin * Dmitry Eremin-Solenikov + * Maxim Osipov */ #include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -73,7 +75,7 @@ static int ieee802154_nl_finish(struct sk_buff *msg) /* XXX: nlh is right at the start of msg */ void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); - if (!genlmsg_end(msg, hdr)) + if (genlmsg_end(msg, hdr) < 0) goto out; return genlmsg_multicast(msg, 0, ieee802154_coord_mcgrp.id, @@ -260,6 +262,35 @@ nla_put_failure: } EXPORT_SYMBOL(ieee802154_nl_scan_confirm); +static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid, + u32 seq, int flags, struct net_device *dev) +{ + void *hdr; + + pr_debug("%s\n", __func__); + + hdr = genlmsg_put(msg, 0, seq, &ieee802154_coordinator_family, flags, + IEEE802154_LIST_IFACE); + if (!hdr) + goto out; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + NLA_PUT_U16(msg, IEEE802154_ATTR_SHORT_ADDR, + ieee802154_mlme_ops(dev)->get_short_addr(dev)); + NLA_PUT_U16(msg, IEEE802154_ATTR_PAN_ID, + ieee802154_mlme_ops(dev)->get_pan_id(dev)); + return genlmsg_end(msg, hdr); + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + /* 
Requests from userspace */ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) { @@ -272,7 +303,7 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) dev = dev_get_by_name(&init_net, name); } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) dev = dev_get_by_index(&init_net, - nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX])); + nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX])); else return NULL; @@ -466,6 +497,67 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) return ret; } +static int ieee802154_list_iface(struct sk_buff *skb, + struct genl_info *info) +{ + /* Request for interface name, index, type, IEEE address, + PAN Id, short address */ + struct sk_buff *msg; + struct net_device *dev = NULL; + int rc = -ENOBUFS; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + goto out_dev; + + rc = ieee802154_nl_fill_iface(msg, info->snd_pid, info->snd_seq, + 0, dev); + if (rc < 0) + goto out_free; + + dev_put(dev); + + return genlmsg_unicast(&init_net, msg, info->snd_pid); +out_free: + nlmsg_free(msg); +out_dev: + dev_put(dev); + return rc; + +} + +static int ieee802154_dump_iface(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx; + int s_idx = cb->args[0]; + + pr_debug("%s\n", __func__); + + idx = 0; + for_each_netdev(net, dev) { + if (idx < s_idx || (dev->type != ARPHRD_IEEE802154)) + goto cont; + + if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) + break; +cont: + idx++; + } + cb->args[0] = idx; + + return skb->len; +} + #define IEEE802154_OP(_cmd, _func) \ { \ .cmd = _cmd, \ @@ -475,12 +567,22 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) .flags = GENL_ADMIN_PERM, \ } +#define IEEE802154_DUMP(_cmd, _func, _dump) \ + { \ + .cmd = _cmd, \ + .policy = ieee802154_policy, \ + .doit = _func, \ + .dumpit = _dump, \ + } + static struct genl_ops ieee802154_coordinator_ops[] = { IEEE802154_OP(IEEE802154_ASSOCIATE_REQ, ieee802154_associate_req), IEEE802154_OP(IEEE802154_ASSOCIATE_RESP, ieee802154_associate_resp), IEEE802154_OP(IEEE802154_DISASSOCIATE_REQ, ieee802154_disassociate_req), IEEE802154_OP(IEEE802154_SCAN_REQ, ieee802154_scan_req), IEEE802154_OP(IEEE802154_START_REQ, ieee802154_start_req), + IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface, + ieee802154_dump_iface), }; static int __init ieee802154_nl_init(void) -- cgit v1.2.3 From aed7df86983ead0af8364fd26d8a9609ef2ed584 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 9 Aug 2009 03:27:18 +0000 Subject: net: include/linux/icmpv6: includecheck fix for icmpv6.h fix the following 'make includecheck' warning: include/linux/icmpv6.h: linux/skbuff.h is included more than once. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: David S. 
Miller --- include/linux/icmpv6.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index b6a85183c333..c0d8357917e2 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -171,8 +171,6 @@ struct icmp6_filter { #ifdef __KERNEL__ #include -#include - extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, -- cgit v1.2.3 From 2e955856ff1212bd63dbbf403940c72eca5b4a8f Mon Sep 17 00:00:00 2001 From: françois romieu Date: Mon, 10 Aug 2009 19:44:19 +0000 Subject: r8169: phy init for the 8169scd Synced with Realtek's 6.011.00 r8169 driver. Signed-off-by: Francois Romieu Cc: Edward Hsu Signed-off-by: David S. Miller --- drivers/net/r8169.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 2 ++ 2 files changed, 70 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 86722886ae47..4470fefbe97e 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1389,6 +1389,71 @@ static void rtl8169sb_hw_phy_config(void __iomem *ioaddr) rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init)); } +static void rtl8169scd_hw_phy_config_quirk(struct rtl8169_private *tp, + void __iomem *ioaddr) +{ + struct pci_dev *pdev = tp->pci_dev; + u16 vendor_id, device_id; + + pci_read_config_word(pdev, PCI_SUBSYSTEM_VENDOR_ID, &vendor_id); + pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &device_id); + + if ((vendor_id != PCI_VENDOR_ID_GIGABYTE) || (device_id != 0xe000)) + return; + + mdio_write(ioaddr, 0x1f, 0x0001); + mdio_write(ioaddr, 0x10, 0xf01b); + mdio_write(ioaddr, 0x1f, 0x0000); +} + +static void rtl8169scd_hw_phy_config(struct rtl8169_private *tp, + void __iomem *ioaddr) +{ + struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x04, 0x0000 }, + { 0x03, 0x00a1 }, + { 0x02, 0x0008 }, + { 0x01, 0x0120 }, + { 0x00, 0x1000 }, + { 0x04, 0x0800 }, + { 0x04, 0x9000 }, + { 0x03, 0x802f }, + { 0x02, 0x4f02 }, + { 0x01, 0x0409 }, + { 0x00, 0xf099 }, + { 0x04, 0x9800 }, + { 0x04, 0xa000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0xff95 }, + { 0x00, 0xba00 }, + { 0x04, 0xa800 }, + { 0x04, 0xf000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0x101a }, + { 0x00, 0xa0ff }, + { 0x04, 0xf800 }, + { 0x04, 0x0000 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x10, 0xf41b }, + { 0x14, 0xfb54 }, + { 0x18, 0xf5c7 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, + { 0x1f, 0x0000 } + }; + + rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init)); + + rtl8169scd_hw_phy_config_quirk(tp, ioaddr); +} + static void rtl8169sce_hw_phy_config(void __iomem *ioaddr) { struct phy_reg phy_reg_init[] = { @@ -1681,6 +1746,9 @@ static void rtl_hw_phy_config(struct net_device *dev) case RTL_GIGA_MAC_VER_04: rtl8169sb_hw_phy_config(ioaddr); break; + case RTL_GIGA_MAC_VER_05: + rtl8169scd_hw_phy_config(tp, ioaddr); + break; case RTL_GIGA_MAC_VER_06: rtl8169sce_hw_phy_config(ioaddr); break; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 73b46b6b904f..fdc3110c90c0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1984,6 +1984,8 @@ #define PCI_VENDOR_ID_SAMSUNG 0x144d +#define PCI_VENDOR_ID_GIGABYTE 0x1458 + #define PCI_VENDOR_ID_AMBIT 0x1468 #define PCI_VENDOR_ID_MYRICOM 0x14c1 -- cgit v1.2.3 From 9188499cdb117d86a1ea6b04374095b098d56936 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 13 Aug 2009 09:44:57 -0400 Subject: security: introducing security_request_module Calling 
request_module() will trigger a userspace upcall which will load a new module into the kernel. This can be a dangerous event if the process able to trigger request_module() is able to control either the modprobe binary or the module binary. This patch adds a new security hook to request_module() which can be used by an LSM to control a process's ability to call request_module(). Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/security.h | 10 ++++++++++ kernel/kmod.c | 4 ++++ security/capability.c | 6 ++++++ security/security.c | 5 +++++ 4 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 57ead99d2593..1e3dd86eea99 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -678,6 +678,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @inode points to the inode to use as a reference. * The current task must be the one that nominated @inode. * Return 0 if successful. + * @kernel_module_request: + * Ability to trigger the kernel to automatically upcall to userspace for + * userspace to load a kernel module with the given name. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -1489,6 +1492,7 @@ struct security_operations { void (*cred_commit)(struct cred *new, const struct cred *old); int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); + int (*kernel_module_request)(void); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); @@ -1741,6 +1745,7 @@ int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); void security_commit_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); +int security_kernel_module_request(void); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); @@ -2292,6 +2297,11 @@ static inline int security_kernel_create_files_as(struct cred *cred, return 0; } +static inline int security_kernel_module_request(void) +{ + return 0; +} + static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { diff --git a/kernel/kmod.c b/kernel/kmod.c index 385c31a1bdbf..5a7ae57f983f 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -78,6 +78,10 @@ int __request_module(bool wait, const char *fmt, ...)
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ static int kmod_loop_msg; + ret = security_kernel_module_request(); + if (ret) + return ret; + va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); diff --git a/security/capability.c b/security/capability.c index ec0573054024..1b943f54b2ea 100644 --- a/security/capability.c +++ b/security/capability.c @@ -396,6 +396,11 @@ static int cap_kernel_create_files_as(struct cred *new, struct inode *inode) return 0; } +static int cap_kernel_module_request(void) +{ + return 0; +} + static int cap_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { return 0; @@ -945,6 +950,7 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, cred_commit); set_to_cap_if_null(ops, kernel_act_as); set_to_cap_if_null(ops, kernel_create_files_as); + set_to_cap_if_null(ops, kernel_module_request); set_to_cap_if_null(ops, task_setuid); set_to_cap_if_null(ops, task_fix_setuid); set_to_cap_if_null(ops, task_setgid); diff --git a/security/security.c b/security/security.c index 4501c5e1f988..0e993f42ce3d 100644 --- a/security/security.c +++ b/security/security.c @@ -709,6 +709,11 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode) return security_ops->kernel_create_files_as(new, inode); } +int security_kernel_module_request(void) +{ + return security_ops->kernel_module_request(); +} + int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { return security_ops->task_setuid(id0, id1, id2, flags); -- cgit v1.2.3 From f322abf83feddc3c37c3a91794e0c5aece4af18e Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 14 Aug 2009 11:19:29 +1000 Subject: security: update documentation for security_request_module Update documentation for security_request_module to indicate return value, as suggested by Serge Hallyn. Signed-off-by: James Morris --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 1e3dd86eea99..a16d6b7c4ebe 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -681,6 +681,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @kernel_module_request: * Ability to trigger the kernel to automatically upcall to userspace for * userspace to load a kernel module with the given name. + * Return 0 if successful. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates -- cgit v1.2.3 From 00ae4064b1445524752575dd84df227c0687c99d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:49 +0900 Subject: percpu: rename 4k first chunk allocator to page Page size isn't always 4k depending on arch and configuration. Rename 4k first chunk allocator to page. Signed-off-by: Tejun Heo Cc: David Howells --- Documentation/kernel-parameters.txt | 2 +- arch/x86/kernel/setup_percpu.c | 23 ++++++++++++----------- include/linux/percpu.h | 2 +- mm/percpu.c | 25 ++++++++++++++----------- 4 files changed, 28 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7936b801fe6a..12e9eb77ee0d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1920,7 +1920,7 @@ and is between 256 and 4096 characters. 
It is defined in the file See arch/parisc/kernel/pdc_chassis.c percpu_alloc= [X86] Select which percpu first chunk allocator to use. - Allowed values are one of "lpage", "embed" and "4k". + Allowed values are one of "lpage", "embed" and "page". See comments in arch/x86/kernel/setup_percpu.c for details on each allocator. This parameter is primarily for debugging and performance comparison. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index a26ff61e2fb0..1e17711c29d6 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -249,21 +249,22 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) } /* - * 4k allocator + * Page allocator * - * Boring fallback 4k allocator. This allocator puts more pressure on - * PTE TLBs but other than that behaves nicely on both UMA and NUMA. + * Boring fallback 4k page allocator. This allocator puts more + * pressure on PTE TLBs but other than that behaves nicely on both UMA + * and NUMA. */ -static void __init pcpu4k_populate_pte(unsigned long addr) +static void __init pcpup_populate_pte(unsigned long addr) { populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_4k(size_t static_size) +static ssize_t __init setup_pcpu_page(size_t static_size) { - return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - pcpu_fc_alloc, pcpu_fc_free, - pcpu4k_populate_pte); + return pcpu_page_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpup_populate_pte); } /* for explicit first chunk allocator selection */ @@ -307,7 +308,7 @@ void __init setup_per_cpu_areas(void) */ ret = -EINVAL; if (strlen(pcpu_chosen_alloc)) { - if (strcmp(pcpu_chosen_alloc, "4k")) { + if (strcmp(pcpu_chosen_alloc, "page")) { if (!strcmp(pcpu_chosen_alloc, "lpage")) ret = setup_pcpu_lpage(static_size, true); else if (!strcmp(pcpu_chosen_alloc, "embed")) @@ -317,7 +318,7 @@ void __init setup_per_cpu_areas(void) "specified\n", pcpu_chosen_alloc); if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " - "falling back to 4k\n", + "falling back to page size\n", pcpu_chosen_alloc, ret); } } else { @@ -326,7 +327,7 @@ void __init setup_per_cpu_areas(void) ret = setup_pcpu_embed(static_size, false); } if (ret < 0) - ret = setup_pcpu_4k(static_size); + ret = setup_pcpu_page(static_size); if (ret < 0) panic("cannot allocate static percpu area (%zu bytes, err=%zd)", static_size, ret); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e134c8229631..7989f61b03f3 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -74,7 +74,7 @@ extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); -extern ssize_t __init pcpu_4k_first_chunk( +extern ssize_t __init pcpu_page_first_chunk( size_t static_size, size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, diff --git a/mm/percpu.c b/mm/percpu.c index cbddcbdab681..6feac7934904 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1497,15 +1497,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, } /** - * pcpu_4k_first_chunk - map the first chunk using PAGE_SIZE pages + * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE * @free_fn: funtion to free percpu page, always called with PAGE_SIZE * 
@populate_pte_fn: function to populate pte * - * This is a helper to ease setting up embedded first percpu chunk and - * can be called where pcpu_setup_first_chunk() is expected. + * This is a helper to ease setting up page-remapped first percpu + * chunk and can be called where pcpu_setup_first_chunk() is expected. * * This is the basic allocator. Static percpu area is allocated * page-by-page into vmalloc area. @@ -1514,12 +1514,13 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_populate_pte_fn_t populate_pte_fn) +ssize_t __init pcpu_page_first_chunk(size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + char psize_str[16]; int unit_pages; size_t pages_size; struct page **pages; @@ -1527,6 +1528,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, int i, j; ssize_t ret; + snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); + unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, PCPU_MIN_UNIT_SIZE)); @@ -1542,8 +1545,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, ptr = alloc_fn(cpu, PAGE_SIZE); if (!ptr) { - pr_warning("PERCPU: failed to allocate " - "4k page for cpu%u\n", cpu); + pr_warning("PERCPU: failed to allocate %s page " + "for cpu%u\n", psize_str, cpu); goto enomem; } pages[j++] = virt_to_page(ptr); @@ -1580,8 +1583,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, } /* we're ready, commit */ - pr_info("PERCPU: %d 4k pages/cpu @%p s%zu r%zu\n", - unit_pages, vm.addr, static_size, reserved_size); + pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu\n", + unit_pages, psize_str, vm.addr, static_size, reserved_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, unit_pages << PAGE_SHIFT, vm.addr, NULL); -- cgit v1.2.3 From 08fc45806103e59a37418e84719b878f9bb32540 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:49 +0900 Subject: percpu: build first chunk allocators selectively There's no need to build unused first chunk allocators in. Define CONFIG_NEED_PER_CPU_*_FIRST_CHUNK and let archs enable them selectively. 
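To make the opt-in concrete: an architecture selects only the allocators it needs and the rest are compiled out. A minimal, hypothetical sketch of an arch that wants just the page allocator; arch_fc_alloc, arch_fc_free and arch_populate_pte are placeholder names, not from this series, and the call uses the pcpu_page_first_chunk() signature as of this commit:

#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
/* hypothetical arch code: only the page first chunk allocator is built in */
void __init setup_per_cpu_areas(void)
{
	ssize_t unit_size;

	unit_size = pcpu_page_first_chunk(__per_cpu_end - __per_cpu_start,
					  PERCPU_MODULE_RESERVE,
					  arch_fc_alloc, arch_fc_free,
					  arch_populate_pte);
	if (unit_size < 0)
		panic("cannot initialize percpu area (err=%zd)", unit_size);

	/* ... fill __per_cpu_offset[] using the returned unit size ... */
}
#endif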
Signed-off-by: Tejun Heo --- arch/x86/Kconfig | 10 ++++++++++ include/linux/percpu.h | 27 +++++---------------------- mm/percpu.c | 19 +++++++++++-------- 3 files changed, 26 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e06b2eeff9f2..f7ac27215512 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -150,6 +150,16 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y + +config NEED_PER_CPU_LPAGE_FIRST_CHUNK + def_bool y + depends on NEED_MULTIPLE_NODES + config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 7989f61b03f3..e26788e0da4a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -70,17 +70,21 @@ extern size_t __init pcpu_setup_first_chunk( ssize_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map); +#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); +#endif +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK extern ssize_t __init pcpu_page_first_chunk( size_t static_size, size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); +#endif -#ifdef CONFIG_NEED_MULTIPLE_NODES +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern int __init pcpu_lpage_build_unit_map( size_t static_size, size_t reserved_size, ssize_t *dyn_sizep, size_t *unit_sizep, @@ -98,27 +102,6 @@ extern ssize_t __init pcpu_lpage_first_chunk( extern void *pcpu_lpage_remapped(void *kaddr); #else -static inline int pcpu_lpage_build_unit_map( - size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) -{ - return -EINVAL; -} - -static inline ssize_t __init pcpu_lpage_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) -{ - return -EINVAL; -} - static inline void *pcpu_lpage_remapped(void *kaddr) { return NULL; diff --git a/mm/percpu.c b/mm/percpu.c index 6feac7934904..7971997de310 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1414,8 +1414,9 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, return pcpu_unit_size; } -static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep) +static inline size_t pcpu_calc_fc_sizes(size_t static_size, + size_t reserved_size, + ssize_t *dyn_sizep) { size_t size_sum; @@ -1427,6 +1428,8 @@ static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, return size_sum; } +#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ + !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @static_size: the size of static percpu area in bytes @@ -1495,7 +1498,10 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, unit_size, base, NULL); } +#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || + !CONFIG_HAVE_SETUP_PER_CPU_AREA */ +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /** * pcpu_page_first_chunk - map the first chunk using 
PAGE_SIZE pages * @static_size: the size of static percpu area in bytes @@ -1598,12 +1604,9 @@ out_free_ar: free_bootmem(__pa(pages), pages_size); return ret; } +#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ -/* - * Large page remapping first chunk setup helper - */ -#ifdef CONFIG_NEED_MULTIPLE_NODES - +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** * pcpu_lpage_build_unit_map - build unit_map for large page remapping * @static_size: the size of static percpu area in bytes @@ -1982,7 +1985,7 @@ void *pcpu_lpage_remapped(void *kaddr) return NULL; } -#endif +#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */ /* * Generic percpu area setup. -- cgit v1.2.3 From f58dc01ba2ca9fe3ab2ba4ca43d9c8a735cf62d8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: generalize first chunk allocator selection Now that all first chunk allocators are in mm/percpu.c, it makes sense to make generalize percpu_alloc kernel parameter. Define PCPU_FC_* and set pcpu_chosen_fc using early_param() in mm/percpu.c. Arch code can use the set value to determine which first chunk allocator to use. Signed-off-by: Tejun Heo --- Documentation/kernel-parameters.txt | 11 ++++++----- arch/x86/kernel/setup_percpu.c | 24 ++++++------------------ include/linux/percpu.h | 12 ++++++++++++ mm/percpu.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 12e9eb77ee0d..dee9ce2e6cfa 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1919,11 +1919,12 @@ and is between 256 and 4096 characters. It is defined in the file Format: { 0 | 1 } See arch/parisc/kernel/pdc_chassis.c - percpu_alloc= [X86] Select which percpu first chunk allocator to use. - Allowed values are one of "lpage", "embed" and "page". - See comments in arch/x86/kernel/setup_percpu.c for - details on each allocator. This parameter is primarily - for debugging and performance comparison. + percpu_alloc= Select which percpu first chunk allocator to use. + Currently supported values are "embed", "page" and + "lpage". Archs may support subset or none of the + selections. See comments in mm/percpu.c for details + on each allocator. This parameter is primarily for + debugging and performance comparison. pf. [PARIDE] See Documentation/blockdev/paride.txt. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1e17711c29d6..b961d99e6416 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -267,16 +267,6 @@ static ssize_t __init setup_pcpu_page(size_t static_size) pcpup_populate_pte); } -/* for explicit first chunk allocator selection */ -static char pcpu_chosen_alloc[16] __initdata; - -static int __init percpu_alloc_setup(char *str) -{ - strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); - return 0; -} -early_param("percpu_alloc", percpu_alloc_setup); - static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -307,19 +297,17 @@ void __init setup_per_cpu_areas(void) * each allocator for details. 
*/ ret = -EINVAL; - if (strlen(pcpu_chosen_alloc)) { - if (strcmp(pcpu_chosen_alloc, "page")) { - if (!strcmp(pcpu_chosen_alloc, "lpage")) + if (pcpu_chosen_fc != PCPU_FC_AUTO) { + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + if (pcpu_chosen_fc == PCPU_FC_LPAGE) ret = setup_pcpu_lpage(static_size, true); - else if (!strcmp(pcpu_chosen_alloc, "embed")) - ret = setup_pcpu_embed(static_size, true); else - pr_warning("PERCPU: unknown allocator %s " - "specified\n", pcpu_chosen_alloc); + ret = setup_pcpu_embed(static_size, true); + if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " "falling back to page size\n", - pcpu_chosen_alloc, ret); + pcpu_fc_names[pcpu_chosen_fc], ret); } } else { ret = setup_pcpu_lpage(static_size, false); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e26788e0da4a..9be05cbe5ee0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,6 +59,18 @@ extern void *pcpu_base_addr; extern const int *pcpu_unit_map; +enum pcpu_fc { + PCPU_FC_AUTO, + PCPU_FC_EMBED, + PCPU_FC_PAGE, + PCPU_FC_LPAGE, + + PCPU_FC_NR, +}; +extern const char *pcpu_fc_names[PCPU_FC_NR]; + +extern enum pcpu_fc pcpu_chosen_fc; + typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); diff --git a/mm/percpu.c b/mm/percpu.c index 7971997de310..7fb40bb1555a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1414,6 +1414,38 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, return pcpu_unit_size; } +const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { + [PCPU_FC_AUTO] = "auto", + [PCPU_FC_EMBED] = "embed", + [PCPU_FC_PAGE] = "page", + [PCPU_FC_LPAGE] = "lpage", +}; + +enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO; + +static int __init percpu_alloc_setup(char *str) +{ + if (0) + /* nada */; +#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK + else if (!strcmp(str, "embed")) + pcpu_chosen_fc = PCPU_FC_EMBED; +#endif +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK + else if (!strcmp(str, "page")) + pcpu_chosen_fc = PCPU_FC_PAGE; +#endif +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK + else if (!strcmp(str, "lpage")) + pcpu_chosen_fc = PCPU_FC_LPAGE; +#endif + else + pr_warning("PERCPU: unknown allocator %s specified\n", str); + + return 0; +} +early_param("percpu_alloc", percpu_alloc_setup); + static inline size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, ssize_t *dyn_sizep) -- cgit v1.2.3 From 9a7737691e90d3cce0e5248f91826c50e5aa3fcf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: drop @static_size from first chunk allocators First chunk allocators assume percpu areas have been linked using one of PERCPU_*() macros and depend on __per_cpu_load symbol defined by those macros, so there isn't much point in passing in static area size explicitly when it can be easily calculated from __per_cpu_start and __per_cpu_end. Drop @static_size from all percpu first chunk allocators and helpers. 
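Restated as code, the dropped parameter is replaced by a single local computed from the linker-provided section bounds; this is the exact idiom the hunks below adopt in each allocator:

	/* static percpu section size comes straight from the linker symbols
	 * emitted by the PERCPU_*() macros, so no caller needs to pass it */
	const size_t static_size = __per_cpu_end - __per_cpu_start;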
Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 34 +++++++++++++++------------------- include/linux/percpu.h | 18 ++++++++---------- mm/percpu.c | 29 +++++++++++++---------------- 3 files changed, 36 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b961d99e6416..8aad486c688f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -157,7 +157,7 @@ static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; @@ -184,8 +184,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) return -ENOMEM; } - ret = pcpu_lpage_build_unit_map(static_size, - PERCPU_FIRST_CHUNK_RESERVE, + ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, &dyn_size, &unit_size, PMD_SIZE, unit_map, pcpu_lpage_cpu_distance); if (ret < 0) { @@ -208,9 +207,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) } } - ret = pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - dyn_size, unit_size, PMD_SIZE, - unit_map, nr_units, + ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + unit_size, PMD_SIZE, unit_map, nr_units, pcpu_fc_alloc, pcpu_fc_free, pcpul_map); out_free: if (ret < 0) @@ -218,7 +216,7 @@ out_free: return ret; } #else -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_lpage(bool chosen) { return -EINVAL; } @@ -232,7 +230,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) * mapping so that it can use PMD mapping without additional TLB * pressure. 
*/ -static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_embed(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -244,7 +242,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) if (!chosen && (!cpu_has_pse || pcpu_need_numa())) return -EINVAL; - return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, reserve - PERCPU_FIRST_CHUNK_RESERVE); } @@ -260,9 +258,9 @@ static void __init pcpup_populate_pte(unsigned long addr) populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_page(size_t static_size) +static ssize_t __init setup_pcpu_page(void) { - return pcpu_page_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, pcpu_fc_alloc, pcpu_fc_free, pcpup_populate_pte); } @@ -282,7 +280,6 @@ static inline void setup_percpu_segment(int cpu) void __init setup_per_cpu_areas(void) { - size_t static_size = __per_cpu_end - __per_cpu_start; unsigned int cpu; unsigned long delta; size_t pcpu_unit_size; @@ -300,9 +297,9 @@ void __init setup_per_cpu_areas(void) if (pcpu_chosen_fc != PCPU_FC_AUTO) { if (pcpu_chosen_fc != PCPU_FC_PAGE) { if (pcpu_chosen_fc == PCPU_FC_LPAGE) - ret = setup_pcpu_lpage(static_size, true); + ret = setup_pcpu_lpage(true); else - ret = setup_pcpu_embed(static_size, true); + ret = setup_pcpu_embed(true); if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " @@ -310,15 +307,14 @@ void __init setup_per_cpu_areas(void) pcpu_fc_names[pcpu_chosen_fc], ret); } } else { - ret = setup_pcpu_lpage(static_size, false); + ret = setup_pcpu_lpage(false); if (ret < 0) - ret = setup_pcpu_embed(static_size, false); + ret = setup_pcpu_embed(false); } if (ret < 0) - ret = setup_pcpu_page(static_size); + ret = setup_pcpu_page(); if (ret < 0) - panic("cannot allocate static percpu area (%zu bytes, err=%zd)", - static_size, ret); + panic("cannot initialize percpu area (err=%zd)", ret); pcpu_unit_size = ret; diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9be05cbe5ee0..be2fc8fb9b6f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -84,13 +84,12 @@ extern size_t __init pcpu_setup_first_chunk( #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( - size_t static_size, size_t reserved_size, - ssize_t dyn_size); + size_t reserved_size, ssize_t dyn_size); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK extern ssize_t __init pcpu_page_first_chunk( - size_t static_size, size_t reserved_size, + size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); @@ -98,16 +97,15 @@ extern ssize_t __init pcpu_page_first_chunk( #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern int __init pcpu_lpage_build_unit_map( - size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, + size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); extern ssize_t __init pcpu_lpage_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, 
pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 7fb40bb1555a..e2ac58a39bb2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1464,7 +1464,6 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @@ -1489,9 +1488,9 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size) +ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { + const size_t static_size = __per_cpu_end - __per_cpu_start; size_t size_sum, unit_size, chunk_size; void *base; unsigned int cpu; @@ -1536,7 +1535,6 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /** * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE * @free_fn: funtion to free percpu page, always called with PAGE_SIZE @@ -1552,12 +1550,13 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_page_first_chunk(size_t static_size, size_t reserved_size, +ssize_t __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + const size_t static_size = __per_cpu_end - __per_cpu_start; char psize_str[16]; int unit_pages; size_t pages_size; @@ -1641,7 +1640,6 @@ out_free_ar: #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** * pcpu_lpage_build_unit_map - build unit_map for large page remapping - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_sizep: in/out parameter for dynamic size, -1 for auto * @unit_sizep: out parameter for unit size @@ -1661,13 +1659,14 @@ out_free_ar: * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and * returns the number of units to be allocated. -errno on failure. 
*/ -int __init pcpu_lpage_build_unit_map(size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, +int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, pcpu_fc_cpu_distance_fn_t cpu_distance_fn) { static int group_map[NR_CPUS] __initdata; static int group_cnt[NR_CPUS] __initdata; + const size_t static_size = __per_cpu_end - __per_cpu_start; int group_cnt_max = 0; size_t size_sum, min_unit_size, alloc_size; int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ @@ -1819,7 +1818,6 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes * @unit_size: unit size in bytes @@ -1850,15 +1848,15 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, +ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; + const size_t static_size = __per_cpu_end - __per_cpu_start; size_t chunk_size = unit_size * nr_units; size_t map_size; unsigned int cpu; @@ -2037,7 +2035,6 @@ EXPORT_SYMBOL(__per_cpu_offset); void __init setup_per_cpu_areas(void) { - size_t static_size = __per_cpu_end - __per_cpu_start; ssize_t unit_size; unsigned long delta; unsigned int cpu; @@ -2046,7 +2043,7 @@ void __init setup_per_cpu_areas(void) * Always reserve area for module percpu variables. That's * what the legacy allocator did. */ - unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, + unit_size = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE); if (unit_size < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From 1d9d32572163b30be81dbe1409dfa7ea9763d0e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: make @dyn_size mandatory for pcpu_setup_first_chunk() Now that all actual first chunk allocation and copying happen in the first chunk allocators and helpers, there's no reason for pcpu_setup_first_chunk() to try to determine @dyn_size automatically. The only left user is page first chunk allocator. Make it determine dyn_size like other allocators and make @dyn_size mandatory for pcpu_setup_first_chunk(). 
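Condensed from the page allocator hunk below, the resulting flow: the allocator resolves the "auto" (-1) dynamic size itself via pcpu_calc_fc_sizes(), so pcpu_setup_first_chunk() always receives a concrete @dyn_size:

	ssize_t dyn_size = -1;	/* -1: let pcpu_calc_fc_sizes() pick */
	size_t size_sum, unit_size;

	/* resolves dyn_size in place and returns the page-aligned total */
	size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
	unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);

	ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size,
				     unit_size, vm.addr, NULL);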
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- mm/percpu.c | 39 +++++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index be2fc8fb9b6f..0cfdd14d096a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -79,7 +79,7 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t unit_size, + size_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK diff --git a/mm/percpu.c b/mm/percpu.c index e2ac58a39bb2..287f59cc5fb9 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1235,7 +1235,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes, 0 for none - * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @dyn_size: free size for dynamic allocation in bytes * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE * @base_addr: mapped address * @unit_map: cpu -> unit map, NULL for sequential mapping @@ -1252,10 +1252,9 @@ EXPORT_SYMBOL_GPL(free_percpu); * limited offset range for symbol relocations to guarantee module * percpu symbols fall inside the relocatable range. * - * @dyn_size, if non-negative, determines the number of bytes - * available for dynamic allocation in the first chunk. Specifying - * non-negative value makes percpu leave alone the area beyond - * @static_size + @reserved_size + @dyn_size. + * @dyn_size determines the number of bytes available for dynamic + * allocation in the first chunk. The area between @static_size + + * @reserved_size + @dyn_size and @unit_size is unused. * * @unit_size specifies unit size and must be aligned to PAGE_SIZE and * equal to or larger than @static_size + @reserved_size + if @@ -1276,13 +1275,12 @@ EXPORT_SYMBOL_GPL(free_percpu); * percpu access. */ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t unit_size, + size_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map) { static struct vm_struct first_vm; static int smap[2], dmap[2]; - size_t size_sum = static_size + reserved_size + - (dyn_size >= 0 ? 
dyn_size : 0); + size_t size_sum = static_size + reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu, tcpu; int i; @@ -1345,9 +1343,6 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); - if (dyn_size < 0) - dyn_size = pcpu_unit_size - static_size - reserved_size; - first_vm.flags = VM_ALLOC; first_vm.size = pcpu_chunk_size; first_vm.addr = base_addr; @@ -1557,6 +1552,8 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, { static struct vm_struct vm; const size_t static_size = __per_cpu_end - __per_cpu_start; + ssize_t dyn_size = -1; + size_t size_sum, unit_size; char psize_str[16]; int unit_pages; size_t pages_size; @@ -1567,8 +1564,9 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); - unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, - PCPU_MIN_UNIT_SIZE)); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + unit_pages = unit_size >> PAGE_SHIFT; /* unaligned allocations can't be freed, round up to page size */ pages_size = PFN_ALIGN(unit_pages * nr_cpu_ids * sizeof(pages[0])); @@ -1591,12 +1589,12 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = nr_cpu_ids * unit_pages << PAGE_SHIFT; + vm.size = nr_cpu_ids * unit_size; vm_area_register_early(&vm, PAGE_SIZE); for_each_possible_cpu(cpu) { - unsigned long unit_addr = (unsigned long)vm.addr + - (cpu * unit_pages << PAGE_SHIFT); + unsigned long unit_addr = + (unsigned long)vm.addr + cpu * unit_size; for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); @@ -1620,11 +1618,12 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, } /* we're ready, commit */ - pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu\n", - unit_pages, psize_str, vm.addr, static_size, reserved_size); + pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", + unit_pages, psize_str, vm.addr, static_size, reserved_size, + dyn_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, - unit_pages << PAGE_SHIFT, vm.addr, NULL); + ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + unit_size, vm.addr, NULL); goto out_free_ar; enomem: -- cgit v1.2.3 From 3cbc85652767c38b252c8de55f9fd180b29e4c0d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: add @align to pcpu_fc_alloc_fn_t pcpu_fc_alloc_fn_t is about to see more interesting usage, add @align parameter. 
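The change in isolation: the callback typedef gains an explicit alignment, and implementations stop assuming align == size. A minimal sketch; my_fc_alloc and my_alloc_bootmem are placeholder names, while x86's real conversion follows below:

typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size,
				     size_t align);

/* sketch: pass the requested alignment through to the backing allocator */
static void * __init my_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
	return my_alloc_bootmem(cpu, size, align);	/* hypothetical backend */
}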
Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 4 ++-- include/linux/percpu.h | 3 ++- mm/percpu.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 8aad486c688f..660cde133141 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -126,9 +126,9 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, /* * Helpers for first chunk memory allocation */ -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size) +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return pcpu_alloc_bootmem(cpu, size, size); + return pcpu_alloc_bootmem(cpu, size, align); } static void __init pcpu_fc_free(void *ptr, size_t size) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 0cfdd14d096a..d385dbcf190b 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -71,7 +71,8 @@ extern const char *pcpu_fc_names[PCPU_FC_NR]; extern enum pcpu_fc pcpu_chosen_fc; -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, + size_t align); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); diff --git a/mm/percpu.c b/mm/percpu.c index 287f59cc5fb9..3316e3aac7ee 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1578,7 +1578,7 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, for (i = 0; i < unit_pages; i++) { void *ptr; - ptr = alloc_fn(cpu, PAGE_SIZE); + ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); if (!ptr) { pr_warning("PERCPU: failed to allocate %s page " "for cpu%u\n", psize_str, cpu); @@ -1888,7 +1888,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, goto found; continue; found: - ptr = alloc_fn(cpu, lpage_size); + ptr = alloc_fn(cpu, lpage_size, lpage_size); if (!ptr) { pr_warning("PERCPU: failed to allocate large page " "for cpu%u\n", cpu); -- cgit v1.2.3 From 033e48fb82958053113178264ddb9d5038d5e38b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: move pcpu_lpage_build_unit_map() and pcpul_lpage_dump_cfg() upward Unit map handling will be generalized and extended and used for embedding sparse first chunk and other purposes. Relocate two unit_map related functions upward in preparation. This patch just moves the code without any actual change. 
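One detail of the relocation is visible in the diff: pcpul_lpage_dump_cfg() now sits above the definition of pcpul_unit_to_cpu(), so the patch adds a forward declaration rather than moving the helper too. The idiom in miniature, as a simplified sketch rather than the patch's exact code:

/* forward declaration lets the relocated caller compile while the
 * helper's definition stays further down in the file */
static bool unit_to_cpu(int unit, const int *unit_map, unsigned int *cpup);

static void dump_units(const int *unit_map, int nr_units)
{
	unsigned int cpu;
	int unit;

	for (unit = 0; unit < nr_units; unit++)
		if (unit_to_cpu(unit, unit_map, &cpu))
			printk("unit %d -> cpu%u\n", unit, cpu);
}

static bool unit_to_cpu(int unit, const int *unit_map, unsigned int *cpup)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu)
		if (unit_map[cpu] == unit) {
			*cpup = cpu;
			return true;
		}
	return false;
}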
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 14 +- mm/percpu.c | 339 +++++++++++++++++++++++++------------------------ 2 files changed, 180 insertions(+), 173 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d385dbcf190b..570fb18de2ba 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -78,6 +78,14 @@ typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK +extern int __init pcpu_lpage_build_unit_map( + size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn); +#endif + extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, size_t dyn_size, size_t unit_size, @@ -97,12 +105,6 @@ extern ssize_t __init pcpu_page_first_chunk( #endif #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_build_unit_map( - size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn); - extern ssize_t __init pcpu_lpage_first_chunk( size_t reserved_size, size_t dyn_size, size_t unit_size, size_t lpage_size, diff --git a/mm/percpu.c b/mm/percpu.c index 3316e3aac7ee..2b9c4b2a2fc0 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1231,6 +1231,178 @@ void free_percpu(void *ptr) } EXPORT_SYMBOL_GPL(free_percpu); +static inline size_t pcpu_calc_fc_sizes(size_t static_size, + size_t reserved_size, + ssize_t *dyn_sizep) +{ + size_t size_sum; + + size_sum = PFN_ALIGN(static_size + reserved_size + + (*dyn_sizep >= 0 ? *dyn_sizep : 0)); + if (*dyn_sizep != 0) + *dyn_sizep = size_sum - static_size - reserved_size; + + return size_sum; +} + +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK +/** + * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_sizep: in/out parameter for dynamic size, -1 for auto + * @unit_sizep: out parameter for unit size + * @unit_map: unit_map to be filled + * @cpu_distance_fn: callback to determine distance between cpus + * + * This function builds cpu -> unit map and determine other parameters + * considering needed percpu size, large page size and distances + * between CPUs in NUMA. + * + * CPUs which are of LOCAL_DISTANCE both ways are grouped together and + * may share units in the same large page. The returned configuration + * is guaranteed to have CPUs on different nodes on different large + * pages and >=75% usage of allocated virtual address space. + * + * RETURNS: + * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and + * returns the number of units to be allocated. -errno on failure. 
+ */ +int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + static int group_map[NR_CPUS] __initdata; + static int group_cnt[NR_CPUS] __initdata; + const size_t static_size = __per_cpu_end - __per_cpu_start; + int group_cnt_max = 0; + size_t size_sum, min_unit_size, alloc_size; + int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ + int last_allocs; + unsigned int cpu, tcpu; + int group, unit; + + /* + * Determine min_unit_size, alloc_size and max_upa such that + * alloc_size is multiple of lpage_size and is the smallest + * which can accomodate 4k aligned segments which are equal to + * or larger than min_unit_size. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + + alloc_size = roundup(min_unit_size, lpage_size); + upa = alloc_size / min_unit_size; + while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + upa--; + max_upa = upa; + + /* group cpus according to their proximity */ + for_each_possible_cpu(cpu) { + group = 0; + next_group: + for_each_possible_cpu(tcpu) { + if (cpu == tcpu) + break; + if (group_map[tcpu] == group && + (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || + cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { + group++; + goto next_group; + } + } + group_map[cpu] = group; + group_cnt[group]++; + group_cnt_max = max(group_cnt_max, group_cnt[group]); + } + + /* + * Expand unit size until address space usage goes over 75% + * and then as much as possible without using more address + * space. + */ + last_allocs = INT_MAX; + for (upa = max_upa; upa; upa--) { + int allocs = 0, wasted = 0; + + if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + continue; + + for (group = 0; group_cnt[group]; group++) { + int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); + allocs += this_allocs; + wasted += this_allocs * upa - group_cnt[group]; + } + + /* + * Don't accept if wastage is over 25%. The + * greater-than comparison ensures upa==1 always + * passes the following check. 
+ */ + if (wasted > num_possible_cpus() / 3) + continue; + + /* and then don't consume more memory */ + if (allocs > last_allocs) + break; + last_allocs = allocs; + best_upa = upa; + } + *unit_sizep = alloc_size / best_upa; + + /* assign units to cpus accordingly */ + unit = 0; + for (group = 0; group_cnt[group]; group++) { + for_each_possible_cpu(cpu) + if (group_map[cpu] == group) + unit_map[cpu] = unit++; + unit = roundup(unit, best_upa); + } + + return unit; /* unit contains aligned number of units */ +} + +static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, + unsigned int *cpup); + +static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units) +{ + int width = 1, v = nr_units; + char empty_str[] = "--------"; + int upl, lpl; /* units per lpage, lpage per line */ + unsigned int cpu; + int lpage, unit; + + while (v /= 10) + width++; + empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + + upl = max_t(int, lpage_size / unit_size, 1); + lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + + printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, + static_size, reserved_size, dyn_size, unit_size, lpage_size); + + for (lpage = 0, unit = 0; unit < nr_units; unit++) { + if (!(unit % upl)) { + if (!(lpage++ % lpl)) { + printk("\n"); + printk("%spcpu-lpage: ", lvl); + } else + printk("| "); + } + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + printk("%0*d ", width, cpu); + else + printk("%s ", empty_str); + } + printk("\n"); +} +#endif + /** * pcpu_setup_first_chunk - initialize the first percpu chunk * @static_size: the size of static percpu area in bytes @@ -1441,20 +1613,6 @@ static int __init percpu_alloc_setup(char *str) } early_param("percpu_alloc", percpu_alloc_setup); -static inline size_t pcpu_calc_fc_sizes(size_t static_size, - size_t reserved_size, - ssize_t *dyn_sizep) -{ - size_t size_sum; - - size_sum = PFN_ALIGN(static_size + reserved_size + - (*dyn_sizep >= 0 ? *dyn_sizep : 0)); - if (*dyn_sizep != 0) - *dyn_sizep = size_sum - static_size - reserved_size; - - return size_sum; -} - #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** @@ -1637,122 +1795,6 @@ out_free_ar: #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -/** - * pcpu_lpage_build_unit_map - build unit_map for large page remapping - * @reserved_size: the size of reserved percpu area in bytes - * @dyn_sizep: in/out parameter for dynamic size, -1 for auto - * @unit_sizep: out parameter for unit size - * @unit_map: unit_map to be filled - * @cpu_distance_fn: callback to determine distance between cpus - * - * This function builds cpu -> unit map and determine other parameters - * considering needed percpu size, large page size and distances - * between CPUs in NUMA. - * - * CPUs which are of LOCAL_DISTANCE both ways are grouped together and - * may share units in the same large page. The returned configuration - * is guaranteed to have CPUs on different nodes on different large - * pages and >=75% usage of allocated virtual address space. - * - * RETURNS: - * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and - * returns the number of units to be allocated. -errno on failure. 
- */ -int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) -{ - static int group_map[NR_CPUS] __initdata; - static int group_cnt[NR_CPUS] __initdata; - const size_t static_size = __per_cpu_end - __per_cpu_start; - int group_cnt_max = 0; - size_t size_sum, min_unit_size, alloc_size; - int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ - int last_allocs; - unsigned int cpu, tcpu; - int group, unit; - - /* - * Determine min_unit_size, alloc_size and max_upa such that - * alloc_size is multiple of lpage_size and is the smallest - * which can accomodate 4k aligned segments which are equal to - * or larger than min_unit_size. - */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); - min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - - alloc_size = roundup(min_unit_size, lpage_size); - upa = alloc_size / min_unit_size; - while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) - upa--; - max_upa = upa; - - /* group cpus according to their proximity */ - for_each_possible_cpu(cpu) { - group = 0; - next_group: - for_each_possible_cpu(tcpu) { - if (cpu == tcpu) - break; - if (group_map[tcpu] == group && - (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || - cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { - group++; - goto next_group; - } - } - group_map[cpu] = group; - group_cnt[group]++; - group_cnt_max = max(group_cnt_max, group_cnt[group]); - } - - /* - * Expand unit size until address space usage goes over 75% - * and then as much as possible without using more address - * space. - */ - last_allocs = INT_MAX; - for (upa = max_upa; upa; upa--) { - int allocs = 0, wasted = 0; - - if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) - continue; - - for (group = 0; group_cnt[group]; group++) { - int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); - allocs += this_allocs; - wasted += this_allocs * upa - group_cnt[group]; - } - - /* - * Don't accept if wastage is over 25%. The - * greater-than comparison ensures upa==1 always - * passes the following check. 
- */ - if (wasted > num_possible_cpus() / 3) - continue; - - /* and then don't consume more memory */ - if (allocs > last_allocs) - break; - last_allocs = allocs; - best_upa = upa; - } - *unit_sizep = alloc_size / best_upa; - - /* assign units to cpus accordingly */ - unit = 0; - for (group = 0; group_cnt[group]; group++) { - for_each_possible_cpu(cpu) - if (group_map[cpu] == group) - unit_map[cpu] = unit++; - unit = roundup(unit, best_upa); - } - - return unit; /* unit contains aligned number of units */ -} - struct pcpul_ent { void *ptr; void *map_addr; @@ -1778,43 +1820,6 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, return false; } -static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units) -{ - int width = 1, v = nr_units; - char empty_str[] = "--------"; - int upl, lpl; /* units per lpage, lpage per line */ - unsigned int cpu; - int lpage, unit; - - while (v /= 10) - width++; - empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; - - upl = max_t(int, lpage_size / unit_size, 1); - lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); - - printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, - static_size, reserved_size, dyn_size, unit_size, lpage_size); - - for (lpage = 0, unit = 0; unit < nr_units; unit++) { - if (!(unit % upl)) { - if (!(lpage++ % lpl)) { - printk("\n"); - printk("%spcpu-lpage: ", lvl); - } else - printk("| "); - } - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) - printk("%0*d ", width, cpu); - else - printk("%s ", empty_str); - } - printk("\n"); -} - /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page * @reserved_size: the size of reserved percpu area in bytes -- cgit v1.2.3 From fd1e8a1fe2b54df6c185b4fa65f181f50b9c4d4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: introduce pcpu_alloc_info and pcpu_group_info Till now, the non-linear cpu->unit map was expressed using an integer array which maps each cpu to a unit and used only by the lpage allocator. Although how many units have been placed in a single contiguous area (group) is known while building unit_map, the information is lost when the result is recorded into the unit_map array. For the lpage allocator, as all allocations are done by lpages and whether two adjacent lpages are in the same group or not is irrelevant, this didn't cause any problem. Non-linear cpu->unit mapping will be used for sparse embedding and this grouping information is necessary for that. This patch introduces pcpu_alloc_info which contains all the information necessary for initializing the percpu allocator. pcpu_alloc_info contains an array of pcpu_group_info which describes how units are grouped and mapped to cpus. pcpu_group_info also has a base_offset field to specify its offset from the chunk's base address. pcpu_build_alloc_info() initializes this field as if all groups are allocated back-to-back as is currently done, but this will be used to sparsely place groups. pcpu_alloc_info is a rather complex data structure which contains a flexible array which in turn points to nested cpu_map arrays. * pcpu_alloc_alloc_info() and pcpu_free_alloc_info() are provided to help deal with pcpu_alloc_info. * pcpu_lpage_build_unit_map() is updated to build pcpu_alloc_info, generalized and renamed to pcpu_build_alloc_info().
@cpu_distance_fn may be NULL indicating that all cpus are of LOCAL_DISTANCE. * pcpul_lpage_dump_cfg() is updated to process pcpu_alloc_info, generalized and renamed to pcpu_dump_alloc_info(). It now also prints which group each alloc unit belongs to. * pcpu_setup_first_chunk() now takes pcpu_alloc_info instead of the separate parameters. All first chunk allocators are updated to use pcpu_build_alloc_info() to build alloc_info and call pcpu_setup_first_chunk() with it. This has the side effect of packing units for sparse possible cpus. ie. if cpus 0, 2 and 4 are possible, they'll be assigned unit 0, 1 and 2 instead of 0, 2 and 4. * x86 setup_pcpu_lpage() is updated to deal with alloc_info. * sparc64 setup_per_cpu_areas() is updated to build alloc_info. Although the changes made by this patch are pretty pervasive, it doesn't cause any behavior difference other than packing of sparse cpus. It mostly changes how information is passed among initialization functions and makes room for more flexibility. Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 24 +- arch/x86/kernel/setup_percpu.c | 38 ++- include/linux/percpu.h | 42 +++- mm/percpu.c | 529 +++++++++++++++++++++++++---------------- 4 files changed, 389 insertions(+), 244 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 9856d866b77b..a42a4a744d14 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1475,17 +1475,29 @@ static void __init pcpu_map_range(unsigned long start, unsigned long end, void __init setup_per_cpu_areas(void) { - size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; static struct vm_struct vm; + struct pcpu_alloc_info *ai; unsigned long delta, cpu; size_t size_sum, pcpu_unit_size; size_t ptrs_size; void **ptrs; - size_sum = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + ai = pcpu_alloc_alloc_info(1, nr_cpu_ids); + + ai->static_size = __per_cpu_end - __per_cpu_start; + ai->reserved_size = PERCPU_MODULE_RESERVE; + + size_sum = PFN_ALIGN(ai->static_size + ai->reserved_size + PERCPU_DYNAMIC_RESERVE); - dyn_size = size_sum - static_size - PERCPU_MODULE_RESERVE; + ai->dyn_size = size_sum - ai->static_size - ai->reserved_size; + ai->unit_size = PCPU_CHUNK_SIZE; + ai->atom_size = PCPU_CHUNK_SIZE; + ai->alloc_size = PCPU_CHUNK_SIZE; + ai->groups[0].nr_units = nr_cpu_ids; + + for_each_possible_cpu(cpu) + ai->groups[0].cpu_map[cpu] = cpu; ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0])); ptrs = alloc_bootmem(ptrs_size); @@ -1497,7 +1509,7 @@ void __init setup_per_cpu_areas(void) free_bootmem(__pa(ptrs[cpu] + size_sum), PCPU_CHUNK_SIZE - size_sum); - memcpy(ptrs[cpu], __per_cpu_load, static_size); + memcpy(ptrs[cpu], __per_cpu_load, ai->static_size); } /* allocate address and map */ @@ -1514,9 +1526,7 @@ void __init setup_per_cpu_areas(void) pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(static_size, - PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr, NULL); + pcpu_unit_size = pcpu_setup_first_chunk(ai, vm.addr); free_bootmem(__pa(ptrs), ptrs_size); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 660cde133141..db5f9c49fec5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -161,9 +161,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; 
- size_t unit_map_size, unit_size; - int *unit_map; - int nr_units; + struct pcpu_alloc_info *ai; ssize_t ret; /* on non-NUMA, embedding is better */ @@ -177,26 +175,22 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) } /* allocate and build unit_map */ - unit_map_size = nr_cpu_ids * sizeof(int); - unit_map = alloc_bootmem_nopanic(unit_map_size); - if (!unit_map) { - pr_warning("PERCPU: failed to allocate unit_map\n"); - return -ENOMEM; + ai = pcpu_build_alloc_info(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + PMD_SIZE, pcpu_lpage_cpu_distance); + if (IS_ERR(ai)) { + pr_warning("PERCPU: failed to build unit_map (%ld)\n", + PTR_ERR(ai)); + return PTR_ERR(ai); } - ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, - &dyn_size, &unit_size, PMD_SIZE, - unit_map, pcpu_lpage_cpu_distance); - if (ret < 0) { - pr_warning("PERCPU: failed to build unit_map\n"); - goto out_free; - } - nr_units = ret; - /* do the parameters look okay? */ if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = nr_units * unit_size; + size_t tot_size = 0; + int group; + + for (group = 0; group < ai->nr_groups; group++) + tot_size += ai->unit_size * ai->groups[group].nr_units; /* don't consume more than 20% of vmalloc area */ if (tot_size > vm_size / 5) { @@ -207,12 +201,10 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) } } - ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - unit_size, PMD_SIZE, unit_map, nr_units, - pcpu_fc_alloc, pcpu_fc_free, pcpul_map); + ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, + pcpul_map); out_free: - if (ret < 0) - free_bootmem(__pa(unit_map), unit_map_size); + pcpu_free_alloc_info(ai); return ret; } #else diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 570fb18de2ba..77b86be8ce4f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,6 +59,25 @@ extern void *pcpu_base_addr; extern const int *pcpu_unit_map; +struct pcpu_group_info { + int nr_units; /* aligned # of units */ + unsigned long base_offset; /* base address offset */ + unsigned int *cpu_map; /* unit->cpu map, empty + * entries contain NR_CPUS */ +}; + +struct pcpu_alloc_info { + size_t static_size; + size_t reserved_size; + size_t dyn_size; + size_t unit_size; + size_t atom_size; + size_t alloc_size; + size_t __ai_size; /* internal, don't use */ + int nr_groups; /* 0 if grouping unnecessary */ + struct pcpu_group_info groups[]; +}; + enum pcpu_fc { PCPU_FC_AUTO, PCPU_FC_EMBED, @@ -78,18 +97,17 @@ typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_build_unit_map( - size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, +extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, + int nr_units); +extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai); + +extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( + size_t reserved_size, ssize_t dyn_size, + size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); -#endif -extern size_t __init pcpu_setup_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - void *base_addr, const int *unit_map); +extern size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr); #ifdef 
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( @@ -106,9 +124,7 @@ extern ssize_t __init pcpu_page_first_chunk( #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern ssize_t __init pcpu_lpage_first_chunk( - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units, + const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 2b9c4b2a2fc0..99f7fa682722 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -58,6 +58,7 @@ #include #include +#include #include #include #include @@ -1245,53 +1246,108 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, return size_sum; } -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** - * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * pcpu_alloc_alloc_info - allocate percpu allocation info + * @nr_groups: the number of groups + * @nr_units: the number of units + * + * Allocate ai which is large enough for @nr_groups groups containing + * @nr_units units. The returned ai's groups[0].cpu_map points to the + * cpu_map array which is long enough for @nr_units and filled with + * NR_CPUS. It's the caller's responsibility to initialize cpu_map + * pointer of other groups. + * + * RETURNS: + * Pointer to the allocated pcpu_alloc_info on success, NULL on + * failure. + */ +struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, + int nr_units) +{ + struct pcpu_alloc_info *ai; + size_t base_size, ai_size; + void *ptr; + int unit; + + base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]), + __alignof__(ai->groups[0].cpu_map[0])); + ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); + + ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size)); + if (!ptr) + return NULL; + ai = ptr; + ptr += base_size; + + ai->groups[0].cpu_map = ptr; + + for (unit = 0; unit < nr_units; unit++) + ai->groups[0].cpu_map[unit] = NR_CPUS; + + ai->nr_groups = nr_groups; + ai->__ai_size = PFN_ALIGN(ai_size); + + return ai; +} + +/** + * pcpu_free_alloc_info - free percpu allocation info + * @ai: pcpu_alloc_info to free + * + * Free @ai which was allocated by pcpu_alloc_alloc_info(). + */ +void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) +{ + free_bootmem(__pa(ai), ai->__ai_size); +} + +/** + * pcpu_build_alloc_info - build alloc_info considering distances between CPUs * @reserved_size: the size of reserved percpu area in bytes - * @dyn_sizep: in/out parameter for dynamic size, -1 for auto - * @unit_sizep: out parameter for unit size - * @unit_map: unit_map to be filled - * @cpu_distance_fn: callback to determine distance between cpus + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @atom_size: allocation atom size + * @cpu_distance_fn: callback to determine distance between cpus, optional * - * This function builds cpu -> unit map and determine other parameters - * considering needed percpu size, large page size and distances - * between CPUs in NUMA. + * This function determines grouping of units, their mappings to cpus + * and other parameters considering needed percpu size, allocation + * atom size and distances between CPUs. * - * CPUs which are of LOCAL_DISTANCE both ways are grouped together and - * may share units in the same large page. The returned configuration - * is guaranteed to have CPUs on different nodes on different large - * pages and >=75% usage of allocated virtual address space. 
+ * Groups are always multiples of atom size and CPUs which are of + * LOCAL_DISTANCE both ways are grouped together and share space for + * units in the same group. The returned configuration is guaranteed + * to have CPUs on different nodes on different groups and >=75% usage + * of allocated virtual address space. * * RETURNS: - * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and - * returns the number of units to be allocated. -errno on failure. + * On success, pointer to the new allocation_info is returned. On + * failure, ERR_PTR value is returned. */ -int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +struct pcpu_alloc_info * __init pcpu_build_alloc_info( + size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) { static int group_map[NR_CPUS] __initdata; static int group_cnt[NR_CPUS] __initdata; const size_t static_size = __per_cpu_end - __per_cpu_start; - int group_cnt_max = 0; + int group_cnt_max = 0, nr_groups = 1, nr_units = 0; size_t size_sum, min_unit_size, alloc_size; int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ - int last_allocs; + int last_allocs, group, unit; unsigned int cpu, tcpu; - int group, unit; + struct pcpu_alloc_info *ai; + unsigned int *cpu_map; /* * Determine min_unit_size, alloc_size and max_upa such that - * alloc_size is multiple of lpage_size and is the smallest + * alloc_size is multiple of atom_size and is the smallest * which can accomodate 4k aligned segments which are equal to * or larger than min_unit_size. */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - alloc_size = roundup(min_unit_size, lpage_size); + alloc_size = roundup(min_unit_size, atom_size); upa = alloc_size / min_unit_size; while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) upa--; @@ -1304,10 +1360,11 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, for_each_possible_cpu(tcpu) { if (cpu == tcpu) break; - if (group_map[tcpu] == group && + if (group_map[tcpu] == group && cpu_distance_fn && (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { group++; + nr_groups = max(nr_groups, group + 1); goto next_group; } } @@ -1328,7 +1385,7 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) continue; - for (group = 0; group_cnt[group]; group++) { + for (group = 0; group < nr_groups; group++) { int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); allocs += this_allocs; wasted += this_allocs * upa - group_cnt[group]; @@ -1348,75 +1405,122 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, last_allocs = allocs; best_upa = upa; } - *unit_sizep = alloc_size / best_upa; + upa = best_upa; + + /* allocate and fill alloc_info */ + for (group = 0; group < nr_groups; group++) + nr_units += roundup(group_cnt[group], upa); + + ai = pcpu_alloc_alloc_info(nr_groups, nr_units); + if (!ai) + return ERR_PTR(-ENOMEM); + cpu_map = ai->groups[0].cpu_map; + + for (group = 0; group < nr_groups; group++) { + ai->groups[group].cpu_map = cpu_map; + cpu_map += roundup(group_cnt[group], upa); + } + + ai->static_size = static_size; +
ai->reserved_size = reserved_size; + ai->dyn_size = dyn_size; + ai->unit_size = alloc_size / upa; + ai->atom_size = atom_size; + ai->alloc_size = alloc_size; + + for (group = 0, unit = 0; group_cnt[group]; group++) { + struct pcpu_group_info *gi = &ai->groups[group]; + + /* + * Initialize base_offset as if all groups are located + * back-to-back. The caller should update this to + * reflect actual allocation. + */ + gi->base_offset = unit * ai->unit_size; - /* assign units to cpus accordingly */ - unit = 0; - for (group = 0; group_cnt[group]; group++) { for_each_possible_cpu(cpu) if (group_map[cpu] == group) - unit_map[cpu] = unit++; - unit = roundup(unit, best_upa); + gi->cpu_map[gi->nr_units++] = cpu; + gi->nr_units = roundup(gi->nr_units, upa); + unit += gi->nr_units; } + BUG_ON(unit != nr_units); - return unit; /* unit contains aligned number of units */ + return ai; } -static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, - unsigned int *cpup); - -static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units) +/** + * pcpu_dump_alloc_info - print out information about pcpu_alloc_info + * @lvl: loglevel + * @ai: allocation info to dump + * + * Print out information about @ai using loglevel @lvl. + */ +static void pcpu_dump_alloc_info(const char *lvl, + const struct pcpu_alloc_info *ai) { - int width = 1, v = nr_units; + int group_width = 1, cpu_width = 1, width; char empty_str[] = "--------"; - int upl, lpl; /* units per lpage, lpage per line */ - unsigned int cpu; - int lpage, unit; + int alloc = 0, alloc_end = 0; + int group, v; + int upa, apl; /* units per alloc, allocs per line */ + + v = ai->nr_groups; + while (v /= 10) + group_width++; + v = num_possible_cpus(); while (v /= 10) - width++; - empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + cpu_width++; + empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0'; - upl = max_t(int, lpage_size / unit_size, 1); - lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + upa = ai->alloc_size / ai->unit_size; + width = upa * (cpu_width + 1) + group_width + 3; + apl = rounddown_pow_of_two(max(60 / width, 1)); - printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, - static_size, reserved_size, dyn_size, unit_size, lpage_size); + printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu", + lvl, ai->static_size, ai->reserved_size, ai->dyn_size, + ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size); - for (lpage = 0, unit = 0; unit < nr_units; unit++) { - if (!(unit % upl)) { - if (!(lpage++ % lpl)) { + for (group = 0; group < ai->nr_groups; group++) { + const struct pcpu_group_info *gi = &ai->groups[group]; + int unit = 0, unit_end = 0; + + BUG_ON(gi->nr_units % upa); + for (alloc_end += gi->nr_units / upa; + alloc < alloc_end; alloc++) { + if (!(alloc % apl)) { printk("\n"); - printk("%spcpu-lpage: ", lvl); - } else - printk("| "); + printk("%spcpu-alloc: ", lvl); + } + printk("[%0*d] ", group_width, group); + + for (unit_end += upa; unit < unit_end; unit++) + if (gi->cpu_map[unit] != NR_CPUS) + printk("%0*d ", cpu_width, + gi->cpu_map[unit]); + else + printk("%s ", empty_str); } - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) - printk("%0*d ", width, cpu); - else - printk("%s ", empty_str); } printk("\n"); } -#endif /** * pcpu_setup_first_chunk - initialize the first percpu chunk - * @static_size: the size of static percpu area in 
bytes - * @reserved_size: the size of reserved percpu area in bytes, 0 for none - * @dyn_size: free size for dynamic allocation in bytes - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE + * @ai: pcpu_alloc_info describing how the percpu area is shaped * @base_addr: mapped address - * @unit_map: cpu -> unit map, NULL for sequential mapping * * Initialize the first percpu chunk which contains the kernel static * perpcu area. This function is to be called from arch percpu area * setup path. * - * @reserved_size, if non-zero, specifies the amount of bytes to + * @ai contains all information necessary to initialize the first + * chunk and prime the dynamic percpu allocator. + * + * @ai->static_size is the size of static percpu area. + * + * @ai->reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves * the first chunk such that it's available only through reserved * percpu allocation. This is primarily used to serve module percpu @@ -1424,13 +1528,26 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * limited offset range for symbol relocations to guarantee module * percpu symbols fall inside the relocatable range. * - * @dyn_size determines the number of bytes available for dynamic - * allocation in the first chunk. The area between @static_size + - * @reserved_size + @dyn_size and @unit_size is unused. + * @ai->dyn_size determines the number of bytes available for dynamic + * allocation in the first chunk. The area between @ai->static_size + + * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused. * - * @unit_size specifies unit size and must be aligned to PAGE_SIZE and - * equal to or larger than @static_size + @reserved_size + if - * non-negative, @dyn_size. + * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE + * and equal to or larger than @ai->static_size + @ai->reserved_size + + * @ai->dyn_size. + * + * @ai->atom_size is the allocation atom size and used as alignment + * for vm areas. + * + * @ai->alloc_size is the allocation size and always multiple of + * @ai->atom_size. This is larger than @ai->atom_size if + * @ai->unit_size is larger than @ai->atom_size. + * + * @ai->nr_groups and @ai->groups describe virtual memory layout of + * percpu areas. Units which should be colocated are put into the + * same group. Dynamic VM areas will be allocated according to these + * groupings. If @ai->nr_groups is zero, a single group containing + * all units is assumed. * * The caller should have mapped the first chunk at @base_addr and * copied static data to each unit. @@ -1446,70 +1563,63 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access.
*/ -size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - void *base_addr, const int *unit_map) +size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; - size_t size_sum = static_size + reserved_size + dyn_size; + size_t dyn_size = ai->dyn_size; + size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; - unsigned int cpu, tcpu; - int i; + unsigned int cpu; + int *unit_map; + int group, unit, i; /* sanity checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); - BUG_ON(!static_size); + BUG_ON(ai->nr_groups <= 0); + BUG_ON(!ai->static_size); BUG_ON(!base_addr); - BUG_ON(unit_size < size_sum); - BUG_ON(unit_size & ~PAGE_MASK); - BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + BUG_ON(ai->unit_size < size_sum); + BUG_ON(ai->unit_size & ~PAGE_MASK); + BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); + + pcpu_dump_alloc_info(KERN_DEBUG, ai); /* determine number of units and verify and initialize pcpu_unit_map */ - if (unit_map) { - int first_unit = INT_MAX, last_unit = INT_MIN; - - for_each_possible_cpu(cpu) { - int unit = unit_map[cpu]; - - BUG_ON(unit < 0); - for_each_possible_cpu(tcpu) { - if (tcpu == cpu) - break; - /* the mapping should be one-to-one */ - BUG_ON(unit_map[tcpu] == unit); - } + unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0])); - if (unit < first_unit) { - pcpu_first_unit_cpu = cpu; - first_unit = unit; - } - if (unit > last_unit) { - pcpu_last_unit_cpu = cpu; - last_unit = unit; - } - } - pcpu_nr_units = last_unit + 1; - pcpu_unit_map = unit_map; - } else { - int *identity_map; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + unit_map[cpu] = NR_CPUS; + pcpu_first_unit_cpu = NR_CPUS; - /* #units == #cpus, identity mapped */ - identity_map = alloc_bootmem(nr_cpu_ids * - sizeof(identity_map[0])); + for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { + const struct pcpu_group_info *gi = &ai->groups[group]; - for_each_possible_cpu(cpu) - identity_map[cpu] = cpu; + for (i = 0; i < gi->nr_units; i++) { + cpu = gi->cpu_map[i]; + if (cpu == NR_CPUS) + continue; - pcpu_first_unit_cpu = 0; - pcpu_last_unit_cpu = pcpu_nr_units - 1; - pcpu_nr_units = nr_cpu_ids; - pcpu_unit_map = identity_map; + BUG_ON(cpu > nr_cpu_ids || !cpu_possible(cpu)); + BUG_ON(unit_map[cpu] != NR_CPUS); + + unit_map[cpu] = unit + i; + if (pcpu_first_unit_cpu == NR_CPUS) + pcpu_first_unit_cpu = cpu; + } } + pcpu_last_unit_cpu = cpu; + pcpu_nr_units = unit; + + for_each_possible_cpu(cpu) + BUG_ON(unit_map[cpu] == NR_CPUS); + + pcpu_unit_map = unit_map; /* determine basic parameters */ - pcpu_unit_pages = unit_size >> PAGE_SHIFT; + pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + @@ -1543,17 +1653,17 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, schunk->immutable = true; bitmap_fill(schunk->populated, pcpu_unit_pages); - if (reserved_size) { - schunk->free_size = reserved_size; + if (ai->reserved_size) { + schunk->free_size = ai->reserved_size; pcpu_reserved_chunk = schunk; - pcpu_reserved_chunk_limit = static_size + reserved_size; + pcpu_reserved_chunk_limit = ai->static_size + ai->reserved_size; } else { schunk->free_size = dyn_size; dyn_size = 0; /* dynamic 
area covered */ } schunk->contig_hint = schunk->free_size; - schunk->map[schunk->map_used++] = -static_size; + schunk->map[schunk->map_used++] = -ai->static_size; if (schunk->free_size) schunk->map[schunk->map_used++] = schunk->free_size; @@ -1643,44 +1753,47 @@ early_param("percpu_alloc", percpu_alloc_setup); */ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { - const size_t static_size = __per_cpu_end - __per_cpu_start; - size_t size_sum, unit_size, chunk_size; + struct pcpu_alloc_info *ai; + size_t size_sum, chunk_size; void *base; - unsigned int cpu; + int unit; + ssize_t ret; - /* determine parameters and allocate */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); + if (IS_ERR(ai)) + return PTR_ERR(ai); + BUG_ON(ai->nr_groups != 1); + BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); - unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - chunk_size = unit_size * nr_cpu_ids; + size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; + chunk_size = ai->unit_size * num_possible_cpus(); base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); - return -ENOMEM; + ret = -ENOMEM; + goto out_free_ai; } /* return the leftover and copy */ - for (cpu = 0; cpu < nr_cpu_ids; cpu++) { - void *ptr = base + cpu * unit_size; - - if (cpu_possible(cpu)) { - free_bootmem(__pa(ptr + size_sum), - unit_size - size_sum); - memcpy(ptr, __per_cpu_load, static_size); - } else - free_bootmem(__pa(ptr), unit_size); + for (unit = 0; unit < num_possible_cpus(); unit++) { + void *ptr = base + unit * ai->unit_size; + + free_bootmem(__pa(ptr + size_sum), ai->unit_size - size_sum); + memcpy(ptr, __per_cpu_load, ai->static_size); } /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", - PFN_DOWN(size_sum), base, static_size, reserved_size, dyn_size, - unit_size); + PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, + ai->dyn_size, ai->unit_size); - return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, base, NULL); + ret = pcpu_setup_first_chunk(ai, base); +out_free_ai: + pcpu_free_alloc_info(ai); + return ret; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || !CONFIG_HAVE_SETUP_PER_CPU_AREA */ @@ -1709,31 +1822,34 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; - const size_t static_size = __per_cpu_end - __per_cpu_start; - ssize_t dyn_size = -1; - size_t size_sum, unit_size; + struct pcpu_alloc_info *ai; char psize_str[16]; int unit_pages; size_t pages_size; struct page **pages; - unsigned int cpu; - int i, j; + int unit, i, j; ssize_t ret; snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); - unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - unit_pages = unit_size >> PAGE_SHIFT; + ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL); + if (IS_ERR(ai)) + return PTR_ERR(ai); + BUG_ON(ai->nr_groups != 1); + BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); + + unit_pages = ai->unit_size >> PAGE_SHIFT; /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(unit_pages * nr_cpu_ids * sizeof(pages[0])); + pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * 
+ sizeof(pages[0])); pages = alloc_bootmem(pages_size); /* allocate pages */ j = 0; - for_each_possible_cpu(cpu) + for (unit = 0; unit < num_possible_cpus(); unit++) for (i = 0; i < unit_pages; i++) { + unsigned int cpu = ai->groups[0].cpu_map[unit]; void *ptr; ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); @@ -1747,18 +1863,18 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = nr_cpu_ids * unit_size; + vm.size = num_possible_cpus() * ai->unit_size; vm_area_register_early(&vm, PAGE_SIZE); - for_each_possible_cpu(cpu) { + for (unit = 0; unit < num_possible_cpus(); unit++) { unsigned long unit_addr = - (unsigned long)vm.addr + cpu * unit_size; + (unsigned long)vm.addr + unit * ai->unit_size; for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, &pages[cpu * unit_pages], + ret = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], unit_pages); if (ret < 0) panic("failed to map percpu area, err=%zd\n", ret); @@ -1772,16 +1888,15 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, */ /* copy static data */ - memcpy((void *)unit_addr, __per_cpu_load, static_size); + memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); } /* we're ready, commit */ pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", - unit_pages, psize_str, vm.addr, static_size, reserved_size, - dyn_size); + unit_pages, psize_str, vm.addr, ai->static_size, + ai->reserved_size, ai->dyn_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, vm.addr, NULL); + ret = pcpu_setup_first_chunk(ai, vm.addr); goto out_free_ar; enomem: @@ -1790,6 +1905,7 @@ enomem: ret = -ENOMEM; out_free_ar: free_bootmem(__pa(pages), pages_size); + pcpu_free_alloc_info(ai); return ret; } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ @@ -1805,38 +1921,50 @@ static size_t pcpul_lpage_size; static int pcpul_nr_lpages; static struct pcpul_ent *pcpul_map; -static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, +static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai, unsigned int *cpup) { - unsigned int cpu; + int group, cunit; - for_each_possible_cpu(cpu) - if (unit_map[cpu] == unit) { + for (group = 0, cunit = 0; group < ai->nr_groups; group++) { + const struct pcpu_group_info *gi = &ai->groups[group]; + + if (unit < cunit + gi->nr_units) { if (cpup) - *cpup = cpu; + *cpup = gi->cpu_map[unit - cunit]; return true; } + cunit += gi->nr_units; + } return false; } +static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) +{ + int group, unit, i; + + for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { + const struct pcpu_group_info *gi = &ai->groups[group]; + + for (i = 0; i < gi->nr_units; i++) + if (gi->cpu_map[i] == cpu) + return unit + i; + } + BUG(); +} + /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @reserved_size: the size of reserved percpu area in bytes - * @dyn_size: free size for dynamic allocation in bytes - * @unit_size: unit size in bytes - * @lpage_size: the size of a large page - * @unit_map: cpu -> unit mapping - * @nr_units: the number of units + * @ai: pcpu_alloc_info * @alloc_fn: function to allocate percpu lpage, always called with lpage_size * @free_fn: function to free percpu memory, @size <= lpage_size * @map_fn: function to map percpu lpage, always called 
with lpage_size * * This allocator uses large page to build and map the first chunk. - * Unlike other helpers, the caller should always specify @dyn_size - * and @unit_size. These parameters along with @unit_map and - * @nr_units can be determined using pcpu_lpage_build_unit_map(). - * This two stage initialization is to allow arch code to evaluate the + * Unlike other helpers, the caller should provide fully initialized + * @ai. This can be done using pcpu_build_alloc_info(). This two + * stage initialization is to allow arch code to evaluate the * parameters before committing to it. * * Large pages are allocated as directed by @unit_map and other @@ -1852,27 +1980,26 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units, +ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; - const size_t static_size = __per_cpu_end - __per_cpu_start; - size_t chunk_size = unit_size * nr_units; - size_t map_size; + const size_t lpage_size = ai->atom_size; + size_t chunk_size, map_size; unsigned int cpu; ssize_t ret; - int i, j, unit; + int i, j, unit, nr_units; - pcpul_lpage_dump_cfg(KERN_DEBUG, static_size, reserved_size, dyn_size, - unit_size, lpage_size, unit_map, nr_units); + nr_units = 0; + for (i = 0; i < ai->nr_groups; i++) + nr_units += ai->groups[i].nr_units; + chunk_size = ai->unit_size * nr_units; BUG_ON(chunk_size % lpage_size); - pcpul_size = static_size + reserved_size + dyn_size; + pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size; pcpul_lpage_size = lpage_size; pcpul_nr_lpages = chunk_size / lpage_size; @@ -1883,13 +2010,13 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* allocate all pages */ for (i = 0; i < pcpul_nr_lpages; i++) { size_t offset = i * lpage_size; - int first_unit = offset / unit_size; - int last_unit = (offset + lpage_size - 1) / unit_size; + int first_unit = offset / ai->unit_size; + int last_unit = (offset + lpage_size - 1) / ai->unit_size; void *ptr; /* find out which cpu is mapped to this unit */ for (unit = first_unit; unit <= last_unit; unit++) - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + if (pcpul_unit_to_cpu(unit, ai, &cpu)) goto found; continue; found: @@ -1905,12 +2032,12 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* return unused holes */ for (unit = 0; unit < nr_units; unit++) { - size_t start = unit * unit_size; - size_t end = start + unit_size; + size_t start = unit * ai->unit_size; + size_t end = start + ai->unit_size; size_t off, next; /* don't free used part of occupied unit */ - if (pcpul_unit_to_cpu(unit, unit_map, NULL)) + if (pcpul_unit_to_cpu(unit, ai, NULL)) start += pcpul_size; /* unit can span more than one page, punch the holes */ @@ -1925,7 +2052,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* allocate address, map and copy */ vm.flags = VM_ALLOC; vm.size = chunk_size; - vm_area_register_early(&vm, unit_size); + vm_area_register_early(&vm, ai->unit_size); for (i = 0; i < pcpul_nr_lpages; i++) { if (!pcpul_map[i].ptr) @@ -1935,15 +2062,15 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t 
dyn_size, } for_each_possible_cpu(cpu) - memcpy(vm.addr + unit_map[cpu] * unit_size, __per_cpu_load, - static_size); + memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size, + __per_cpu_load, ai->static_size); /* we're ready, commit */ pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", - vm.addr, static_size, reserved_size, dyn_size, unit_size); + vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, + ai->unit_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, vm.addr, unit_map); + ret = pcpu_setup_first_chunk(ai, vm.addr); /* * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped -- cgit v1.2.3 From fb435d5233f8b6f9b93c11d6304d8e98fed03234 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: add pcpu_unit_offsets[] Currently units are mapped sequentially into address space. This patch adds pcpu_unit_offsets[] which allows units to be mapped to arbitrary offsets from the chunk base address. This is necessary to allow sparse embedding, which may need to allocate address ranges and memory areas which aren't aligned to unit size but to allocation atom size (page or large page size). This also simplifies things a bit by removing the need to calculate the offset from the unit number. With this change, there's no need for the arch code to know pcpu_unit_size. Update pcpu_setup_first_chunk() and first chunk allocators to return a regular 0 or -errno return code instead of unit size or -errno. Signed-off-by: Tejun Heo Cc: David S. Miller --- arch/sparc/kernel/smp_64.c | 12 +++--- arch/x86/kernel/setup_percpu.c | 51 ++++++++++------------- include/linux/percpu.h | 16 ++++--- mm/percpu.c | 95 +++++++++++++++++++++--------------------- 4 files changed, 84 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index a42a4a744d14..b03fd362c629 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1478,9 +1478,10 @@ void __init setup_per_cpu_areas(void) static struct vm_struct vm; struct pcpu_alloc_info *ai; unsigned long delta, cpu; - size_t size_sum, pcpu_unit_size; + size_t size_sum; size_t ptrs_size; void **ptrs; + int rc; ai = pcpu_alloc_alloc_info(1, nr_cpu_ids); @@ -1526,14 +1527,15 @@ void __init setup_per_cpu_areas(void) pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); + if (rc) + panic("failed to setup percpu first chunk (%d)", rc); free_bootmem(__pa(ptrs), ptrs_size); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; - for_each_possible_cpu(cpu) { - __per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; - } + for_each_possible_cpu(cpu) + __per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; /* Setup %g5 for the boot cpu.
*/ __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index db5f9c49fec5..9becc5d4b518 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -157,12 +157,12 @@ static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -static ssize_t __init setup_pcpu_lpage(bool chosen) +static int __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; struct pcpu_alloc_info *ai; - ssize_t ret; + int rc; /* on non-NUMA, embedding is better */ if (!chosen && !pcpu_need_numa()) @@ -196,19 +196,18 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) if (tot_size > vm_size / 5) { pr_info("PERCPU: too large chunk size %zuMB for " "large page remap\n", tot_size >> 20); - ret = -EINVAL; + rc = -EINVAL; goto out_free; } } - ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, - pcpul_map); + rc = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, pcpul_map); out_free: pcpu_free_alloc_info(ai); - return ret; + return rc; } #else -static ssize_t __init setup_pcpu_lpage(bool chosen) +static int __init setup_pcpu_lpage(bool chosen) { return -EINVAL; } @@ -222,7 +221,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) * mapping so that it can use PMD mapping without additional TLB * pressure. */ -static ssize_t __init setup_pcpu_embed(bool chosen) +static int __init setup_pcpu_embed(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -250,7 +249,7 @@ static void __init pcpup_populate_pte(unsigned long addr) populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_page(void) +static int __init setup_pcpu_page(void) { return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, pcpu_fc_alloc, pcpu_fc_free, @@ -274,8 +273,7 @@ void __init setup_per_cpu_areas(void) { unsigned int cpu; unsigned long delta; - size_t pcpu_unit_size; - ssize_t ret; + int rc; pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); @@ -285,36 +283,33 @@ void __init setup_per_cpu_areas(void) * of large page mappings. Please read comments on top of * each allocator for details. 
*/ - ret = -EINVAL; + rc = -EINVAL; if (pcpu_chosen_fc != PCPU_FC_AUTO) { if (pcpu_chosen_fc != PCPU_FC_PAGE) { if (pcpu_chosen_fc == PCPU_FC_LPAGE) - ret = setup_pcpu_lpage(true); + rc = setup_pcpu_lpage(true); else - ret = setup_pcpu_embed(true); + rc = setup_pcpu_embed(true); - if (ret < 0) - pr_warning("PERCPU: %s allocator failed (%zd), " + if (rc < 0) + pr_warning("PERCPU: %s allocator failed (%d), " "falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], ret); + pcpu_fc_names[pcpu_chosen_fc], rc); } } else { - ret = setup_pcpu_lpage(false); - if (ret < 0) - ret = setup_pcpu_embed(false); + rc = setup_pcpu_lpage(false); + if (rc < 0) + rc = setup_pcpu_embed(false); } - if (ret < 0) - ret = setup_pcpu_page(); - if (ret < 0) - panic("cannot initialize percpu area (err=%zd)", ret); - - pcpu_unit_size = ret; + if (rc < 0) + rc = setup_pcpu_page(); + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = - delta + pcpu_unit_map[cpu] * pcpu_unit_size; + per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 77b86be8ce4f..a7ec840f596c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -57,7 +57,7 @@ #endif extern void *pcpu_base_addr; -extern const int *pcpu_unit_map; +extern const unsigned long *pcpu_unit_offsets; struct pcpu_group_info { int nr_units; /* aligned # of units */ @@ -106,25 +106,23 @@ extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); -extern size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, - void *base_addr); +extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK -extern ssize_t __init pcpu_embed_first_chunk( - size_t reserved_size, ssize_t dyn_size); +extern int __init pcpu_embed_first_chunk(size_t reserved_size, + ssize_t dyn_size); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK -extern ssize_t __init pcpu_page_first_chunk( - size_t reserved_size, +extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern ssize_t __init pcpu_lpage_first_chunk( - const struct pcpu_alloc_info *ai, +extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 99f7fa682722..653b02c40200 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -117,8 +117,8 @@ static unsigned int pcpu_last_unit_cpu __read_mostly; void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); -/* cpu -> unit map */ -const int *pcpu_unit_map __read_mostly; +static const int *pcpu_unit_map __read_mostly; /* cpu -> unit */ +const unsigned long *pcpu_unit_offsets __read_mostly; /* cpu -> unit offset */ /* * The first chunk which always exists. 
Note that unlike other @@ -196,8 +196,8 @@ static int pcpu_page_idx(unsigned int cpu, int page_idx) static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, unsigned int cpu, int page_idx) { - return (unsigned long)chunk->vm->addr + - (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); + return (unsigned long)chunk->vm->addr + pcpu_unit_offsets[cpu] + + (page_idx << PAGE_SHIFT); } static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, @@ -341,7 +341,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * space. Note that any possible cpu id can be used here, so * there's no need to worry about preemption or cpu hotplug. */ - addr += pcpu_unit_map[smp_processor_id()] * pcpu_unit_size; + addr += pcpu_unit_offsets[smp_processor_id()]; return pcpu_get_page_chunk(vmalloc_to_page(addr)); } @@ -1560,17 +1560,17 @@ static void pcpu_dump_alloc_info(const char *lvl, * and available for dynamic allocation like any other chunks. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access. + * 0 on success, -errno on failure. */ -size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, - void *base_addr) +int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; size_t dyn_size = ai->dyn_size; size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; + unsigned long *unit_off; unsigned int cpu; int *unit_map; int group, unit, i; @@ -1587,8 +1587,9 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_dump_alloc_info(KERN_DEBUG, ai); - /* determine number of units and verify and initialize pcpu_unit_map */ + /* determine number of units and initialize unit_map and base */ unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0])); + unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0])); for (cpu = 0; cpu < nr_cpu_ids; cpu++) unit_map[cpu] = NR_CPUS; @@ -1606,6 +1607,8 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, BUG_ON(unit_map[cpu] != NR_CPUS); unit_map[cpu] = unit + i; + unit_off[cpu] = gi->base_offset + i * ai->unit_size; + if (pcpu_first_unit_cpu == NR_CPUS) pcpu_first_unit_cpu = cpu; } @@ -1617,6 +1620,7 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, BUG_ON(unit_map[cpu] == NR_CPUS); pcpu_unit_map = unit_map; + pcpu_unit_offsets = unit_off; /* determine basic parameters */ pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; @@ -1688,7 +1692,7 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* we're done */ pcpu_base_addr = schunk->vm->addr; - return pcpu_unit_size; + return 0; } const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { @@ -1748,16 +1752,15 @@ early_param("percpu_alloc", percpu_alloc_setup); * size, the leftover is returned to the bootmem allocator. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. 
*/ -ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) +int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { struct pcpu_alloc_info *ai; size_t size_sum, chunk_size; void *base; int unit; - ssize_t ret; + int rc; ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); if (IS_ERR(ai)) @@ -1773,7 +1776,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); - ret = -ENOMEM; + rc = -ENOMEM; goto out_free_ai; } @@ -1790,10 +1793,10 @@ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); - ret = pcpu_setup_first_chunk(ai, base); + rc = pcpu_setup_first_chunk(ai, base); out_free_ai: pcpu_free_alloc_info(ai); - return ret; + return rc; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || !CONFIG_HAVE_SETUP_PER_CPU_AREA */ @@ -1813,13 +1816,12 @@ out_free_ai: * page-by-page into vmalloc area. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. */ -ssize_t __init pcpu_page_first_chunk(size_t reserved_size, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_populate_pte_fn_t populate_pte_fn) +int __init pcpu_page_first_chunk(size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; struct pcpu_alloc_info *ai; @@ -1827,8 +1829,7 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, int unit_pages; size_t pages_size; struct page **pages; - int unit, i, j; - ssize_t ret; + int unit, i, j, rc; snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); @@ -1874,10 +1875,10 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], - unit_pages); - if (ret < 0) - panic("failed to map percpu area, err=%zd\n", ret); + rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], + unit_pages); + if (rc < 0) + panic("failed to map percpu area, err=%d\n", rc); /* * FIXME: Archs with virtual cache should flush local @@ -1896,17 +1897,17 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, unit_pages, psize_str, vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size); - ret = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); goto out_free_ar; enomem: while (--j >= 0) free_fn(page_address(pages[j]), PAGE_SIZE); - ret = -ENOMEM; + rc = -ENOMEM; out_free_ar: free_bootmem(__pa(pages), pages_size); pcpu_free_alloc_info(ai); - return ret; + return rc; } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ @@ -1977,20 +1978,18 @@ static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) * pcpu_lpage_remapped(). * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. 
*/ -ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) +int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; const size_t lpage_size = ai->atom_size; size_t chunk_size, map_size; unsigned int cpu; - ssize_t ret; - int i, j, unit, nr_units; + int i, j, unit, nr_units, rc; nr_units = 0; for (i = 0; i < ai->nr_groups; i++) @@ -2070,7 +2069,7 @@ ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); - ret = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); /* * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped @@ -2094,7 +2093,7 @@ ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) pcpul_nr_lpages--; - return ret; + return rc; enomem: for (i = 0; i < pcpul_nr_lpages; i++) @@ -2166,21 +2165,21 @@ EXPORT_SYMBOL(__per_cpu_offset); void __init setup_per_cpu_areas(void) { - ssize_t unit_size; unsigned long delta; unsigned int cpu; + int rc; /* * Always reserve area for module percpu variables. That's * what the legacy allocator did. */ - unit_size = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE); - if (unit_size < 0) + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE); + if (rc < 0) panic("Failed to initialized percpu areas."); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) - __per_cpu_offset[cpu] = delta + cpu * unit_size; + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; } #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ -- cgit v1.2.3 From ca23e405e06d5fffb005df004c72781f76062f51 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:52 +0900 Subject: vmalloc: implement pcpu_get_vm_areas() To directly use spread NUMA memories for percpu units, the percpu allocator will be updated to allow sparsely mapping units in a chunk. As the distances between units can be very large, this makes allocating a single vmap area for each chunk undesirable. This patch implements pcpu_get_vm_areas() and pcpu_free_vm_areas(), which allocate and free sparse congruent vmap areas. pcpu_get_vm_areas() takes @offsets and @sizes arrays which define the distances and sizes of the vmap areas. It scans down from the top of the vmalloc area looking for the top-most address which can accommodate all the areas. The top-down scan is to avoid interacting with regular vmallocs, which can push these congruent areas up little by little, ending up wasting address space and page tables. To speed up the top-down scan, the highest possible address hint is maintained. Although the scan is linear from the hint, given the usually large holes between memory addresses of different NUMA nodes, the scanning is highly likely to finish after finding the first hole for the last unit, which is scanned first.
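[Editorial illustration, not part of the patch: a minimal sketch of how a caller might exercise the new pair of interfaces. The pcpu_vm_areas_demo() function, the two-area layout and the 2MB alignment below are hypothetical; only pcpu_get_vm_areas() and pcpu_free_vm_areas() are introduced by this patch.

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>

/* two hypothetical congruent 2MB areas placed exactly 1GB apart */
static const unsigned long demo_offsets[] = { 0, 1UL << 30 };
static const size_t demo_sizes[] = { 2UL << 20, 2UL << 20 };

static int __init pcpu_vm_areas_demo(void)
{
	const size_t align = 2UL << 20;	/* power of 2, page aligned */
	struct vm_struct **vms;

	/* every entry in demo_offsets[] and demo_sizes[] is align'd */
	vms = pcpu_get_vm_areas(demo_offsets, demo_sizes,
				ARRAY_SIZE(demo_sizes), align, GFP_KERNEL);
	if (!vms)
		return -ENOMEM;

	/* congruent: base addresses keep the requested 1GB gap */
	WARN_ON((unsigned long)vms[1]->addr - (unsigned long)vms[0]->addr !=
		demo_offsets[1] - demo_offsets[0]);

	pcpu_free_vm_areas(vms, ARRAY_SIZE(demo_sizes));
	return 0;
}

The preserved offset between the two returned base addresses is the congruence property the percpu allocator relies on when it later maps sparsely placed unit groups into one chunk.]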
Signed-off-by: Tejun Heo Cc: Nick Piggin --- include/linux/vmalloc.h | 6 + mm/vmalloc.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 299 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index a43ebec3a7b9..227c2a585e4f 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -115,4 +115,10 @@ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, + const size_t *sizes, int nr_vms, + size_t align, gfp_t gfp_mask); + +void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); + #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2eb461c3a46e..204b8243d8ab 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -265,6 +265,7 @@ struct vmap_area { static DEFINE_SPINLOCK(vmap_area_lock); static struct rb_root vmap_area_root = RB_ROOT; static LIST_HEAD(vmap_area_list); +static unsigned long vmap_area_pcpu_hole; static struct vmap_area *__find_vmap_area(unsigned long addr) { @@ -431,6 +432,15 @@ static void __free_vmap_area(struct vmap_area *va) RB_CLEAR_NODE(&va->rb_node); list_del_rcu(&va->list); + /* + * Track the highest possible candidate for pcpu area + * allocation. Areas outside of vmalloc area can be returned + * here too, consider only end addresses which fall inside + * vmalloc area proper. + */ + if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END) + vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end); + call_rcu(&va->rcu_head, rcu_free_va); } @@ -1038,6 +1048,9 @@ void __init vmalloc_init(void) va->va_end = va->va_start + tmp->size; __insert_vmap_area(va); } + + vmap_area_pcpu_hole = VMALLOC_END; + vmap_initialized = true; } @@ -1821,6 +1834,286 @@ void free_vm_area(struct vm_struct *area) } EXPORT_SYMBOL_GPL(free_vm_area); +static struct vmap_area *node_to_va(struct rb_node *n) +{ + return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; +} + +/** + * pvm_find_next_prev - find the next and prev vmap_area surrounding @end + * @end: target address + * @pnext: out arg for the next vmap_area + * @pprev: out arg for the previous vmap_area + * + * Returns: %true if either or both of next and prev are found, + * %false if no vmap_area exists + * + * Find vmap_areas end addresses of which enclose @end. ie. if not + * NULL, *pnext->va_end > @end and *pprev->va_end <= @end. + */ +static bool pvm_find_next_prev(unsigned long end, + struct vmap_area **pnext, + struct vmap_area **pprev) +{ + struct rb_node *n = vmap_area_root.rb_node; + struct vmap_area *va = NULL; + + while (n) { + va = rb_entry(n, struct vmap_area, rb_node); + if (end < va->va_end) + n = n->rb_left; + else if (end > va->va_end) + n = n->rb_right; + else + break; + } + + if (!va) + return false; + + if (va->va_end > end) { + *pnext = va; + *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); + } else { + *pprev = va; + *pnext = node_to_va(rb_next(&(*pprev)->rb_node)); + } + return true; +} + +/** + * pvm_determine_end - find the highest aligned address between two vmap_areas + * @pnext: in/out arg for the next vmap_area + * @pprev: in/out arg for the previous vmap_area + * @align: alignment + * + * Returns: determined end address + * + * Find the highest aligned address between *@pnext and *@pprev below + * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned + * down address is between the end addresses of the two vmap_areas. 
+ * + * Please note that the address returned by this function may fall + * inside *@pnext vmap_area. The caller is responsible for checking + * that. + */ +static unsigned long pvm_determine_end(struct vmap_area **pnext, + struct vmap_area **pprev, + unsigned long align) +{ + const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); + unsigned long addr; + + if (*pnext) + addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end); + else + addr = vmalloc_end; + + while (*pprev && (*pprev)->va_end > addr) { + *pnext = *pprev; + *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); + } + + return addr; +} + +/** + * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator + * @offsets: array containing offset of each area + * @sizes: array containing size of each area + * @nr_vms: the number of areas to allocate + * @align: alignment, all entries in @offsets and @sizes must be aligned to this + * @gfp_mask: allocation mask + * + * Returns: kmalloc'd vm_struct pointer array pointing to allocated + * vm_structs on success, %NULL on failure + * + * Percpu allocator wants to use congruent vm areas so that it can + * maintain the offsets among percpu areas. This function allocates + * congruent vmalloc areas for it. These areas tend to be scattered + * pretty far, distance between two areas easily going up to + * gigabytes. To avoid interacting with regular vmallocs, these areas + * are allocated from top. + * + * Despite its complicated look, this allocator is rather simple. It + * does everything top-down and scans areas from the end looking for + * matching slot. While scanning, if any of the areas overlaps with + * existing vmap_area, the base address is pulled down to fit the + * area. Scanning is repeated till all the areas fit and then all + * necessary data structures are inserted and the result is returned. + */ +struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, + const size_t *sizes, int nr_vms, + size_t align, gfp_t gfp_mask) +{ + const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); + const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); + struct vmap_area **vas, *prev, *next; + struct vm_struct **vms; + int area, area2, last_area, term_area; + unsigned long base, start, end, last_end; + bool purged = false; + + gfp_mask &= GFP_RECLAIM_MASK; + + /* verify parameters and allocate data structures */ + BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align)); + for (last_area = 0, area = 0; area < nr_vms; area++) { + start = offsets[area]; + end = start + sizes[area]; + + /* is everything aligned properly?
*/ + BUG_ON(!IS_ALIGNED(offsets[area], align)); + BUG_ON(!IS_ALIGNED(sizes[area], align)); + + /* detect the area with the highest address */ + if (start > offsets[last_area]) + last_area = area; + + for (area2 = 0; area2 < nr_vms; area2++) { + unsigned long start2 = offsets[area2]; + unsigned long end2 = start2 + sizes[area2]; + + if (area2 == area) + continue; + + BUG_ON(start2 >= start && start2 < end); + BUG_ON(end2 <= end && end2 > start); + } + } + last_end = offsets[last_area] + sizes[last_area]; + + if (vmalloc_end - vmalloc_start < last_end) { + WARN_ON(true); + return NULL; + } + + vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask); + vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask); + if (!vas || !vms) + goto err_free; + + for (area = 0; area < nr_vms; area++) { + vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask); + vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask); + if (!vas[area] || !vms[area]) + goto err_free; + } +retry: + spin_lock(&vmap_area_lock); + + /* start scanning - we scan from the top, begin with the last area */ + area = term_area = last_area; + start = offsets[area]; + end = start + sizes[area]; + + if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) { + base = vmalloc_end - last_end; + goto found; + } + base = pvm_determine_end(&next, &prev, align) - end; + + while (true) { + BUG_ON(next && next->va_end <= base + end); + BUG_ON(prev && prev->va_end > base + end); + + /* + * base might have underflowed, add last_end before + * comparing. + */ + if (base + last_end < vmalloc_start + last_end) { + spin_unlock(&vmap_area_lock); + if (!purged) { + purge_vmap_area_lazy(); + purged = true; + goto retry; + } + goto err_free; + } + + /* + * If next overlaps, move base downwards so that it's + * right below next and then recheck. + */ + if (next && next->va_start < base + end) { + base = pvm_determine_end(&next, &prev, align) - end; + term_area = area; + continue; + } + + /* + * If prev overlaps, shift down next and prev and move + * base so that it's right below new next and then + * recheck. + */ + if (prev && prev->va_end > base + start) { + next = prev; + prev = node_to_va(rb_prev(&next->rb_node)); + base = pvm_determine_end(&next, &prev, align) - end; + term_area = area; + continue; + } + + /* + * This area fits, move on to the previous one. If + * the previous one is the terminal one, we're done. + */ + area = (area + nr_vms - 1) % nr_vms; + if (area == term_area) + break; + start = offsets[area]; + end = start + sizes[area]; + pvm_find_next_prev(base + end, &next, &prev); + } +found: + /* we've found a fitting base, insert all va's */ + for (area = 0; area < nr_vms; area++) { + struct vmap_area *va = vas[area]; + + va->va_start = base + offsets[area]; + va->va_end = va->va_start + sizes[area]; + __insert_vmap_area(va); + } + + vmap_area_pcpu_hole = base + offsets[last_area]; + + spin_unlock(&vmap_area_lock); + + /* insert all vm's */ + for (area = 0; area < nr_vms; area++) + insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC, + pcpu_get_vm_areas); + + kfree(vas); + return vms; + +err_free: + for (area = 0; area < nr_vms; area++) { + if (vas) + kfree(vas[area]); + if (vms) + kfree(vms[area]); + } + kfree(vas); + kfree(vms); + return NULL; +} + +/** + * pcpu_free_vm_areas - free vmalloc areas for percpu allocator + * @vms: vm_struct pointer array returned by pcpu_get_vm_areas() + * @nr_vms: the number of allocated areas + * + * Free vm_structs and the array allocated by pcpu_get_vm_areas(). 
+ */ +void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) +{ + int i; + + for (i = 0; i < nr_vms; i++) + free_vm_area(vms[i]); + kfree(vms); +} #ifdef CONFIG_PROC_FS static void *s_start(struct seq_file *m, loff_t *pos) -- cgit v1.2.3 From c8826dd538602d730ed2c18c6753f1bbfa6c4933 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:52 +0900 Subject: percpu: update embedding first chunk allocator to handle sparse units Now that the percpu core can handle very sparse units, the embedding first chunk allocator can use any memory to build the first chunk, provided that vmalloc space is large enough. This patch teaches pcpu_embed_first_chunk() about distances between cpus and has it use alloc/free callbacks to allocate node-specific areas for each group and use them for the first chunk. This brings the benefits of the embedding allocator to NUMA configurations - no extra TLB pressure with the flexibility of the unified dynamic allocator and no need to restructure arch code to build a memory layout suitable for percpu. With units put into atom_size-aligned groups according to cpu distances, using large pages for dynamic chunks also becomes straightforward, falling back to regular pages if a large allocation fails. Embedding allocator users are converted to specify NULL cpu_distance_fn, so this patch doesn't cause any visible behavior difference. Following patches will convert them to make use of the new parameters. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 4 +- include/linux/percpu.h | 7 ++- mm/percpu.c | 113 +++++++++++++++++++++++++++++++---------- 3 files changed, 93 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 9becc5d4b518..67f6314de9f1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -234,7 +234,9 @@ static int __init setup_pcpu_embed(bool chosen) return -EINVAL; return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE); + reserve - PERCPU_FIRST_CHUNK_RESERVE, + PAGE_SIZE, NULL, pcpu_fc_alloc, + pcpu_fc_free); } /* diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a7ec840f596c..25359932740e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -110,8 +110,11 @@ extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, void *base_addr); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK -extern int __init pcpu_embed_first_chunk(size_t reserved_size, - ssize_t dyn_size); +extern int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK diff --git a/mm/percpu.c b/mm/percpu.c index cc9c4c64606d..c2826d05505c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1747,15 +1747,25 @@ early_param("percpu_alloc", percpu_alloc_setup); * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @atom_size: allocation atom size + * @cpu_distance_fn: callback to determine distance between cpus, optional + * @alloc_fn: function to allocate percpu page + * @free_fn: function to free percpu page * * This is a helper to ease setting up embedded first percpu chunk and * can be called where pcpu_setup_first_chunk() is expected.
* * If this function is used to setup the first chunk, it is allocated - * as a contiguous area using bootmem allocator and used as-is without - * being mapped into vmalloc area. This enables the first chunk to - * piggy back on the linear physical mapping which often uses larger - * page size. + * by calling @alloc_fn and used as-is without being mapped into + * vmalloc area. Allocations are always whole multiples of @atom_size + * aligned to @atom_size. + * + * This enables the first chunk to piggy back on the linear physical + * mapping which often uses larger page size. Please note that this + * can result in very sparse cpu->unit mapping on NUMA machines thus + * requiring large vmalloc address space. Don't use this allocator if + * vmalloc space is not orders of magnitude larger than distances + * between node memory addresses (ie. 32bit NUMA machines). * * When @dyn_size is positive, dynamic area might be larger than * specified to fill page alignment. When @dyn_size is auto, @@ -1763,53 +1773,88 @@ early_param("percpu_alloc", percpu_alloc_setup); * and reserved areas. * * If the needed size is smaller than the minimum or specified unit - * size, the leftover is returned to the bootmem allocator. + * size, the leftover is returned using @free_fn. * * RETURNS: * 0 on success, -errno on failure. */ -int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) +int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn) { + void *base = (void *)ULONG_MAX; + void **areas = NULL; struct pcpu_alloc_info *ai; - size_t size_sum, chunk_size; - void *base; - int unit; - int rc; + size_t size_sum, areas_size; + int group, i, rc; - ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); + ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size, + cpu_distance_fn); if (IS_ERR(ai)) return PTR_ERR(ai); - BUG_ON(ai->nr_groups != 1); - BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; - chunk_size = ai->unit_size * num_possible_cpus(); + areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); - base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, - __pa(MAX_DMA_ADDRESS)); - if (!base) { - pr_warning("PERCPU: failed to allocate %zu bytes for " - "embedding\n", chunk_size); + areas = alloc_bootmem_nopanic(areas_size); + if (!areas) { rc = -ENOMEM; - goto out_free_ai; + goto out_free; } - /* return the leftover and copy */ - for (unit = 0; unit < num_possible_cpus(); unit++) { - void *ptr = base + unit * ai->unit_size; + /* allocate, copy and determine base address */ + for (group = 0; group < ai->nr_groups; group++) { + struct pcpu_group_info *gi = &ai->groups[group]; + unsigned int cpu = NR_CPUS; + void *ptr; + + for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++) + cpu = gi->cpu_map[i]; + BUG_ON(cpu == NR_CPUS); + + /* allocate space for the whole group */ + ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size); + if (!ptr) { + rc = -ENOMEM; + goto out_free_areas; + } + areas[group] = ptr; - free_bootmem(__pa(ptr + size_sum), ai->unit_size - size_sum); - memcpy(ptr, __per_cpu_load, ai->static_size); + base = min(ptr, base); + + for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) { + if (gi->cpu_map[i] == NR_CPUS) { + /* unused unit, free whole */ + free_fn(ptr, ai->unit_size); + continue; + } + /* copy and return the unused part */ + memcpy(ptr, 
__per_cpu_load, ai->static_size); + free_fn(ptr + size_sum, ai->unit_size - size_sum); + } } - /* we're ready, commit */ + /* base address is now known, determine group base offsets */ + for (group = 0; group < ai->nr_groups; group++) + ai->groups[group].base_offset = areas[group] - base; + pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); rc = pcpu_setup_first_chunk(ai, base); -out_free_ai: + goto out_free; + +out_free_areas: + for (group = 0; group < ai->nr_groups; group++) + free_fn(areas[group], + ai->groups[group].nr_units * ai->unit_size); +out_free: pcpu_free_alloc_info(ai); + if (areas) + free_bootmem(__pa(areas), areas_size); return rc; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || @@ -2177,6 +2222,17 @@ void *pcpu_lpage_remapped(void *kaddr) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); +} + +static void __init pcpu_dfl_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} + void __init setup_per_cpu_areas(void) { unsigned long delta; @@ -2188,7 +2244,8 @@ void __init setup_per_cpu_areas(void) * what the legacy allocator did. */ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE); + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, + pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); if (rc < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From e933a73f48e3b2d40cfa56d81e2646f194b5a66a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:53 +0900 Subject: percpu: kill lpage first chunk allocator With x86 converted to embedding allocator, lpage doesn't have any user left. Kill it along with cpa handling code. Signed-off-by: Tejun Heo Cc: Jan Beulich --- Documentation/kernel-parameters.txt | 10 +- arch/x86/mm/pageattr.c | 20 +-- include/linux/percpu.h | 16 --- mm/percpu.c | 241 ------------------------------------ 4 files changed, 6 insertions(+), 281 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index dee9ce2e6cfa..e710093e3d32 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1920,11 +1920,11 @@ and is between 256 and 4096 characters. It is defined in the file See arch/parisc/kernel/pdc_chassis.c percpu_alloc= Select which percpu first chunk allocator to use. - Currently supported values are "embed", "page" and - "lpage". Archs may support subset or none of the - selections. See comments in mm/percpu.c for details - on each allocator. This parameter is primarily for - debugging and performance comparison. + Currently supported values are "embed" and "page". + Archs may support subset or none of the selections. + See comments in mm/percpu.c for details on each + allocator. This parameter is primarily for debugging + and performance comparison. pf. [PARIDE] See Documentation/blockdev/paride.txt. 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dce282f65700..f53cfc7f963d 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -687,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); - unsigned long vaddr, remapped; + unsigned long vaddr; int ret; if (cpa->pfn >= max_pfn_mapped) @@ -745,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa) } #endif - /* - * If the PMD page was partially used for per-cpu remapping, - * the recycled area needs to be split and modified. Because - * the area is always proper subset of a PMD page - * cpa->numpages is guaranteed to be 1 for these areas, so - * there's no need to loop over and check for further remaps. - */ - remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); - if (remapped) { - WARN_ON(cpa->numpages > 1); - alias_cpa = *cpa; - alias_cpa.vaddr = &remapped; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - ret = __change_page_attr_set_clr(&alias_cpa, 0); - if (ret) - return ret; - } - return 0; } diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 25359932740e..878836ca999c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -82,7 +82,6 @@ enum pcpu_fc { PCPU_FC_AUTO, PCPU_FC_EMBED, PCPU_FC_PAGE, - PCPU_FC_LPAGE, PCPU_FC_NR, }; @@ -95,7 +94,6 @@ typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); -typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, int nr_units); @@ -124,20 +122,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn); - -extern void *pcpu_lpage_remapped(void *kaddr); -#else -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} -#endif - /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. 
Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index c2826d05505c..77933928107d 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1713,7 +1713,6 @@ const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { [PCPU_FC_AUTO] = "auto", [PCPU_FC_EMBED] = "embed", [PCPU_FC_PAGE] = "page", - [PCPU_FC_LPAGE] = "lpage", }; enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO; @@ -1729,10 +1728,6 @@ static int __init percpu_alloc_setup(char *str) #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK else if (!strcmp(str, "page")) pcpu_chosen_fc = PCPU_FC_PAGE; -#endif -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK - else if (!strcmp(str, "lpage")) - pcpu_chosen_fc = PCPU_FC_LPAGE; #endif else pr_warning("PERCPU: unknown allocator %s specified\n", str); @@ -1970,242 +1965,6 @@ out_free_ar: } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -struct pcpul_ent { - void *ptr; - void *map_addr; -}; - -static size_t pcpul_size; -static size_t pcpul_lpage_size; -static int pcpul_nr_lpages; -static struct pcpul_ent *pcpul_map; - -static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai, - unsigned int *cpup) -{ - int group, cunit; - - for (group = 0, cunit = 0; group < ai->nr_groups; group++) { - const struct pcpu_group_info *gi = &ai->groups[group]; - - if (unit < cunit + gi->nr_units) { - if (cpup) - *cpup = gi->cpu_map[unit - cunit]; - return true; - } - cunit += gi->nr_units; - } - - return false; -} - -static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) -{ - int group, unit, i; - - for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { - const struct pcpu_group_info *gi = &ai->groups[group]; - - for (i = 0; i < gi->nr_units; i++) - if (gi->cpu_map[i] == cpu) - return unit + i; - } - BUG(); -} - -/** - * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @ai: pcpu_alloc_info - * @alloc_fn: function to allocate percpu lpage, always called with lpage_size - * @free_fn: function to free percpu memory, @size <= lpage_size - * @map_fn: function to map percpu lpage, always called with lpage_size - * - * This allocator uses large page to build and map the first chunk. - * Unlike other helpers, the caller should provide fully initialized - * @ai. This can be done using pcpu_build_alloc_info(). This two - * stage initialization is to allow arch code to evaluate the - * parameters before committing to it. - * - * Large pages are allocated as directed by @unit_map and other - * parameters and mapped to vmalloc space. Unused holes are returned - * to the page allocator. Note that these holes end up being actively - * mapped twice - once to the physical mapping and to the vmalloc area - * for the first percpu chunk. Depending on architecture, this might - * cause problem when changing page attributes of the returned area. - * These double mapped areas can be detected using - * pcpu_lpage_remapped(). - * - * RETURNS: - * 0 on success, -errno on failure. 
- */ -int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) -{ - static struct vm_struct vm; - const size_t lpage_size = ai->atom_size; - size_t chunk_size, map_size; - unsigned int cpu; - int i, j, unit, nr_units, rc; - - nr_units = 0; - for (i = 0; i < ai->nr_groups; i++) - nr_units += ai->groups[i].nr_units; - - chunk_size = ai->unit_size * nr_units; - BUG_ON(chunk_size % lpage_size); - - pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size; - pcpul_lpage_size = lpage_size; - pcpul_nr_lpages = chunk_size / lpage_size; - - /* allocate pointer array and alloc large pages */ - map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]); - pcpul_map = alloc_bootmem(map_size); - - /* allocate all pages */ - for (i = 0; i < pcpul_nr_lpages; i++) { - size_t offset = i * lpage_size; - int first_unit = offset / ai->unit_size; - int last_unit = (offset + lpage_size - 1) / ai->unit_size; - void *ptr; - - /* find out which cpu is mapped to this unit */ - for (unit = first_unit; unit <= last_unit; unit++) - if (pcpul_unit_to_cpu(unit, ai, &cpu)) - goto found; - continue; - found: - ptr = alloc_fn(cpu, lpage_size, lpage_size); - if (!ptr) { - pr_warning("PERCPU: failed to allocate large page " - "for cpu%u\n", cpu); - goto enomem; - } - - pcpul_map[i].ptr = ptr; - } - - /* return unused holes */ - for (unit = 0; unit < nr_units; unit++) { - size_t start = unit * ai->unit_size; - size_t end = start + ai->unit_size; - size_t off, next; - - /* don't free used part of occupied unit */ - if (pcpul_unit_to_cpu(unit, ai, NULL)) - start += pcpul_size; - - /* unit can span more than one page, punch the holes */ - for (off = start; off < end; off = next) { - void *ptr = pcpul_map[off / lpage_size].ptr; - next = min(roundup(off + 1, lpage_size), end); - if (ptr) - free_fn(ptr + off % lpage_size, next - off); - } - } - - /* allocate address, map and copy */ - vm.flags = VM_ALLOC; - vm.size = chunk_size; - vm_area_register_early(&vm, ai->unit_size); - - for (i = 0; i < pcpul_nr_lpages; i++) { - if (!pcpul_map[i].ptr) - continue; - pcpul_map[i].map_addr = vm.addr + i * lpage_size; - map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr); - } - - for_each_possible_cpu(cpu) - memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size, - __per_cpu_load, ai->static_size); - - /* we're ready, commit */ - pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", - vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, - ai->unit_size); - - rc = pcpu_setup_first_chunk(ai, vm.addr); - - /* - * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped - * lpages are pushed to the end and trimmed. - */ - for (i = 0; i < pcpul_nr_lpages - 1; i++) - for (j = i + 1; j < pcpul_nr_lpages; j++) { - struct pcpul_ent tmp; - - if (!pcpul_map[j].ptr) - continue; - if (pcpul_map[i].ptr && - pcpul_map[i].ptr < pcpul_map[j].ptr) - continue; - - tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } - - while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) - pcpul_nr_lpages--; - - return rc; - -enomem: - for (i = 0; i < pcpul_nr_lpages; i++) - if (pcpul_map[i].ptr) - free_fn(pcpul_map[i].ptr, lpage_size); - free_bootmem(__pa(pcpul_map), map_size); - return -ENOMEM; -} - -/** - * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area - * @kaddr: the kernel address in question - * - * Determine whether @kaddr falls in the pcpul recycled area. 
This is - * used by pageattr to detect VM aliases and break up the pcpu large - * page mapping such that the same physical page is not mapped under - * different attributes. - * - * The recycled area is always at the tail of a partially used large - * page. - * - * RETURNS: - * Address of corresponding remapped pcpu address if match is found; - * otherwise, NULL. - */ -void *pcpu_lpage_remapped(void *kaddr) -{ - unsigned long lpage_mask = pcpul_lpage_size - 1; - void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask); - unsigned long offset = (unsigned long)kaddr & lpage_mask; - int left = 0, right = pcpul_nr_lpages - 1; - int pos; - - /* pcpul in use at all? */ - if (!pcpul_map) - return NULL; - - /* okay, perform binary search */ - while (left <= right) { - pos = (left + right) / 2; - - if (pcpul_map[pos].ptr < lpage_addr) - left = pos + 1; - else if (pcpul_map[pos].ptr > lpage_addr) - right = pos - 1; - else - return pcpul_map[pos].map_addr + offset; - } - - return NULL; -} -#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */ - /* * Generic percpu area setup. * -- cgit v1.2.3 From f5ea9120be2e5d5c846243416cfdce01d02f5836 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Aug 2009 16:17:38 +0200 Subject: nl80211: add generation number to all dumps In order for userspace to be able to figure out whether it obtained a consistent snapshot of data or not when using netlink dumps, we need to have a generation number in each dump message that indicates whether the list has changed or not -- its value is arbitrary. This patch adds such a number to all dumps, this needs some mac80211 involvement to keep track of a generation number to start with when adding/removing mesh paths or stations. The wiphy and netdev lists can be fully handled within cfg80211, of course, but generation numbers need to be stored there as well. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 17 ++++++++++++----- include/net/cfg80211.h | 12 ++++++++++++ net/mac80211/cfg.c | 4 ++++ net/mac80211/ieee80211_i.h | 1 + net/mac80211/mesh.h | 2 ++ net/mac80211/mesh_pathtbl.c | 5 +++++ net/mac80211/sta_info.c | 2 ++ net/wireless/core.c | 5 +++++ net/wireless/core.h | 2 ++ net/wireless/nl80211.c | 31 +++++++++++++++++++++---------- net/wireless/scan.c | 1 + 11 files changed, 67 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index cb3dc6027fd9..a8d71ed43a0e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -480,10 +480,6 @@ enum nl80211_commands { * @NL80211_ATTR_SCAN_FREQUENCIES: nested attribute with frequencies (in MHz) * @NL80211_ATTR_SCAN_SSIDS: nested attribute with SSIDs, leave out for passive * scanning and include a zero-length SSID (wildcard) for wildcard scan - * @NL80211_ATTR_SCAN_GENERATION: the scan generation increases whenever the - * scan result list changes (BSS expired or added) so that applications - * can verify that they got a single, consistent snapshot (when all dump - * messages carried the same generation number) * @NL80211_ATTR_BSS: scan result BSS * * @NL80211_ATTR_REG_INITIATOR: indicates who requested the regulatory domain @@ -580,6 +576,14 @@ enum nl80211_commands { * * @NL80211_ATTR_PID: Process ID of a network namespace. * + * @NL80211_ATTR_GENERATION: Used to indicate consistent snapshots for + * dumps. 
This number increases whenever the object list being + * dumped changes, and as such userspace can verify that it has + * obtained a complete and consistent snapshot by verifying that + * all dump messages contain the same generation number. If it + * changed then the list changed and the dump should be repeated + * completely from scratch. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -651,7 +655,7 @@ enum nl80211_attrs { NL80211_ATTR_SCAN_FREQUENCIES, NL80211_ATTR_SCAN_SSIDS, - NL80211_ATTR_SCAN_GENERATION, + NL80211_ATTR_GENERATION, /* replaces old SCAN_GENERATION */ NL80211_ATTR_BSS, NL80211_ATTR_REG_INITIATOR, @@ -716,6 +720,9 @@ enum nl80211_attrs { NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1 }; +/* source-level API compatibility */ +#define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION + /* * Allow user space programs to use #ifdef on new attributes by defining them * here diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1ee30fcd6fdc..de7d116acc3d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -372,6 +372,10 @@ struct rate_info { * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station + * @generation: generation number for nl80211 dumps. + * This number should increase every time the list of stations + * changes, i.e. when a station is added or removed, so that + * userspace can tell whether it got a consistent snapshot. */ struct station_info { u32 filled; @@ -385,6 +389,8 @@ struct station_info { struct rate_info txrate; u32 rx_packets; u32 tx_packets; + + int generation; }; /** @@ -444,6 +450,10 @@ enum mpath_info_flags { * @flags: mesh path flags * @discovery_timeout: total mesh path discovery timeout, in msecs * @discovery_retries: mesh path discovery retries + * @generation: generation number for nl80211 dumps. + * This number should increase every time the list of mesh paths + * changes, i.e. when a station is added or removed, so that + * userspace can tell whether it got a consistent snapshot. 
*/ struct mpath_info { u32 filled; @@ -454,6 +464,8 @@ struct mpath_info { u32 discovery_timeout; u8 discovery_retries; u8 flags; + + int generation; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4bbf5007799b..5608f6c68413 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -323,6 +323,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; + sinfo->generation = sdata->local->sta_generation; + sinfo->filled = STATION_INFO_INACTIVE_TIME | STATION_INFO_RX_BYTES | STATION_INFO_TX_BYTES | @@ -909,6 +911,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, else memset(next_hop, 0, ETH_ALEN); + pinfo->generation = mesh_paths_generation; + pinfo->filled = MPATH_INFO_FRAME_QLEN | MPATH_INFO_DSN | MPATH_INFO_METRIC | diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 989591787aee..99433222bc5c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -678,6 +678,7 @@ struct ieee80211_local { struct list_head sta_list; struct sta_info *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; + int sta_generation; struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 2a2ed182cb7e..ce538814b9bd 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -265,6 +265,8 @@ void mesh_path_discard_frame(struct sk_buff *skb, void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata); void mesh_path_restart(struct ieee80211_sub_if_data *sdata); +extern int mesh_paths_generation; + #ifdef CONFIG_MAC80211_MESH extern int mesh_allocated; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 04b9e4d61b8e..431865a58622 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -38,6 +38,8 @@ struct mpath_node { static struct mesh_table *mesh_paths; static struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */ +int mesh_paths_generation; + /* This lock will have the grow table function as writer and add / delete nodes * as readers. When reading the table (i.e. 
doing lookups) we are well protected * by RCU @@ -243,6 +245,8 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) mesh_paths->mean_chain_len * (mesh_paths->hash_mask + 1)) grow = 1; + mesh_paths_generation++; + spin_unlock(&mesh_paths->hashwlock[hash_idx]); read_unlock(&pathtbl_resize_lock); if (grow) { @@ -484,6 +488,7 @@ int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) err = -ENXIO; enddel: + mesh_paths_generation++; spin_unlock(&mesh_paths->hashwlock[hash_idx]); read_unlock(&pathtbl_resize_lock); return err; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a360bceeba59..eec001491e66 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -349,6 +349,7 @@ int sta_info_insert(struct sta_info *sta) goto out_free; } list_add(&sta->list, &local->sta_list); + local->sta_generation++; local->num_sta++; sta_info_hash_add(local, sta); @@ -485,6 +486,7 @@ static void __sta_info_unlink(struct sta_info **sta) } local->num_sta--; + local->sta_generation++; if (local->ops->sta_notify) { if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) diff --git a/net/wireless/core.c b/net/wireless/core.c index 1e189306560d..62e1ac00879b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -32,6 +32,7 @@ MODULE_DESCRIPTION("wireless configuration support"); * only read the list, and that can happen quite * often because we need to do it for each command */ LIST_HEAD(cfg80211_rdev_list); +int cfg80211_rdev_list_generation; /* * This is used to protect the cfg80211_rdev_list @@ -511,6 +512,7 @@ int wiphy_register(struct wiphy *wiphy) wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); list_add(&rdev->list, &cfg80211_rdev_list); + cfg80211_rdev_list_generation++; mutex_unlock(&cfg80211_mutex); @@ -593,6 +595,7 @@ void wiphy_unregister(struct wiphy *wiphy) reg_device_remove(wiphy); list_del(&rdev->list); + cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); debugfs_remove(rdev->wiphy.debugfsdir); @@ -653,6 +656,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, spin_lock_init(&wdev->event_lock); mutex_lock(&rdev->devlist_mtx); list_add(&wdev->list, &rdev->netdev_list); + rdev->devlist_generation++; /* can only change netns with wiphy */ dev->features |= NETIF_F_NETNS_LOCAL; @@ -733,6 +737,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, if (!list_empty(&wdev->list)) { sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_init(&wdev->list); + rdev->devlist_generation++; mutex_destroy(&wdev->mtx); #ifdef CONFIG_WIRELESS_EXT kfree(wdev->wext.keys); diff --git a/net/wireless/core.h b/net/wireless/core.h index 92e0492b0e4d..639db52eeff7 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -49,6 +49,7 @@ struct cfg80211_registered_device { /* associate netdev list */ struct mutex devlist_mtx; struct list_head netdev_list; + int devlist_generation; /* BSSes/scanning */ spinlock_t bss_lock; @@ -101,6 +102,7 @@ bool wiphy_idx_valid(int wiphy_idx) extern struct mutex cfg80211_mutex; extern struct list_head cfg80211_rdev_list; +extern int cfg80211_rdev_list_generation; #define assert_cfg80211_lock() WARN_ON(!mutex_is_locked(&cfg80211_mutex)) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2ff7376f35a3..b3d5c1df08dd 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -408,6 +408,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx); NLA_PUT_STRING(msg, 
NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); + NLA_PUT_U32(msg, NL80211_ATTR_GENERATION, + cfg80211_rdev_list_generation); + NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, dev->wiphy.retry_short); NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, @@ -825,6 +828,11 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, dev->name); NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, dev->ieee80211_ptr->iftype); + + NLA_PUT_U32(msg, NL80211_ATTR_GENERATION, + rdev->devlist_generation ^ + (cfg80211_rdev_list_generation << 2)); + return genlmsg_end(msg, hdr); nla_put_failure: @@ -838,12 +846,12 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * int if_idx = 0; int wp_start = cb->args[0]; int if_start = cb->args[1]; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; mutex_lock(&cfg80211_mutex); - list_for_each_entry(dev, &cfg80211_rdev_list, list) { - if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (wp_idx < wp_start) { wp_idx++; @@ -851,21 +859,21 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * } if_idx = 0; - mutex_lock(&dev->devlist_mtx); - list_for_each_entry(wdev, &dev->netdev_list, list) { + mutex_lock(&rdev->devlist_mtx); + list_for_each_entry(wdev, &rdev->netdev_list, list) { if (if_idx < if_start) { if_idx++; continue; } if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev, wdev->netdev) < 0) { - mutex_unlock(&dev->devlist_mtx); + rdev, wdev->netdev) < 0) { + mutex_unlock(&rdev->devlist_mtx); goto out; } if_idx++; } - mutex_unlock(&dev->devlist_mtx); + mutex_unlock(&rdev->devlist_mtx); wp_idx++; } @@ -1616,6 +1624,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr); + NLA_PUT_U32(msg, NL80211_ATTR_GENERATION, sinfo->generation); + sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); if (!sinfoattr) goto nla_put_failure; @@ -2101,6 +2111,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, dst); NLA_PUT(msg, NL80211_ATTR_MPATH_NEXT_HOP, ETH_ALEN, next_hop); + NLA_PUT_U32(msg, NL80211_ATTR_GENERATION, pinfo->generation); + pinfoattr = nla_nest_start(msg, NL80211_ATTR_MPATH_INFO); if (!pinfoattr) goto nla_put_failure; @@ -3090,8 +3102,7 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (!hdr) return -1; - NLA_PUT_U32(msg, NL80211_ATTR_SCAN_GENERATION, - rdev->bss_generation); + NLA_PUT_U32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation); NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex); bss = nla_nest_start(msg, NL80211_ATTR_BSS); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0ccf3a07dc02..1bcb1312bd94 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -562,6 +562,7 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) spin_lock_bh(&dev->bss_lock); list_del(&bss->list); + dev->bss_generation++; rb_erase(&bss->rbn, &dev->bss_tree); spin_unlock_bh(&dev->bss_lock); -- cgit v1.2.3 From f679056b2fdd4e9b7c8eb42ba447cd9646236305 Mon Sep 17 00:00:00 2001 From: Gábor Stefanik Date: Mon, 10 Aug 2009 
21:23:08 +0200 Subject: ssb: Implement the remaining rev.8 SPROM vars needed for LP-PHY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also add a "SPEX32" macro for extracting 32-bit SPROM variables. Signed-off-by: Gábor Stefanik Signed-off-by: John W. Linville --- drivers/ssb/pci.c | 53 ++++++++++++++++++++++++++++++++++- include/linux/ssb/ssb.h | 44 ++++++++++++++++++++++++----- include/linux/ssb/ssb_regs.h | 66 +++++++++++++++++++++++++++++++++++++++----- 3 files changed, 148 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index 40ea41762247..593fc618a2ea 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -169,8 +169,14 @@ err_pci: /* Get the word-offset for a SSB_SPROM_XXX define. */ #define SPOFF(offset) (((offset) - SSB_SPROM_BASE) / sizeof(u16)) /* Helper to extract some _offset, which is one of the SSB_SPROM_XXX defines. */ -#define SPEX(_outvar, _offset, _mask, _shift) \ +#define SPEX16(_outvar, _offset, _mask, _shift) \ out->_outvar = ((in[SPOFF(_offset)] & (_mask)) >> (_shift)) +#define SPEX32(_outvar, _offset, _mask, _shift) \ + out->_outvar = ((((u32)in[SPOFF((_offset)+2)] << 16 | \ + in[SPOFF(_offset)]) & (_mask)) >> (_shift)) +#define SPEX(_outvar, _offset, _mask, _shift) \ + SPEX16(_outvar, _offset, _mask, _shift) + static inline u8 ssb_crc8(u8 crc, u8 data) { @@ -480,6 +486,8 @@ static void sprom_extract_r8(struct ssb_sprom *out, const u16 *in) SPEX(country_code, SSB_SPROM8_CCODE, 0xFFFF, 0); SPEX(boardflags_lo, SSB_SPROM8_BFLLO, 0xFFFF, 0); SPEX(boardflags_hi, SSB_SPROM8_BFLHI, 0xFFFF, 0); + SPEX(boardflags2_lo, SSB_SPROM8_BFL2LO, 0xFFFF, 0); + SPEX(boardflags2_hi, SSB_SPROM8_BFL2HI, 0xFFFF, 0); SPEX(ant_available_a, SSB_SPROM8_ANTAVAIL, SSB_SPROM8_ANTAVAIL_A, SSB_SPROM8_ANTAVAIL_A_SHIFT); SPEX(ant_available_bg, SSB_SPROM8_ANTAVAIL, SSB_SPROM8_ANTAVAIL_BG, @@ -490,12 +498,55 @@ static void sprom_extract_r8(struct ssb_sprom *out, const u16 *in) SPEX(maxpwr_a, SSB_SPROM8_MAXP_A, SSB_SPROM8_MAXP_A_MASK, 0); SPEX(itssi_a, SSB_SPROM8_MAXP_A, SSB_SPROM8_ITSSI_A, SSB_SPROM8_ITSSI_A_SHIFT); + SPEX(maxpwr_ah, SSB_SPROM8_MAXP_AHL, SSB_SPROM8_MAXP_AH_MASK, 0); + SPEX(maxpwr_al, SSB_SPROM8_MAXP_AHL, SSB_SPROM8_MAXP_AL_MASK, + SSB_SPROM8_MAXP_AL_SHIFT); SPEX(gpio0, SSB_SPROM8_GPIOA, SSB_SPROM8_GPIOA_P0, 0); SPEX(gpio1, SSB_SPROM8_GPIOA, SSB_SPROM8_GPIOA_P1, SSB_SPROM8_GPIOA_P1_SHIFT); SPEX(gpio2, SSB_SPROM8_GPIOB, SSB_SPROM8_GPIOB_P2, 0); SPEX(gpio3, SSB_SPROM8_GPIOB, SSB_SPROM8_GPIOB_P3, SSB_SPROM8_GPIOB_P3_SHIFT); + SPEX(tri2g, SSB_SPROM8_TRI25G, SSB_SPROM8_TRI2G, 0); + SPEX(tri5g, SSB_SPROM8_TRI25G, SSB_SPROM8_TRI5G, + SSB_SPROM8_TRI5G_SHIFT); + SPEX(tri5gl, SSB_SPROM8_TRI5GHL, SSB_SPROM8_TRI5GL, 0); + SPEX(tri5gh, SSB_SPROM8_TRI5GHL, SSB_SPROM8_TRI5GH, + SSB_SPROM8_TRI5GH_SHIFT); + SPEX(rxpo2g, SSB_SPROM8_RXPO, SSB_SPROM8_RXPO2G, 0); + SPEX(rxpo5g, SSB_SPROM8_RXPO, SSB_SPROM8_RXPO5G, + SSB_SPROM8_RXPO5G_SHIFT); + SPEX(rssismf2g, SSB_SPROM8_RSSIPARM2G, SSB_SPROM8_RSSISMF2G, 0); + SPEX(rssismc2g, SSB_SPROM8_RSSIPARM2G, SSB_SPROM8_RSSISMC2G, + SSB_SPROM8_RSSISMC2G_SHIFT); + SPEX(rssisav2g, SSB_SPROM8_RSSIPARM2G, SSB_SPROM8_RSSISAV2G, + SSB_SPROM8_RSSISAV2G_SHIFT); + SPEX(bxa2g, SSB_SPROM8_RSSIPARM2G, SSB_SPROM8_BXA2G, + SSB_SPROM8_BXA2G_SHIFT); + SPEX(rssismf5g, SSB_SPROM8_RSSIPARM5G, SSB_SPROM8_RSSISMF5G, 0); + SPEX(rssismc5g, SSB_SPROM8_RSSIPARM5G, SSB_SPROM8_RSSISMC5G, + SSB_SPROM8_RSSISMC5G_SHIFT); + SPEX(rssisav5g, SSB_SPROM8_RSSIPARM5G, SSB_SPROM8_RSSISAV5G, + 
SSB_SPROM8_RSSISAV5G_SHIFT); + SPEX(bxa5g, SSB_SPROM8_RSSIPARM5G, SSB_SPROM8_BXA5G, + SSB_SPROM8_BXA5G_SHIFT); + SPEX(pa0b0, SSB_SPROM8_PA0B0, 0xFFFF, 0); + SPEX(pa0b1, SSB_SPROM8_PA0B1, 0xFFFF, 0); + SPEX(pa0b2, SSB_SPROM8_PA0B2, 0xFFFF, 0); + SPEX(pa1b0, SSB_SPROM8_PA1B0, 0xFFFF, 0); + SPEX(pa1b1, SSB_SPROM8_PA1B1, 0xFFFF, 0); + SPEX(pa1b2, SSB_SPROM8_PA1B2, 0xFFFF, 0); + SPEX(pa1lob0, SSB_SPROM8_PA1LOB0, 0xFFFF, 0); + SPEX(pa1lob1, SSB_SPROM8_PA1LOB1, 0xFFFF, 0); + SPEX(pa1lob2, SSB_SPROM8_PA1LOB2, 0xFFFF, 0); + SPEX(pa1hib0, SSB_SPROM8_PA1HIB0, 0xFFFF, 0); + SPEX(pa1hib1, SSB_SPROM8_PA1HIB1, 0xFFFF, 0); + SPEX(pa1hib2, SSB_SPROM8_PA1HIB2, 0xFFFF, 0); + SPEX(cck2gpo, SSB_SPROM8_CCK2GPO, 0xFFFF, 0); + SPEX32(ofdm2gpo, SSB_SPROM8_OFDM2GPO, 0xFFFFFFFF, 0); + SPEX32(ofdm5glpo, SSB_SPROM8_OFDM5GLPO, 0xFFFFFFFF, 0); + SPEX32(ofdm5gpo, SSB_SPROM8_OFDM5GPO, 0xFFFFFFFF, 0); + SPEX32(ofdm5ghpo, SSB_SPROM8_OFDM5GHPO, 0xFFFFFFFF, 0); /* Extract the antenna gain values. */ SPEX(antenna_gain.ghz24.a0, SSB_SPROM8_AGAIN01, diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 5ae8fa22d331..17ffc1f84d76 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -27,24 +27,54 @@ struct ssb_sprom { u8 et1mdcport; /* MDIO for enet1 */ u8 board_rev; /* Board revision number from SPROM. */ u8 country_code; /* Country Code */ - u8 ant_available_a; /* A-PHY antenna available bits (up to 4) */ - u8 ant_available_bg; /* B/G-PHY antenna available bits (up to 4) */ + u8 ant_available_a; /* 2GHz antenna available bits (up to 4) */ + u8 ant_available_bg; /* 5GHz antenna available bits (up to 4) */ u16 pa0b0; u16 pa0b1; u16 pa0b2; u16 pa1b0; u16 pa1b1; u16 pa1b2; + u16 pa1lob0; + u16 pa1lob1; + u16 pa1lob2; + u16 pa1hib0; + u16 pa1hib1; + u16 pa1hib2; u8 gpio0; /* GPIO pin 0 */ u8 gpio1; /* GPIO pin 1 */ u8 gpio2; /* GPIO pin 2 */ u8 gpio3; /* GPIO pin 3 */ - u16 maxpwr_a; /* A-PHY Amplifier Max Power (in dBm Q5.2) */ - u16 maxpwr_bg; /* B/G-PHY Amplifier Max Power (in dBm Q5.2) */ + u16 maxpwr_bg; /* 2.4GHz Amplifier Max Power (in dBm Q5.2) */ + u16 maxpwr_al; /* 5.2GHz Amplifier Max Power (in dBm Q5.2) */ + u16 maxpwr_a; /* 5.3GHz Amplifier Max Power (in dBm Q5.2) */ + u16 maxpwr_ah; /* 5.8GHz Amplifier Max Power (in dBm Q5.2) */ u8 itssi_a; /* Idle TSSI Target for A-PHY */ u8 itssi_bg; /* Idle TSSI Target for B/G-PHY */ - u16 boardflags_lo; /* Boardflags (low 16 bits) */ - u16 boardflags_hi; /* Boardflags (high 16 bits) */ + u8 tri2g; /* 2.4GHz TX isolation */ + u8 tri5gl; /* 5.2GHz TX isolation */ + u8 tri5g; /* 5.3GHz TX isolation */ + u8 tri5gh; /* 5.8GHz TX isolation */ + u8 rxpo2g; /* 2GHz RX power offset */ + u8 rxpo5g; /* 5GHz RX power offset */ + u8 rssisav2g; /* 2GHz RSSI params */ + u8 rssismc2g; + u8 rssismf2g; + u8 bxa2g; /* 2GHz BX arch */ + u8 rssisav5g; /* 5GHz RSSI params */ + u8 rssismc5g; + u8 rssismf5g; + u8 bxa5g; /* 5GHz BX arch */ + u16 cck2gpo; /* CCK power offset */ + u32 ofdm2gpo; /* 2.4GHz OFDM power offset */ + u32 ofdm5glpo; /* 5.2GHz OFDM power offset */ + u32 ofdm5gpo; /* 5.3GHz OFDM power offset */ + u32 ofdm5ghpo; /* 5.8GHz OFDM power offset */ + u16 boardflags_lo; /* Board flags (bits 0-15) */ + u16 boardflags_hi; /* Board flags (bits 16-31) */ + u16 boardflags2_lo; /* Board flags (bits 32-47) */ + u16 boardflags2_hi; /* Board flags (bits 48-63) */ + /* TODO store board flags in a single u64 */ /* Antenna gain values for up to 4 antennas * on each band. Values in dBm/4 (Q5.2). 
Negative gain means the @@ -58,7 +88,7 @@ struct ssb_sprom { } ghz5; /* 5GHz band */ } antenna_gain; - /* TODO - add any parameters needed from rev 2, 3, or 4 SPROMs */ + /* TODO - add any parameters needed from rev 2, 3, 4, 5 or 8 SPROMs */ }; /* Information about the PCB the circuitry is soldered on. */ diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index a01b982b5783..9ae9082eaeb4 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -162,7 +162,7 @@ /* SPROM shadow area. If not otherwise noted, fields are * two bytes wide. Note that the SPROM can _only_ be read - * in two-byte quantinies. + * in two-byte quantities. */ #define SSB_SPROMSIZE_WORDS 64 #define SSB_SPROMSIZE_BYTES (SSB_SPROMSIZE_WORDS * sizeof(u16)) @@ -327,8 +327,11 @@ #define SSB_SPROM5_GPIOB_P3_SHIFT 8 /* SPROM Revision 8 */ -#define SSB_SPROM8_BFLLO 0x1084 /* Boardflags (low 16 bits) */ -#define SSB_SPROM8_BFLHI 0x1086 /* Boardflags Hi */ +#define SSB_SPROM8_BOARDREV 0x1082 /* Board revision */ +#define SSB_SPROM8_BFLLO 0x1084 /* Board flags (bits 0-15) */ +#define SSB_SPROM8_BFLHI 0x1086 /* Board flags (bits 16-31) */ +#define SSB_SPROM8_BFL2LO 0x1088 /* Board flags (bits 32-47) */ +#define SSB_SPROM8_BFL2HI 0x108A /* Board flags (bits 48-63) */ #define SSB_SPROM8_IL0MAC 0x108C /* 6 byte MAC address */ #define SSB_SPROM8_CCODE 0x1092 /* 2 byte country code */ #define SSB_SPROM8_ANTAVAIL 0x109C /* Antenna available bitfields*/ @@ -354,14 +357,63 @@ #define SSB_SPROM8_GPIOB_P2 0x00FF /* Pin 2 */ #define SSB_SPROM8_GPIOB_P3 0xFF00 /* Pin 3 */ #define SSB_SPROM8_GPIOB_P3_SHIFT 8 -#define SSB_SPROM8_MAXP_BG 0x10C0 /* Max Power BG in path 1 */ -#define SSB_SPROM8_MAXP_BG_MASK 0x00FF /* Mask for Max Power BG */ +#define SSB_SPROM8_RSSIPARM2G 0x10A4 /* RSSI params for 2GHz */ +#define SSB_SPROM8_RSSISMF2G 0x000F +#define SSB_SPROM8_RSSISMC2G 0x00F0 +#define SSB_SPROM8_RSSISMC2G_SHIFT 4 +#define SSB_SPROM8_RSSISAV2G 0x0700 +#define SSB_SPROM8_RSSISAV2G_SHIFT 8 +#define SSB_SPROM8_BXA2G 0x1800 +#define SSB_SPROM8_BXA2G_SHIFT 11 +#define SSB_SPROM8_RSSIPARM5G 0x10A6 /* RSSI params for 5GHz */ +#define SSB_SPROM8_RSSISMF5G 0x000F +#define SSB_SPROM8_RSSISMC5G 0x00F0 +#define SSB_SPROM8_RSSISMC5G_SHIFT 4 +#define SSB_SPROM8_RSSISAV5G 0x0700 +#define SSB_SPROM8_RSSISAV5G_SHIFT 8 +#define SSB_SPROM8_BXA5G 0x1800 +#define SSB_SPROM8_BXA5G_SHIFT 11 +#define SSB_SPROM8_TRI25G 0x10A8 /* TX isolation 2.4&5.3GHz */ +#define SSB_SPROM8_TRI2G 0x00FF /* TX isolation 2.4GHz */ +#define SSB_SPROM8_TRI5G 0xFF00 /* TX isolation 5.3GHz */ +#define SSB_SPROM8_TRI5G_SHIFT 8 +#define SSB_SPROM8_TRI5GHL 0x10AA /* TX isolation 5.2/5.8GHz */ +#define SSB_SPROM8_TRI5GL 0x00FF /* TX isolation 5.2GHz */ +#define SSB_SPROM8_TRI5GH 0xFF00 /* TX isolation 5.8GHz */ +#define SSB_SPROM8_TRI5GH_SHIFT 8 +#define SSB_SPROM8_RXPO 0x10AC /* RX power offsets */ +#define SSB_SPROM8_RXPO2G 0x00FF /* 2GHz RX power offset */ +#define SSB_SPROM8_RXPO5G 0xFF00 /* 5GHz RX power offset */ +#define SSB_SPROM8_RXPO5G_SHIFT 8 +#define SSB_SPROM8_MAXP_BG 0x10C0 /* Max Power 2GHz in path 1 */ +#define SSB_SPROM8_MAXP_BG_MASK 0x00FF /* Mask for Max Power 2GHz */ #define SSB_SPROM8_ITSSI_BG 0xFF00 /* Mask for path 1 itssi_bg */ #define SSB_SPROM8_ITSSI_BG_SHIFT 8 -#define SSB_SPROM8_MAXP_A 0x10C8 /* Max Power A in path 1 */ -#define SSB_SPROM8_MAXP_A_MASK 0x00FF /* Mask for Max Power A */ +#define SSB_SPROM8_PA0B0 0x10C2 /* 2GHz power amp settings */ +#define SSB_SPROM8_PA0B1 0x10C4 +#define SSB_SPROM8_PA0B2 0x10C6 +#define 
SSB_SPROM8_MAXP_A 0x10C8 /* Max Power 5.3GHz */ +#define SSB_SPROM8_MAXP_A_MASK 0x00FF /* Mask for Max Power 5.3GHz */ #define SSB_SPROM8_ITSSI_A 0xFF00 /* Mask for path 1 itssi_a */ #define SSB_SPROM8_ITSSI_A_SHIFT 8 +#define SSB_SPROM8_MAXP_AHL 0x10CA /* Max Power 5.2/5.8GHz */ +#define SSB_SPROM8_MAXP_AH_MASK 0x00FF /* Mask for Max Power 5.8GHz */ +#define SSB_SPROM8_MAXP_AL_MASK 0xFF00 /* Mask for Max Power 5.2GHz */ +#define SSB_SPROM8_MAXP_AL_SHIFT 8 +#define SSB_SPROM8_PA1B0 0x10CC /* 5.3GHz power amp settings */ +#define SSB_SPROM8_PA1B1 0x10CE +#define SSB_SPROM8_PA1B2 0x10D0 +#define SSB_SPROM8_PA1LOB0 0x10D2 /* 5.2GHz power amp settings */ +#define SSB_SPROM8_PA1LOB1 0x10D4 +#define SSB_SPROM8_PA1LOB2 0x10D6 +#define SSB_SPROM8_PA1HIB0 0x10D8 /* 5.8GHz power amp settings */ +#define SSB_SPROM8_PA1HIB1 0x10DA +#define SSB_SPROM8_PA1HIB2 0x10DC +#define SSB_SPROM8_CCK2GPO 0x1140 /* CCK power offset */ +#define SSB_SPROM8_OFDM2GPO 0x1142 /* 2.4GHz OFDM power offset */ +#define SSB_SPROM8_OFDM5GPO 0x1146 /* 5.3GHz OFDM power offset */ +#define SSB_SPROM8_OFDM5GLPO 0x114A /* 5.2GHz OFDM power offset */ +#define SSB_SPROM8_OFDM5GHPO 0x114E /* 5.8GHz OFDM power offset */ /* Values for SSB_SPROM1_BINF_CCODE */ enum { -- cgit v1.2.3 From 7834ddbcc7a097443761b0722e8c9fb8511b95b1 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 11 Aug 2009 22:57:16 +0300 Subject: usbnet: add rx queue pausing Add rx queue pausing to usbnet. This is needed by rndis_wlan so that it can control the rx queue and prevent received packets from being sent forward before rndis_wlan receives and handles the 'media connect' indication. Without this, establishing WPA connections is unreliable and often fails. [v2] - removed unneeded use of skb_clone Cc: David Brownell Signed-off-by: Jussi Kivilinna Signed-off-by: John W.
Linville --- drivers/net/usb/usbnet.c | 44 ++++++++++++++++++++++++++++++++++++++- drivers/net/wireless/rndis_wlan.c | 13 +++++++++++- include/linux/usb/usbnet.h | 6 ++++++ 3 files changed, 61 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index af1fe4696509..7d471fca2743 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -233,6 +233,11 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) { int status; + if (test_bit(EVENT_RX_PAUSED, &dev->flags)) { + skb_queue_tail(&dev->rxq_pause, skb); + return; + } + skb->protocol = eth_type_trans (skb, dev->net); dev->net->stats.rx_packets++; dev->net->stats.rx_bytes += skb->len; @@ -525,6 +530,41 @@ static void intr_complete (struct urb *urb) deverr(dev, "intr resubmit --> %d", status); } +/*-------------------------------------------------------------------------*/ +void usbnet_pause_rx(struct usbnet *dev) +{ + set_bit(EVENT_RX_PAUSED, &dev->flags); + + if (netif_msg_rx_status(dev)) + devdbg(dev, "paused rx queue enabled"); +} +EXPORT_SYMBOL_GPL(usbnet_pause_rx); + +void usbnet_resume_rx(struct usbnet *dev) +{ + struct sk_buff *skb; + int num = 0; + + clear_bit(EVENT_RX_PAUSED, &dev->flags); + + while ((skb = skb_dequeue(&dev->rxq_pause)) != NULL) { + usbnet_skb_return(dev, skb); + num++; + } + + tasklet_schedule(&dev->bh); + + if (netif_msg_rx_status(dev)) + devdbg(dev, "paused rx queue disabled, %d skbs requeued", num); +} +EXPORT_SYMBOL_GPL(usbnet_resume_rx); + +void usbnet_purge_paused_rxq(struct usbnet *dev) +{ + skb_queue_purge(&dev->rxq_pause); +} +EXPORT_SYMBOL_GPL(usbnet_purge_paused_rxq); + /*-------------------------------------------------------------------------*/ // unlink pending rx/tx; completion handlers do all other cleanup @@ -623,6 +663,8 @@ int usbnet_stop (struct net_device *net) usb_kill_urb(dev->interrupt); + usbnet_purge_paused_rxq(dev); + /* deferred work (task, timer, softirq) must also stop. * can't flush_scheduled_work() until we drop rtnl (later), * else workers could deadlock; so make workers a NOP. @@ -1113,7 +1155,6 @@ static void usbnet_bh (unsigned long param) } - /*------------------------------------------------------------------------- * * USB Device Driver support @@ -1210,6 +1251,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) skb_queue_head_init (&dev->rxq); skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); + skb_queue_head_init(&dev->rxq_pause); dev->bh.func = usbnet_bh; dev->bh.data = (unsigned long) dev; INIT_WORK (&dev->kevent, kevent); diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 828dc1825bba..d42692dfbc67 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -1764,8 +1764,15 @@ static int rndis_iw_set_essid(struct net_device *dev, if (!wrqu->essid.flags || length == 0) return disassociate(usbdev, 1); - else + else { + /* Pause and purge rx queue, so we don't pass packets before + * 'media connect'-indication. 
+ */ + usbnet_pause_rx(usbdev); + usbnet_purge_paused_rxq(usbdev); + return set_essid(usbdev, &ssid); + } } @@ -2328,6 +2335,8 @@ get_bssid: memcpy(evt.ap_addr.sa_data, bssid, ETH_ALEN); wireless_send_event(usbdev->net, SIOCGIWAP, &evt, NULL); } + + usbnet_resume_rx(usbdev); } if (test_and_clear_bit(WORK_LINK_DOWN, &priv->work_pending)) { @@ -2541,6 +2550,8 @@ static void rndis_wlan_indication(struct usbnet *usbdev, void *ind, int buflen) switch (msg->status) { case RNDIS_STATUS_MEDIA_CONNECT: + usbnet_pause_rx(usbdev); + devinfo(usbdev, "media connect"); /* queue work to avoid recursive calls into rndis_command */ diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index de8b4b18961b..09514252d84e 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -53,6 +53,7 @@ struct usbnet { struct sk_buff_head rxq; struct sk_buff_head txq; struct sk_buff_head done; + struct sk_buff_head rxq_pause; struct urb *interrupt; struct tasklet_struct bh; @@ -63,6 +64,7 @@ struct usbnet { # define EVENT_RX_MEMORY 2 # define EVENT_STS_SPLIT 3 # define EVENT_LINK_RESET 4 +# define EVENT_RX_PAUSED 5 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) @@ -190,6 +192,10 @@ extern void usbnet_defer_kevent (struct usbnet *, int); extern void usbnet_skb_return (struct usbnet *, struct sk_buff *); extern void usbnet_unlink_rx_urbs(struct usbnet *); +extern void usbnet_pause_rx(struct usbnet *); +extern void usbnet_resume_rx(struct usbnet *); +extern void usbnet_purge_paused_rxq(struct usbnet *); + extern int usbnet_get_settings (struct net_device *net, struct ethtool_cmd *cmd); extern int usbnet_set_settings (struct net_device *net, struct ethtool_cmd *cmd); extern u32 usbnet_get_link (struct net_device *net); -- cgit v1.2.3 From 9f162d2a810b4db48f7b8d7e734d0932c81ec2a1 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 14 Aug 2009 17:18:44 +0300 Subject: sunrpc: hton -> cpu_to_be* htonl is already defined as cpu_to_be32. cpu_to_be64 has architecture specific optimized implementations. 
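For illustration (a sketch only, not part of the patch): both helpers below emit identical big-endian bytes, but the cpu_to_be64() form lets the architecture use a single optimized 64-bit byte swap instead of two 32-bit ones.

/* Illustrative comparison of the two encodings of a 64-bit hyper. */
static inline __be32 *encode_hyper_two_stores(__be32 *p, __u64 val)
{
	*p++ = cpu_to_be32(val >> 32);		/* high word first */
	*p++ = cpu_to_be32(val & 0xFFFFFFFF);	/* then low word */
	return p;
}

static inline __be32 *encode_hyper_one_store(__be32 *p, __u64 val)
{
	*(__be64 *)p = cpu_to_be64(val);	/* one swap, one store */
	return p + 2;
}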
Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 5 ++--- net/sunrpc/xdr.c | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b99c625fddfe..f94bbdc75c4b 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -117,9 +117,8 @@ static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int le static inline __be32 * xdr_encode_hyper(__be32 *p, __u64 val) { - *p++ = htonl(val >> 32); - *p++ = htonl(val & 0xFFFFFFFF); - return p; + *(__be64 *)p = cpu_to_be64(val); + return p + 2; } static inline __be32 * diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 406e26de584e..0d05d2554b42 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -24,7 +24,7 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj) unsigned int quadlen = XDR_QUADLEN(obj->len); p[quadlen] = 0; /* zero trailing bytes */ - *p++ = htonl(obj->len); + *p++ = cpu_to_be32(obj->len); memcpy(p, obj->data, obj->len); return p + XDR_QUADLEN(obj->len); } @@ -83,7 +83,7 @@ EXPORT_SYMBOL_GPL(xdr_encode_opaque_fixed); */ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes) { - *p++ = htonl(nbytes); + *p++ = cpu_to_be32(nbytes); return xdr_encode_opaque_fixed(p, ptr, nbytes); } EXPORT_SYMBOL_GPL(xdr_encode_opaque); @@ -779,7 +779,7 @@ EXPORT_SYMBOL_GPL(xdr_decode_word); int xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) { - __be32 raw = htonl(obj); + __be32 raw = cpu_to_be32(obj); return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); } -- cgit v1.2.3 From 98866b5abe1513cdacc011874ca045d40002eccd Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 14 Aug 2009 17:18:49 +0300 Subject: sunrpc: ntoh -> be*_to_cpu ntohl is already defined as be32_to_cpu. be64_to_cpu has architecture specific optimized implementations. 
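The decode side mirrors this; a userspace sketch (illustration only; be64toh/ntohl as stand-ins for the kernel's be64_to_cpup/ntohl) showing that one 64-bit big-endian load recovers the same value the old two-word assembly did:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <endian.h>	/* be64toh (glibc); stand-in for be64_to_cpup */
#include <arpa/inet.h>	/* htonl, ntohl */

int main(void)
{
	uint32_t wire[2] = { htonl(0x11223344), htonl(0x55667788) };
	uint64_t old_way, new_way;

	/* Old style: assemble the value from two 32-bit loads. */
	old_way = ((uint64_t)ntohl(wire[0])) << 32;
	old_way |= ntohl(wire[1]);

	/* New style: one 64-bit big-endian load. */
	memcpy(&new_way, wire, sizeof(new_way));
	new_way = be64toh(new_way);

	printf("decoded values equal: %s\n",
	       old_way == new_way ? "yes" : "no");
	return 0;
}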
Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 5 ++--- net/sunrpc/xdr.c | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f94bbdc75c4b..7da466ba4b0d 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -124,9 +124,8 @@ xdr_encode_hyper(__be32 *p, __u64 val) static inline __be32 * xdr_decode_hyper(__be32 *p, __u64 *valp) { - *valp = ((__u64) ntohl(*p++)) << 32; - *valp |= ntohl(*p++); - return p; + *valp = be64_to_cpup((__be64 *)p); + return p + 2; } /* diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 0d05d2554b42..8bd690c48b69 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -35,7 +35,7 @@ xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) { unsigned int len; - if ((len = ntohl(*p++)) > XDR_MAX_NETOBJ) + if ((len = be32_to_cpu(*p++)) > XDR_MAX_NETOBJ) return NULL; obj->len = len; obj->data = (u8 *) p; @@ -101,7 +101,7 @@ xdr_decode_string_inplace(__be32 *p, char **sp, { u32 len; - len = ntohl(*p++); + len = be32_to_cpu(*p++); if (len > maxlen) return NULL; *lenp = len; @@ -771,7 +771,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj) status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); if (status) return status; - *obj = ntohl(raw); + *obj = be32_to_cpu(raw); return 0; } EXPORT_SYMBOL_GPL(xdr_decode_word); -- cgit v1.2.3 From bb2af4f54ffa8245d5ce278cae9c66198bc14d8b Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 14 Aug 2009 12:41:57 +0000 Subject: net: Add NETIF_F_FCOE_MTU to indicate support for a different MTU for FCoE Add NETIF_F_FCOE_MTU to indicate that the NIC can support a secondary MTU for converged traffic of LAN and Fiber Channel over Ethernet (FCoE). The MTU for FCoE is 2158 = 14 (FCoE header) + 24 (FC header) + 2112 (FC max payload) + 4 (FC CRC) + 4 (FCoE trailer). Signed-off-by: Yi Zou Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9f25ab2899de..9192cdf5bd2c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -701,6 +701,7 @@ struct net_device /* the GSO_MASK reserves bits 16 through 23 */ #define NETIF_F_FCOE_CRC (1 << 24) /* FCoE CRC32 */ #define NETIF_F_SCTP_CSUM (1 << 25) /* SCTP checksum offload */ +#define NETIF_F_FCOE_MTU (1 << 26) /* Supports max FCoE MTU, 2158 bytes*/ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 -- cgit v1.2.3 From 31089c13bcb18d2cd2a3ddfbe3a28666346f237e Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 14 Aug 2009 15:47:18 +0200 Subject: timekeeping: Introduce timekeeping_leap_insert Move the adjustment of xtime, wall_to_monotonic and the update of the vsyscall variables to the timekeeping code. 
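The helper preserves an existing invariant: xtime and wall_to_monotonic move by the same amount in opposite directions, so their sum — the basis of monotonic time — is unchanged and only UTC jumps. A freestanding sketch (simplified types, illustration only) of that property:

#include <stdio.h>

static long xtime_sec = 1000;			/* wall clock (UTC) seconds */
static long wall_to_monotonic_sec = -400;	/* offset to monotonic time */

/* Mirrors the new helper, minus the vsyscall update. */
static void leap_insert(int leapsecond)
{
	xtime_sec += leapsecond;
	wall_to_monotonic_sec -= leapsecond;
}

int main(void)
{
	long mono_before = xtime_sec + wall_to_monotonic_sec;
	long mono_after;

	leap_insert(-1);	/* TIME_INS: insert a 23:59:60 second */
	mono_after = xtime_sec + wall_to_monotonic_sec;
	printf("monotonic unchanged: %s\n",
	       mono_before == mono_after ? "yes" : "no");
	return 0;
}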
Signed-off-by: John Stultz Signed-off-by: Martin Schwidefsky LKML-Reference: <20090814134807.609730216@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 1 + kernel/time/ntp.c | 7 ++----- kernel/time/timekeeping.c | 7 +++++++ 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index ea16c1a01d51..e7c844558884 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -147,6 +147,7 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); extern void update_wall_time(void); extern void update_xtime_cache(u64 nsec); +extern void timekeeping_leap_insert(int leapsecond); struct tms; extern void do_sys_times(struct tms *); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7fc64375ff43..4800f933910e 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -194,8 +194,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) case TIME_OK: break; case TIME_INS: - xtime.tv_sec--; - wall_to_monotonic.tv_sec++; + timekeeping_leap_insert(-1); time_state = TIME_OOP; printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); @@ -203,9 +202,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) res = HRTIMER_RESTART; break; case TIME_DEL: - xtime.tv_sec++; + timekeeping_leap_insert(1); time_tai--; - wall_to_monotonic.tv_sec--; time_state = TIME_WAIT; printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); @@ -219,7 +217,6 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) time_state = TIME_OK; break; } - update_vsyscall(&xtime, clock); write_sequnlock(&xtime_lock); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 02c0b2c9c674..b8b70fb545fc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -58,6 +58,13 @@ void update_xtime_cache(u64 nsec) struct clocksource *clock; +/* must hold xtime_lock */ +void timekeeping_leap_insert(int leapsecond) +{ + xtime.tv_sec += leapsecond; + wall_to_monotonic.tv_sec -= leapsecond; + update_vsyscall(&xtime, clock); +} #ifdef CONFIG_GENERIC_TIME /** -- cgit v1.2.3 From a0f7d48bfb95a4c5172a2756dbc4b82afc8e9ae4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:19 +0200 Subject: timekeeping: Remove clocksource inline functions The three inline functions clocksource_read, clocksource_enable and clocksource_disable are simple wrappers of an indirect call plus the copy from and to the mult_orig value. The functions are exclusively used by the timekeeping code which has intimate knowledge of the clocksource anyway. Therefore remove the inline functions. No functional change. 
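The bookkeeping the wrappers performed — resyncing mult_orig after ->enable() may have changed mult, and restoring mult before ->disable() — now lives open-coded in the timekeeping core, as the diff below shows. A freestanding sketch of those semantics (pared-down struct, hypothetical names; illustration only):

#include <stdio.h>

struct cs_sketch {
	int  (*enable)(struct cs_sketch *cs);
	void (*disable)(struct cs_sketch *cs);
	unsigned int mult;	/* cycle-to-ns multiplier, NTP-adjusted */
	unsigned int mult_orig;	/* unadjusted shadow copy */
};

/* What the removed clocksource_enable() did. */
static int cs_enable(struct cs_sketch *cs)
{
	int ret = 0;

	if (cs->enable)
		ret = cs->enable(cs);
	/* ->enable() may update mult for a new frequency;
	 * keep the unadjusted copy in sync. */
	cs->mult_orig = cs->mult;
	return ret;
}

/* What the removed clocksource_disable() did. */
static void cs_disable(struct cs_sketch *cs)
{
	/* Undo any NTP adjustment before powering down. */
	cs->mult = cs->mult_orig;
	if (cs->disable)
		cs->disable(cs);
}

int main(void)
{
	struct cs_sketch cs = { NULL, NULL, 1000, 1000 };

	cs_enable(&cs);
	cs.mult += 3;		/* pretend NTP sped the clock up */
	cs_disable(&cs);
	printf("mult restored: %u\n", cs.mult);	/* prints 1000 */
	return 0;
}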
Signed-off-by: Martin Schwidefsky Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134807.903108946@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 58 --------------------------------------------- kernel/time/timekeeping.c | 41 ++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1219be4fb42e..a1ef46f61c81 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -267,64 +267,6 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) return (u32)tmp; } -/** - * clocksource_read: - Access the clocksource's current cycle value - * @cs: pointer to clocksource being read - * - * Uses the clocksource to return the current cycle_t value - */ -static inline cycle_t clocksource_read(struct clocksource *cs) -{ - return cs->read(cs); -} - -/** - * clocksource_enable: - enable clocksource - * @cs: pointer to clocksource - * - * Enables the specified clocksource. The clocksource callback - * function should start up the hardware and setup mult and field - * members of struct clocksource to reflect hardware capabilities. - */ -static inline int clocksource_enable(struct clocksource *cs) -{ - int ret = 0; - - if (cs->enable) - ret = cs->enable(cs); - - /* - * The frequency may have changed while the clocksource - * was disabled. If so the code in ->enable() must update - * the mult value to reflect the new frequency. Make sure - * mult_orig follows this change. - */ - cs->mult_orig = cs->mult; - - return ret; -} - -/** - * clocksource_disable: - disable clocksource - * @cs: pointer to clocksource - * - * Disables the specified clocksource. The clocksource callback - * function should power down the now unused hardware block to - * save power. - */ -static inline void clocksource_disable(struct clocksource *cs) -{ - /* - * Save mult_orig in mult so clocksource_enable() can - * restore the value regardless if ->enable() updates - * the value of mult or not. 
- */ - cs->mult = cs->mult_orig; - - if (cs->disable) - cs->disable(cs); -} - /** * cyc2ns - converts clocksource cycles to nanoseconds * @cs: Pointer to clocksource diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b8b70fb545fc..016a2591d719 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -79,7 +79,7 @@ static void clocksource_forward_now(void) cycle_t cycle_now, cycle_delta; s64 nsec; - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; @@ -114,7 +114,7 @@ void getnstimeofday(struct timespec *ts) *ts = xtime; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -146,7 +146,7 @@ ktime_t ktime_get(void) nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -186,7 +186,7 @@ void ktime_get_ts(struct timespec *ts) tomono = wall_to_monotonic; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -274,16 +274,29 @@ static void change_clocksource(void) clocksource_forward_now(); - if (clocksource_enable(new)) + if (new->enable && !new->enable(new)) return; + /* + * The frequency may have changed while the clocksource + * was disabled. If so the code in ->enable() must update + * the mult value to reflect the new frequency. Make sure + * mult_orig follows this change. + */ + new->mult_orig = new->mult; new->raw_time = clock->raw_time; old = clock; clock = new; - clocksource_disable(old); + /* + * Save mult_orig in mult so that the value can be restored + * regardless if ->enable() updates the value of mult or not. 
+ */ + old->mult = old->mult_orig; + if (old->disable) + old->disable(old); clock->cycle_last = 0; - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); clock->error = 0; clock->xtime_nsec = 0; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); @@ -373,7 +386,7 @@ void getrawmonotonic(struct timespec *ts) seq = read_seqbegin(&xtime_lock); /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -435,9 +448,12 @@ void __init timekeeping_init(void) ntp_init(); clock = clocksource_get_next(); - clocksource_enable(clock); + if (clock->enable) + clock->enable(clock); + /* set mult_orig on enable */ + clock->mult_orig = clock->mult; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); xtime.tv_sec = sec; xtime.tv_nsec = 0; @@ -477,8 +493,7 @@ static int timekeeping_resume(struct sys_device *dev) } update_xtime_cache(0); /* re-base the last cycle value */ - clock->cycle_last = 0; - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); clock->error = 0; timekeeping_suspended = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -630,7 +645,7 @@ void update_wall_time(void) return; #ifdef CONFIG_GENERIC_TIME - offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; + offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #else offset = clock->cycle_interval; #endif -- cgit v1.2.3 From f1b82746c1e93daf24e1ab9bfbd39bcdb2e7018b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:21 +0200 Subject: clocksource: Cleanup clocksource selection If a non high-resolution clocksource is first set as override clock and then registered it becomes active even if the system is in one-shot mode. Move the override check from sysfs_override_clocksource to the clocksource selection. That fixes the bug and simplifies the code. The check in clocksource_register for double registration of the same clocksource is removed without replacement. To find the initial clocksource a new weak function in jiffies.c is defined that returns the jiffies clocksource. The architecture code can then override the weak function with a more suitable clocksource, e.g. the TOD clock on s390. 
[ tglx: Folded in a fix from John Stultz ] Signed-off-by: Martin Schwidefsky Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134808.388024160@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 4 ++ include/linux/clocksource.h | 2 + kernel/time/clocksource.c | 134 +++++++++++++++++--------------------------- kernel/time/jiffies.c | 6 +- kernel/time/timekeeping.c | 4 +- 5 files changed, 64 insertions(+), 86 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d4c8e9c47c81..afefe514df0f 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -205,6 +205,10 @@ static struct clocksource clocksource_tod = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +struct clocksource * __init clocksource_default_clock(void) +{ + return &clocksource_tod; +} void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index a1ef46f61c81..f263b3abf46e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -322,6 +323,7 @@ extern void clocksource_touch_watchdog(void); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); +extern struct clocksource * __init __weak clocksource_default_clock(void); #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7466cb811251..e91662e87cde 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -21,7 +21,6 @@ * * TODO WishList: * o Allow clocksource drivers to be unregistered - * o get rid of clocksource_jiffies extern */ #include @@ -107,12 +106,9 @@ u64 timecounter_cyc2time(struct timecounter *tc, } EXPORT_SYMBOL(timecounter_cyc2time); -/* XXX - Would like a better way for initializing curr_clocksource */ -extern struct clocksource clocksource_jiffies; - /*[Clocksource internal variables]--------- * curr_clocksource: - * currently selected clocksource. Initialized to clocksource_jiffies. + * currently selected clocksource. * next_clocksource: * pending next selected clocksource. * clocksource_list: @@ -123,9 +119,8 @@ extern struct clocksource clocksource_jiffies; * override_name: * Name of the user-specified clocksource. */ -static struct clocksource *curr_clocksource = &clocksource_jiffies; +static struct clocksource *curr_clocksource; static struct clocksource *next_clocksource; -static struct clocksource *clocksource_override; static LIST_HEAD(clocksource_list); static DEFINE_SPINLOCK(clocksource_lock); static char override_name[32]; @@ -320,6 +315,7 @@ void clocksource_touch_watchdog(void) clocksource_resume_watchdog(); } +#ifdef CONFIG_GENERIC_TIME /** * clocksource_get_next - Returns the selected clocksource * @@ -339,56 +335,65 @@ struct clocksource *clocksource_get_next(void) } /** - * select_clocksource - Selects the best registered clocksource. + * clocksource_select - Select the best clocksource available * * Private function. Must hold clocksource_lock when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. 
*/ -static struct clocksource *select_clocksource(void) +static void clocksource_select(void) { - struct clocksource *next; + struct clocksource *best, *cs; if (list_empty(&clocksource_list)) - return NULL; + return; + /* First clocksource on the list has the best rating. */ + best = list_first_entry(&clocksource_list, struct clocksource, list); + /* Check for the override clocksource. */ + list_for_each_entry(cs, &clocksource_list, list) { + if (strcmp(cs->name, override_name) != 0) + continue; + /* + * Check to make sure we don't switch to a non-highres + * capable clocksource if the tick code is in oneshot + * mode (highres or nohz) + */ + if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && + tick_oneshot_mode_active()) { + /* Override clocksource cannot be used. */ + printk(KERN_WARNING "Override clocksource %s is not " + "HRT compatible. Cannot switch while in " + "HRT/NOHZ mode\n", cs->name); + override_name[0] = 0; + } else + /* Override clocksource can be used. */ + best = cs; + break; + } + if (curr_clocksource != best) + next_clocksource = best; +} - if (clocksource_override) - next = clocksource_override; - else - next = list_entry(clocksource_list.next, struct clocksource, - list); +#else /* CONFIG_GENERIC_TIME */ - if (next == curr_clocksource) - return NULL; +static void clocksource_select(void) { } - return next; -} +#endif /* * Enqueue the clocksource sorted by rating */ -static int clocksource_enqueue(struct clocksource *c) +static void clocksource_enqueue(struct clocksource *cs) { - struct list_head *tmp, *entry = &clocksource_list; - - list_for_each(tmp, &clocksource_list) { - struct clocksource *cs; + struct list_head *entry = &clocksource_list; + struct clocksource *tmp; - cs = list_entry(tmp, struct clocksource, list); - if (cs == c) - return -EBUSY; + list_for_each_entry(tmp, &clocksource_list, list) /* Keep track of the place, where to insert */ - if (cs->rating >= c->rating) - entry = tmp; - } - list_add(&c->list, entry); - - if (strlen(c->name) == strlen(override_name) && - !strcmp(c->name, override_name)) - clocksource_override = c; - - return 0; + if (tmp->rating >= cs->rating) + entry = &tmp->list; + list_add(&cs->list, entry); } /** @@ -397,19 +402,16 @@ static int clocksource_enqueue(struct clocksource *c) * * Returns -EBUSY if registration fails, zero otherwise. 
*/ -int clocksource_register(struct clocksource *c) +int clocksource_register(struct clocksource *cs) { unsigned long flags; - int ret; spin_lock_irqsave(&clocksource_lock, flags); - ret = clocksource_enqueue(c); - if (!ret) - next_clocksource = select_clocksource(); + clocksource_enqueue(cs); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); - if (!ret) - clocksource_check_watchdog(c); - return ret; + clocksource_check_watchdog(cs); + return 0; } EXPORT_SYMBOL(clocksource_register); @@ -425,7 +427,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating) list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -438,9 +440,7 @@ void clocksource_unregister(struct clocksource *cs) spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); - if (clocksource_override == cs) - clocksource_override = NULL; - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -478,9 +478,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, struct sysdev_attribute *attr, const char *buf, size_t count) { - struct clocksource *ovr = NULL; size_t ret = count; - int len; /* strings from sysfs write are not 0 terminated! */ if (count >= sizeof(override_name)) @@ -495,37 +493,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; - - len = strlen(override_name); - if (len) { - struct clocksource *cs; - - ovr = clocksource_override; - /* try to select it: */ - list_for_each_entry(cs, &clocksource_list, list) { - if (strlen(cs->name) == len && - !strcmp(cs->name, override_name)) - ovr = cs; - } - } - - /* - * Check to make sure we don't switch to a non-highres capable - * clocksource if the tick code is in oneshot mode (highres or nohz) - */ - if (tick_oneshot_mode_active() && ovr && - !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) { - printk(KERN_WARNING "%s clocksource is not HRT compatible. 
" - "Cannot switch while in HRT/NOHZ mode\n", ovr->name); - ovr = NULL; - override_name[0] = 0; - } - - /* Reselect, when the override name has changed */ - if (ovr != clocksource_override) { - clocksource_override = ovr; - next_clocksource = select_clocksource(); - } + clocksource_select(); spin_unlock_irq(&clocksource_lock); diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index c3f6c30816e3..5404a8456909 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -61,7 +61,6 @@ struct clocksource clocksource_jiffies = { .read = jiffies_read, .mask = 0xffffffff, /*32bits*/ .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ - .mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT, .shift = JIFFIES_SHIFT, }; @@ -71,3 +70,8 @@ static int __init init_jiffies_clocksource(void) } core_initcall(init_jiffies_clocksource); + +struct clocksource * __init __weak clocksource_default_clock(void) +{ + return &clocksource_jiffies; +} diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b5673016089f..325a9b63265a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -269,7 +269,7 @@ static void change_clocksource(void) new = clocksource_get_next(); - if (clock == new) + if (!new || clock == new) return; clocksource_forward_now(); @@ -446,7 +446,7 @@ void __init timekeeping_init(void) ntp_init(); - clock = clocksource_get_next(); + clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); /* set mult_orig on enable */ -- cgit v1.2.3 From c55c87c892c1875deace0c8fc28787335277fdf2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:25 +0200 Subject: clocksource: Move watchdog downgrade to a work queue thread Move the downgrade of an unstable clocksource from the timer interrupt context into the process context of a work queue thread. This is needed to be able to do the clocksource switch with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.354926067@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 56 +++++++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index f263b3abf46e..19ad43af62d0 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -213,6 +213,7 @@ extern struct clocksource *clock; /* current clocksource */ #define CLOCK_SOURCE_WATCHDOG 0x10 #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 +#define CLOCK_SOURCE_UNSTABLE 0x40 /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? 
((1ULL<<(bits))-1) : -1) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 56aaa749645d..f1508019bfb4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -143,10 +143,13 @@ fs_initcall(clocksource_done_booting); static LIST_HEAD(watchdog_list); static struct clocksource *watchdog; static struct timer_list watchdog_timer; +static struct work_struct watchdog_work; static DEFINE_SPINLOCK(watchdog_lock); static cycle_t watchdog_last; static int watchdog_running; +static void clocksource_watchdog_work(struct work_struct *work); + /* * Interval: 0.5sec Threshold: 0.0625s */ @@ -158,15 +161,16 @@ static void clocksource_unstable(struct clocksource *cs, int64_t delta) printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); - clocksource_change_rating(cs, 0); - list_del(&cs->wd_list); + cs->flags |= CLOCK_SOURCE_UNSTABLE; + schedule_work(&watchdog_work); } static void clocksource_watchdog(unsigned long data) { - struct clocksource *cs, *tmp; + struct clocksource *cs; cycle_t csnow, wdnow; int64_t wd_nsec, cs_nsec; + int next_cpu; spin_lock(&watchdog_lock); if (!watchdog_running) @@ -176,7 +180,12 @@ static void clocksource_watchdog(unsigned long data) wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); watchdog_last = wdnow; - list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { + list_for_each_entry(cs, &watchdog_list, wd_list) { + + /* Clocksource already marked unstable? */ + if (cs->flags & CLOCK_SOURCE_UNSTABLE) + continue; + csnow = cs->read(cs); /* Clocksource initialized ? */ @@ -207,19 +216,15 @@ static void clocksource_watchdog(unsigned long data) } } - if (!list_empty(&watchdog_list)) { - /* - * Cycle through CPUs to check if the CPUs stay - * synchronized to each other. - */ - int next_cpu = cpumask_next(raw_smp_processor_id(), - cpu_online_mask); - - if (next_cpu >= nr_cpu_ids) - next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); - } + /* + * Cycle through CPUs to check if the CPUs stay synchronized + * to each other. + */ + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); out: spin_unlock(&watchdog_lock); } @@ -228,6 +233,7 @@ static inline void clocksource_start_watchdog(void) { if (watchdog_running || !watchdog || list_empty(&watchdog_list)) return; + INIT_WORK(&watchdog_work, clocksource_watchdog_work); init_timer(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; watchdog_last = watchdog->read(watchdog); @@ -313,6 +319,22 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } +static void clocksource_watchdog_work(struct work_struct *work) +{ + struct clocksource *cs, *tmp; + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) + if (cs->flags & CLOCK_SOURCE_UNSTABLE) { + list_del_init(&cs->wd_list); + clocksource_change_rating(cs, 0); + } + /* Check if the watchdog timer needs to be stopped. 
*/ + clocksource_stop_watchdog(); + spin_unlock(&watchdog_lock); +} + #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ static void clocksource_enqueue_watchdog(struct clocksource *cs) -- cgit v1.2.3 From 155ec60226ae0ae2aadaa57c951a58a359331030 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:26 +0200 Subject: timekeeping: Introduce struct timekeeper Add struct timekeeper to keep the internal values timekeeping.c needs in regard to the currently selected clock source. This moves the timekeeping intervals, xtime_nsec and the ntp error value from struct clocksource to struct timekeeper. The raw_time is removed from the clocksource as well. It gets treated like xtime as a global variable. Eventually xtime raw_time should be moved to struct timekeeper. [ tglx: minor cleanup ] Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.613209842@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 1 - include/linux/clocksource.h | 54 +--------- kernel/time/clocksource.c | 6 +- kernel/time/timekeeping.c | 235 +++++++++++++++++++++++++++++--------------- 4 files changed, 164 insertions(+), 132 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index afefe514df0f..e76c2e7a8b9a 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -280,7 +280,6 @@ void __init time_init(void) now = get_clock(); tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); clocksource_tod.cycle_last = now; - clocksource_tod.raw_time = xtime; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 19ad43af62d0..e12e3095e2fb 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -155,8 +155,6 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @flags: flags describing special properties * @vread: vsyscall based read * @resume: resume function for the clocksource, if necessary - * @cycle_interval: Used internally by timekeeping core, please ignore. - * @xtime_interval: Used internally by timekeeping core, please ignore. */ struct clocksource { /* @@ -182,19 +180,12 @@ struct clocksource { #define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0) #endif - /* timekeeping specific data, ignore */ - cycle_t cycle_interval; - u64 xtime_interval; - u32 raw_interval; /* * Second part is written at each timer interrupt * Keep it in a different cache line to dirty no * more than one cache line. */ cycle_t cycle_last ____cacheline_aligned_in_smp; - u64 xtime_nsec; - s64 error; - struct timespec raw_time; #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ @@ -203,8 +194,6 @@ struct clocksource { #endif }; -extern struct clocksource *clock; /* current clocksource */ - /* * Clock source flags bits:: */ @@ -270,50 +259,15 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) } /** - * cyc2ns - converts clocksource cycles to nanoseconds - * @cs: Pointer to clocksource - * @cycles: Cycles + * clocksource_cyc2ns - converts clocksource cycles to nanoseconds * - * Uses the clocksource and ntp ajdustment to convert cycle_ts to nanoseconds. + * Converts cycles to nanoseconds, using the given mult and shift. 
* * XXX - This could use some mult_lxl_ll() asm optimization */ -static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles) +static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) { - u64 ret = (u64)cycles; - ret = (ret * cs->mult) >> cs->shift; - return ret; -} - -/** - * clocksource_calculate_interval - Calculates a clocksource interval struct - * - * @c: Pointer to clocksource. - * @length_nsec: Desired interval length in nanoseconds. - * - * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment - * pair and interval request. - * - * Unless you're the timekeeping code, you should not be using this! - */ -static inline void clocksource_calculate_interval(struct clocksource *c, - unsigned long length_nsec) -{ - u64 tmp; - - /* Do the ns -> cycle conversion first, using original mult */ - tmp = length_nsec; - tmp <<= c->shift; - tmp += c->mult_orig/2; - do_div(tmp, c->mult_orig); - - c->cycle_interval = (cycle_t)tmp; - if (c->cycle_interval == 0) - c->cycle_interval = 1; - - /* Go back from cycles -> shifted ns, this time use ntp adjused mult */ - c->xtime_interval = (u64)c->cycle_interval * c->mult; - c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift; + return ((u64) cycles * mult) >> shift; } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f1508019bfb4..f18c9a6bdcf4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -177,7 +177,8 @@ static void clocksource_watchdog(unsigned long data) goto out; wdnow = watchdog->read(watchdog); - wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); + wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, + watchdog->mult, watchdog->shift); watchdog_last = wdnow; list_for_each_entry(cs, &watchdog_list, wd_list) { @@ -196,7 +197,8 @@ static void clocksource_watchdog(unsigned long data) } /* Check the deviation from the watchdog clocksource. */ - cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); + cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & + cs->mask, cs->mult, cs->shift); cs->wd_last = csnow; if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { clocksource_unstable(cs, cs_nsec - wd_nsec); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 325a9b63265a..7af45cbf6b13 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -19,6 +19,65 @@ #include #include +/* Structure holding internal timekeeping values. */ +struct timekeeper { + /* Current clocksource used for timekeeping. */ + struct clocksource *clock; + + /* Number of clock cycles in one NTP interval. */ + cycle_t cycle_interval; + /* Number of clock shifted nano seconds in one NTP interval. */ + u64 xtime_interval; + /* Raw nano seconds accumulated per NTP interval. */ + u32 raw_interval; + + /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ + u64 xtime_nsec; + /* Difference between accumulated time and NTP time in ntp + * shifted nano seconds. */ + s64 ntp_error; +}; + +struct timekeeper timekeeper; + +/** + * timekeeper_setup_internals - Set up internals to use clocksource clock. + * + * @clock: Pointer to clocksource. + * + * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment + * pair and interval request. + * + * Unless you're the timekeeping code, you should not be using this! 
+ */ +static void timekeeper_setup_internals(struct clocksource *clock) +{ + cycle_t interval; + u64 tmp; + + timekeeper.clock = clock; + clock->cycle_last = clock->read(clock); + + /* Do the ns -> cycle conversion first, using original mult */ + tmp = NTP_INTERVAL_LENGTH; + tmp <<= clock->shift; + tmp += clock->mult_orig/2; + do_div(tmp, clock->mult_orig); + if (tmp == 0) + tmp = 1; + + interval = (cycle_t) tmp; + timekeeper.cycle_interval = interval; + + /* Go back from cycles -> shifted ns */ + timekeeper.xtime_interval = (u64) interval * clock->mult; + timekeeper.raw_interval = + ((u64) interval * clock->mult_orig) >> clock->shift; + + timekeeper.xtime_nsec = 0; + + timekeeper.ntp_error = 0; +} /* * This read-write spinlock protects us from races in SMP while @@ -46,6 +105,11 @@ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); static unsigned long total_sleep_time; /* seconds */ +/* + * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. + */ +struct timespec raw_time; + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -56,42 +120,42 @@ void update_xtime_cache(u64 nsec) timespec_add_ns(&xtime_cache, nsec); } -struct clocksource *clock; - /* must hold xtime_lock */ void timekeeping_leap_insert(int leapsecond) { xtime.tv_sec += leapsecond; wall_to_monotonic.tv_sec -= leapsecond; - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); } #ifdef CONFIG_GENERIC_TIME /** - * clocksource_forward_now - update clock to the current time + * timekeeping_forward_now - update clock to the current time * * Forward the current clock to update its state since the last call to * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. 
*/ -static void clocksource_forward_now(void) +static void timekeeping_forward_now(void) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; s64 nsec; + clock = timekeeper.clock; cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = cyc2ns(clock, cycle_delta); + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); /* If arch requires, add in gettimeoffset() */ nsec += arch_gettimeoffset(); timespec_add_ns(&xtime, nsec); - nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; - clock->raw_time.tv_nsec += nsec; + nsec = clocksource_cyc2ns(cycle_delta, clock->mult_orig, clock->shift); + timespec_add_ns(&raw_time, nsec); } /** @@ -103,6 +167,7 @@ static void clocksource_forward_now(void) void getnstimeofday(struct timespec *ts) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; unsigned long seq; s64 nsecs; @@ -114,13 +179,15 @@ void getnstimeofday(struct timespec *ts) *ts = xtime; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = cyc2ns(clock, cycle_delta); + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -135,6 +202,7 @@ EXPORT_SYMBOL(getnstimeofday); ktime_t ktime_get(void) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; unsigned int seq; s64 secs, nsecs; @@ -146,13 +214,15 @@ ktime_t ktime_get(void) nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs += cyc2ns(clock, cycle_delta); + nsecs += clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); } while (read_seqretry(&xtime_lock, seq)); /* @@ -174,6 +244,7 @@ EXPORT_SYMBOL_GPL(ktime_get); void ktime_get_ts(struct timespec *ts) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; struct timespec tomono; unsigned int seq; s64 nsecs; @@ -186,13 +257,15 @@ void ktime_get_ts(struct timespec *ts) tomono = wall_to_monotonic; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = cyc2ns(clock, cycle_delta); + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); } while (read_seqretry(&xtime_lock, seq)); @@ -233,7 +306,7 @@ int do_settimeofday(struct timespec *tv) write_seqlock_irqsave(&xtime_lock, flags); - clocksource_forward_now(); + timekeeping_forward_now(); ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec; ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec; @@ -243,10 +316,10 @@ int do_settimeofday(struct timespec *tv) update_xtime_cache(0); - clock->error = 0; + timekeeper.ntp_error = 0; ntp_clear(); - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); write_sequnlock_irqrestore(&xtime_lock, flags); @@ -269,10 +342,10 @@ static void change_clocksource(void) new = clocksource_get_next(); - if (!new || clock == new) + if (!new || timekeeper.clock == new) return; - clocksource_forward_now(); + timekeeping_forward_now(); if (new->enable && 
!new->enable(new)) return; @@ -284,9 +357,9 @@ static void change_clocksource(void) */ new->mult_orig = new->mult; - new->raw_time = clock->raw_time; - old = clock; - clock = new; + old = timekeeper.clock; + timekeeper_setup_internals(new); + /* * Save mult_orig in mult so that the value can be restored * regardless if ->enable() updates the value of mult or not. @@ -295,22 +368,10 @@ static void change_clocksource(void) if (old->disable) old->disable(old); - clock->cycle_last = clock->read(clock); - clock->error = 0; - clock->xtime_nsec = 0; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - tick_clock_notify(); - - /* - * We're holding xtime lock and waking up klogd would deadlock - * us on enqueue. So no printing! - printk(KERN_INFO "Time: %s clocksource has been installed.\n", - clock->name); - */ } #else /* GENERIC_TIME */ -static inline void clocksource_forward_now(void) { } +static inline void timekeeping_forward_now(void) { } static inline void change_clocksource(void) { } /** @@ -380,20 +441,23 @@ void getrawmonotonic(struct timespec *ts) unsigned long seq; s64 nsecs; cycle_t cycle_now, cycle_delta; + struct clocksource *clock; do { seq = read_seqbegin(&xtime_lock); /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult_orig, + clock->shift); - *ts = clock->raw_time; + *ts = raw_time; } while (read_seqretry(&xtime_lock, seq)); @@ -413,7 +477,7 @@ int timekeeping_valid_for_hres(void) do { seq = read_seqbegin(&xtime_lock); - ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqretry(&xtime_lock, seq)); @@ -439,6 +503,7 @@ unsigned long __attribute__((weak)) read_persistent_clock(void) */ void __init timekeeping_init(void) { + struct clocksource *clock; unsigned long flags; unsigned long sec = read_persistent_clock(); @@ -451,11 +516,13 @@ void __init timekeeping_init(void) clock->enable(clock); /* set mult_orig on enable */ clock->mult_orig = clock->mult; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - clock->cycle_last = clock->read(clock); + + timekeeper_setup_internals(clock); xtime.tv_sec = sec; xtime.tv_nsec = 0; + raw_time.tv_sec = 0; + raw_time.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); @@ -492,8 +559,8 @@ static int timekeeping_resume(struct sys_device *dev) } update_xtime_cache(0); /* re-base the last cycle value */ - clock->cycle_last = clock->read(clock); - clock->error = 0; + timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); + timekeeper.ntp_error = 0; timekeeping_suspended = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -514,7 +581,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) timekeeping_suspend_time = read_persistent_clock(); write_seqlock_irqsave(&xtime_lock, flags); - clocksource_forward_now(); + timekeeping_forward_now(); timekeeping_suspended = 1; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -549,7 +616,7 @@ device_initcall(timekeeping_init_device); * If the error is already larger, we look ahead even further * to compensate for late or lost adjustments. 
*/ -static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, +static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, s64 *offset) { s64 tick_error, i; @@ -565,7 +632,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * here. This is tuned so that an error of about 1 msec is adjusted * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). */ - error2 = clock->error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); + error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); error2 = abs(error2); for (look_ahead = 0; error2 > 0; look_ahead++) error2 >>= 2; @@ -574,8 +641,9 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * Now calculate the error in (1 << look_ahead) ticks, but first * remove the single look ahead already included in the error. */ - tick_error = tick_length >> (NTP_SCALE_SHIFT - clock->shift + 1); - tick_error -= clock->xtime_interval >> 1; + tick_error = tick_length >> + (NTP_SCALE_SHIFT - timekeeper.clock->shift + 1); + tick_error -= timekeeper.xtime_interval >> 1; error = ((error - tick_error) >> look_ahead) + tick_error; /* Finally calculate the adjustment shift value. */ @@ -600,18 +668,19 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * this is optimized for the most common adjustments of -1,0,1, * for other values we can do a bit more work. */ -static void clocksource_adjust(s64 offset) +static void timekeeping_adjust(s64 offset) { - s64 error, interval = clock->cycle_interval; + s64 error, interval = timekeeper.cycle_interval; int adj; - error = clock->error >> (NTP_SCALE_SHIFT - clock->shift - 1); + error = timekeeper.ntp_error >> + (NTP_SCALE_SHIFT - timekeeper.clock->shift - 1); if (error > interval) { error >>= 2; if (likely(error <= interval)) adj = 1; else - adj = clocksource_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(error, &interval, &offset); } else if (error < -interval) { error >>= 2; if (likely(error >= -interval)) { @@ -619,15 +688,15 @@ static void clocksource_adjust(s64 offset) interval = -interval; offset = -offset; } else - adj = clocksource_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(error, &interval, &offset); } else return; - clock->mult += adj; - clock->xtime_interval += interval; - clock->xtime_nsec -= offset; - clock->error -= (interval - offset) << - (NTP_SCALE_SHIFT - clock->shift); + timekeeper.clock->mult += adj; + timekeeper.xtime_interval += interval; + timekeeper.xtime_nsec -= offset; + timekeeper.ntp_error -= (interval - offset) << + (NTP_SCALE_SHIFT - timekeeper.clock->shift); } /** @@ -637,53 +706,59 @@ static void clocksource_adjust(s64 offset) */ void update_wall_time(void) { + struct clocksource *clock; cycle_t offset; + s64 nsecs; /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) return; + clock = timekeeper.clock; #ifdef CONFIG_GENERIC_TIME offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #else - offset = clock->cycle_interval; + offset = timekeeper.cycle_interval; #endif - clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift; + timekeeper.xtime_nsec = (s64)xtime.tv_nsec << clock->shift; /* normally this loop will run just once, however in the * case of lost or late ticks, it will accumulate correctly. 
*/ - while (offset >= clock->cycle_interval) { + while (offset >= timekeeper.cycle_interval) { + u64 nsecps = (u64)NSEC_PER_SEC << clock->shift; + /* accumulate one interval */ - offset -= clock->cycle_interval; - clock->cycle_last += clock->cycle_interval; + offset -= timekeeper.cycle_interval; + clock->cycle_last += timekeeper.cycle_interval; - clock->xtime_nsec += clock->xtime_interval; - if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { - clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; + timekeeper.xtime_nsec += timekeeper.xtime_interval; + if (timekeeper.xtime_nsec >= nsecps) { + timekeeper.xtime_nsec -= nsecps; xtime.tv_sec++; second_overflow(); } - clock->raw_time.tv_nsec += clock->raw_interval; - if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) { - clock->raw_time.tv_nsec -= NSEC_PER_SEC; - clock->raw_time.tv_sec++; + raw_time.tv_nsec += timekeeper.raw_interval; + if (raw_time.tv_nsec >= NSEC_PER_SEC) { + raw_time.tv_nsec -= NSEC_PER_SEC; + raw_time.tv_sec++; } /* accumulate error between NTP and clock interval */ - clock->error += tick_length; - clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); + timekeeper.ntp_error += tick_length; + timekeeper.ntp_error -= timekeeper.xtime_interval << + (NTP_SCALE_SHIFT - clock->shift); } /* correct the clock when NTP error is too big */ - clocksource_adjust(offset); + timekeeping_adjust(offset); /* * Since in the loop above, we accumulate any amount of time * in xtime_nsec over a second into xtime.tv_sec, its possible for * xtime_nsec to be fairly small after the loop. Further, if we're - * slightly speeding the clocksource up in clocksource_adjust(), + * slightly speeding the clocksource up in timekeeping_adjust(), * its possible the required corrective factor to xtime_nsec could * cause it to underflow. * @@ -695,24 +770,26 @@ void update_wall_time(void) * We'll correct this error next time through this function, when * xtime_nsec is not as small. */ - if (unlikely((s64)clock->xtime_nsec < 0)) { - s64 neg = -(s64)clock->xtime_nsec; - clock->xtime_nsec = 0; - clock->error += neg << (NTP_SCALE_SHIFT - clock->shift); + if (unlikely((s64)timekeeper.xtime_nsec < 0)) { + s64 neg = -(s64)timekeeper.xtime_nsec; + timekeeper.xtime_nsec = 0; + timekeeper.ntp_error += neg << (NTP_SCALE_SHIFT - clock->shift); } /* store full nanoseconds into xtime after rounding it up and * add the remainder to the error difference. */ - xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1; - clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; - clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift); + xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >> clock->shift) + 1; + timekeeper.xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; + timekeeper.ntp_error += timekeeper.xtime_nsec << + (NTP_SCALE_SHIFT - clock->shift); - update_xtime_cache(cyc2ns(clock, offset)); + nsecs = clocksource_cyc2ns(offset, clock->mult, clock->shift); + update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ change_clocksource(); - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); } /** -- cgit v1.2.3 From 0a54419836254a27baecd9037103171bcbabaf67 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:28 +0200 Subject: timekeeping: Move NTP adjusted clock multiplier to struct timekeeper The clocksource structure has two multipliers, the unmodified multiplier clock->mult_orig and the NTP corrected multiplier clock->mult. 
The NTP multiplier is misplaced in the struct clocksource, this is private information of the timekeeping code. Add the mult field to the struct timekeeper to contain the NTP corrected value, keep the unmodifed multiplier in clock->mult and remove clock->mult_orig. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134810.149047645@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/arm/plat-omap/common.c | 7 ++---- include/linux/clocksource.h | 4 +--- kernel/time/timekeeping.c | 53 ++++++++++++++++++++------------------------- 3 files changed, 27 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c index ebcf006406f9..95587b6c0259 100644 --- a/arch/arm/plat-omap/common.c +++ b/arch/arm/plat-omap/common.c @@ -253,11 +253,8 @@ static struct clocksource clocksource_32k = { */ unsigned long long sched_clock(void) { - unsigned long long ret; - - ret = (unsigned long long)clocksource_32k.read(&clocksource_32k); - ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift; - return ret; + return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k), + clocksource_32k.mult, clocksource_32k.shift); } static int __init omap_init_clocksource_32k(void) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e12e3095e2fb..e34015effeb6 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -149,8 +149,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @disable: optional function to disable the clocksource * @mask: bitmask for two's complement * subtraction of non 64 bit counters - * @mult: cycle to nanosecond multiplier (adjusted by NTP) - * @mult_orig: cycle to nanosecond multiplier (unadjusted by NTP) + * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) * @flags: flags describing special properties * @vread: vsyscall based read @@ -168,7 +167,6 @@ struct clocksource { void (*disable)(struct clocksource *cs); cycle_t mask; u32 mult; - u32 mult_orig; u32 shift; unsigned long flags; cycle_t (*vread)(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index dfdab1cefe1e..f4056f6c2632 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -41,6 +41,8 @@ struct timekeeper { /* Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. */ int ntp_error_shift; + /* NTP adjusted clock multiplier */ + u32 mult; }; struct timekeeper timekeeper; @@ -66,8 +68,8 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; - tmp += clock->mult_orig/2; - do_div(tmp, clock->mult_orig); + tmp += clock->mult/2; + do_div(tmp, clock->mult); if (tmp == 0) tmp = 1; @@ -77,13 +79,20 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Go back from cycles -> shifted ns */ timekeeper.xtime_interval = (u64) interval * clock->mult; timekeeper.raw_interval = - ((u64) interval * clock->mult_orig) >> clock->shift; + ((u64) interval * clock->mult) >> clock->shift; timekeeper.xtime_nsec = 0; timekeeper.shift = clock->shift; timekeeper.ntp_error = 0; timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; + + /* + * The timekeeper keeps its own mult values for the currently + * active clocksource. These value will be adjusted via NTP + * to counteract clock drifting. 
+ */ + timekeeper.mult = clock->mult; } /* @@ -154,14 +163,15 @@ static void timekeeping_forward_now(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); + nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); /* If arch requires, add in gettimeoffset() */ nsec += arch_gettimeoffset(); timespec_add_ns(&xtime, nsec); - nsec = clocksource_cyc2ns(cycle_delta, clock->mult_orig, clock->shift); + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); timespec_add_ns(&raw_time, nsec); } @@ -193,8 +203,8 @@ void getnstimeofday(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -228,8 +238,8 @@ ktime_t ktime_get(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs += clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs += clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); } while (read_seqretry(&xtime_lock, seq)); /* @@ -271,8 +281,8 @@ void ktime_get_ts(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); } while (read_seqretry(&xtime_lock, seq)); @@ -356,22 +366,10 @@ static void change_clocksource(void) if (new->enable && !new->enable(new)) return; - /* - * The frequency may have changed while the clocksource - * was disabled. If so the code in ->enable() must update - * the mult value to reflect the new frequency. Make sure - * mult_orig follows this change. - */ - new->mult_orig = new->mult; old = timekeeper.clock; timekeeper_setup_internals(new); - /* - * Save mult_orig in mult so that the value can be restored - * regardless if ->enable() updates the value of mult or not. 
- */ - old->mult = old->mult_orig; if (old->disable) old->disable(old); @@ -461,7 +459,7 @@ void getrawmonotonic(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult_orig, + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); *ts = raw_time; @@ -521,9 +519,6 @@ void __init timekeeping_init(void) clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); - /* set mult_orig on enable */ - clock->mult_orig = clock->mult; - timekeeper_setup_internals(clock); xtime.tv_sec = sec; @@ -697,7 +692,7 @@ static void timekeeping_adjust(s64 offset) } else return; - timekeeper.clock->mult += adj; + timekeeper.mult += adj; timekeeper.xtime_interval += interval; timekeeper.xtime_nsec -= offset; timekeeper.ntp_error -= (interval - offset) << @@ -789,7 +784,7 @@ void update_wall_time(void) timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error_shift; - nsecs = clocksource_cyc2ns(offset, clock->mult, clock->shift); + nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ -- cgit v1.2.3 From 75c5158f70c065b9704b924503d96e8297838f79 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:30 +0200 Subject: timekeeping: Update clocksource with stop_machine update_wall_time calls change_clocksource HZ times per second to check if a new clock source is available. In close to 100% of all calls there is no new clock. Replace the tick based check by an update done with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134810.711836357@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 2 + kernel/time/clocksource.c | 112 +++++++++++++++++--------------------------- kernel/time/timekeeping.c | 41 ++++++++++------ 3 files changed, 72 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e34015effeb6..9ea40ff26f0e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -291,4 +291,6 @@ static inline void update_vsyscall_tz(void) } #endif +extern void timekeeping_notify(struct clocksource *clock); + #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f18c9a6bdcf4..a1657b5fdeb9 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -109,35 +109,17 @@ EXPORT_SYMBOL(timecounter_cyc2time); /*[Clocksource internal variables]--------- * curr_clocksource: * currently selected clocksource. - * next_clocksource: - * pending next selected clocksource. * clocksource_list: * linked list with the registered clocksources - * clocksource_lock: - * protects manipulations to curr_clocksource and next_clocksource - * and the clocksource_list + * clocksource_mutex: + * protects manipulations to curr_clocksource and the clocksource_list * override_name: * Name of the user-specified clocksource. 
*/ static struct clocksource *curr_clocksource; -static struct clocksource *next_clocksource; static LIST_HEAD(clocksource_list); -static DEFINE_SPINLOCK(clocksource_lock); +static DEFINE_MUTEX(clocksource_mutex); static char override_name[32]; -static int finished_booting; - -/* clocksource_done_booting - Called near the end of core bootup - * - * Hack to avoid lots of clocksource churn at boot time. - * We use fs_initcall because we want this to start before - * device_initcall but after subsys_initcall. - */ -static int __init clocksource_done_booting(void) -{ - finished_booting = 1; - return 0; -} -fs_initcall(clocksource_done_booting); #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static LIST_HEAD(watchdog_list); @@ -356,18 +338,16 @@ static inline void clocksource_resume_watchdog(void) { } void clocksource_resume(void) { struct clocksource *cs; - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); - list_for_each_entry(cs, &clocksource_list, list) { + list_for_each_entry(cs, &clocksource_list, list) if (cs->resume) cs->resume(); - } clocksource_resume_watchdog(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } /** @@ -383,28 +363,13 @@ void clocksource_touch_watchdog(void) } #ifdef CONFIG_GENERIC_TIME -/** - * clocksource_get_next - Returns the selected clocksource - * - */ -struct clocksource *clocksource_get_next(void) -{ - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); - if (next_clocksource && finished_booting) { - curr_clocksource = next_clocksource; - next_clocksource = NULL; - } - spin_unlock_irqrestore(&clocksource_lock, flags); - - return curr_clocksource; -} +static int finished_booting; /** * clocksource_select - Select the best clocksource available * - * Private function. Must hold clocksource_lock when called. + * Private function. Must hold clocksource_mutex when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. @@ -413,7 +378,7 @@ static void clocksource_select(void) { struct clocksource *best, *cs; - if (list_empty(&clocksource_list)) + if (!finished_booting || list_empty(&clocksource_list)) return; /* First clocksource on the list has the best rating. */ best = list_first_entry(&clocksource_list, struct clocksource, list); @@ -438,13 +403,31 @@ static void clocksource_select(void) best = cs; break; } - if (curr_clocksource != best) - next_clocksource = best; + if (curr_clocksource != best) { + printk(KERN_INFO "Switching to clocksource %s\n", best->name); + curr_clocksource = best; + timekeeping_notify(curr_clocksource); + } } +/* + * clocksource_done_booting - Called near the end of core bootup + * + * Hack to avoid lots of clocksource churn at boot time. + * We use fs_initcall because we want this to start before + * device_initcall but after subsys_initcall. 
+ */ +static int __init clocksource_done_booting(void) +{ + finished_booting = 1; + clocksource_select(); + return 0; +} +fs_initcall(clocksource_done_booting); + #else /* CONFIG_GENERIC_TIME */ -static void clocksource_select(void) { } +static inline void clocksource_select(void) { } #endif @@ -471,13 +454,11 @@ static void clocksource_enqueue(struct clocksource *cs) */ int clocksource_register(struct clocksource *cs) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); clocksource_enqueue_watchdog(cs); + mutex_unlock(&clocksource_mutex); return 0; } EXPORT_SYMBOL(clocksource_register); @@ -487,14 +468,12 @@ EXPORT_SYMBOL(clocksource_register); */ void clocksource_change_rating(struct clocksource *cs, int rating) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_change_rating); @@ -503,13 +482,11 @@ EXPORT_SYMBOL(clocksource_change_rating); */ void clocksource_unregister(struct clocksource *cs) { - unsigned long flags; - + mutex_lock(&clocksource_mutex); clocksource_dequeue_watchdog(cs); - spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_unregister); @@ -527,9 +504,9 @@ sysfs_show_current_clocksources(struct sys_device *dev, { ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return count; } @@ -557,14 +534,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (buf[count-1] == '\n') count--; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; clocksource_select(); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return ret; } @@ -584,7 +561,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, struct clocksource *src; ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); list_for_each_entry(src, &clocksource_list, list) { /* * Don't show non-HRES clocksource if the tick code is @@ -596,7 +573,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "%s ", src->name); } - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); count += snprintf(buf + count, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); @@ -651,11 +628,10 @@ device_initcall(init_clocksource_sysfs); */ static int __init boot_override_clocksource(char* str) { - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); if (str) strlcpy(override_name, str, sizeof(override_name)); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); return 1; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 27ae01b596b7..41579e7fcf9d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -18,6 +18,7 @@ #include #include #include 
+#include <linux/stop_machine.h> /* Structure holding internal timekeeping values. */ struct timekeeper { @@ -179,6 +180,7 @@ void timekeeping_leap_insert(int leapsecond) } #ifdef CONFIG_GENERIC_TIME + /** * timekeeping_forward_now - update clock to the current time * @@ -351,31 +353,40 @@ EXPORT_SYMBOL(do_settimeofday); * * Accumulates current time interval and initializes new clocksource */ -static void change_clocksource(void) +static int change_clocksource(void *data) { struct clocksource *new, *old; - new = clocksource_get_next(); - - if (!new || timekeeper.clock == new) - return; + new = (struct clocksource *) data; timekeeping_forward_now(); + if (!new->enable || new->enable(new) == 0) { + old = timekeeper.clock; + timekeeper_setup_internals(new); + if (old->disable) + old->disable(old); + } + return 0; +} - if (new->enable && !new->enable(new)) +/** + * timekeeping_notify - Install a new clock source + * @clock: pointer to the clock source + * + * This function is called from clocksource.c after a new, better clock + * source has been registered. The caller holds the clocksource_mutex. + */ +void timekeeping_notify(struct clocksource *clock) +{ + if (timekeeper.clock == clock) return; - - old = timekeeper.clock; - timekeeper_setup_internals(new); - - if (old->disable) - old->disable(old); - + stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); } + #else /* GENERIC_TIME */ + static inline void timekeeping_forward_now(void) { } -static inline void change_clocksource(void) { } /** * ktime_get - get the monotonic time in ktime_t format @@ -416,6 +427,7 @@ void ktime_get_ts(struct timespec *ts) ts->tv_nsec + tomono.tv_nsec); } EXPORT_SYMBOL_GPL(ktime_get_ts); + #endif /* !GENERIC_TIME */ /** @@ -773,7 +785,6 @@ void update_wall_time(void) update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ - change_clocksource(); update_vsyscall(&xtime, timekeeper.clock); } -- cgit v1.2.3 From d4f587c67fc39e0030ddd718675e252e208da4d7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:31 +0200 Subject: timekeeping: Increase granularity of read_persistent_clock() The persistent clock of some architectures (e.g. s390) has a better granularity than seconds. To reduce the delta between the host clock and the guest clock in a virtualized system, change the read_persistent_clock function to return a struct timespec.
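To illustrate the converted interface: an architecture whose RTC only counts seconds fills in the timespec like this (a minimal sketch; rtc_read_seconds() is a hypothetical stand-in for the architecture's real RTC accessor, matching the per-arch conversions below):

	void read_persistent_clock(struct timespec *ts)
	{
		/* Seconds-only RTC: no sub-second information available. */
		ts->tv_sec = rtc_read_seconds();
		ts->tv_nsec = 0;
	}

An architecture with a sub-second clock, such as the s390 TOD clock, can fill in tv_nsec as well and hand the full resolution to the timekeeping core.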
Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.013873340@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/m68knommu/kernel/time.c | 5 ++-- arch/mips/dec/time.c | 5 ++-- arch/mips/lasat/ds1603.c | 5 ++-- arch/mips/lasat/sysctl.c | 8 ++++-- arch/mips/lemote/lm2e/setup.c | 5 ++-- arch/mips/mti-malta/malta-time.c | 5 ++-- arch/mips/pmc-sierra/yosemite/setup.c | 5 ++-- arch/mips/sibyte/swarm/setup.c | 15 +++++++--- arch/mips/sni/time.c | 5 ++-- arch/powerpc/kernel/time.c | 7 +++-- arch/s390/kernel/time.c | 22 +++------------ arch/sh/kernel/time.c | 6 ++-- arch/x86/kernel/rtc.c | 5 ++-- arch/xtensa/kernel/time.c | 5 ++-- include/linux/time.h | 2 +- kernel/time/timekeeping.c | 52 +++++++++++++++++++---------------- 16 files changed, 83 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index d182b2f72211..68432248515c 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -72,9 +72,10 @@ static unsigned long read_rtc_mmss(void) return mktime(year, mon, day, hour, min, sec);; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return read_rtc_mmss(); + ts->tv_sec = read_rtc_mmss(); + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c index 463136e6685a..02f505f23c32 100644 --- a/arch/mips/dec/time.c +++ b/arch/mips/dec/time.c @@ -18,7 +18,7 @@ #include #include -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, mon, day, hour, min, sec, real_year; unsigned long flags; @@ -53,7 +53,8 @@ unsigned long read_persistent_clock(void) year += real_year - 72 + 2000; - return mktime(year, mon, day, hour, min, sec); + ts->tv_sec = mktime(year, mon, day, hour, min, sec); + ts->tv_nsec = 0; } /* diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c index 52cb1436a12a..c6fd96ff118d 100644 --- a/arch/mips/lasat/ds1603.c +++ b/arch/mips/lasat/ds1603.c @@ -135,7 +135,7 @@ static void rtc_end_op(void) lasat_ndelay(1000); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long word; unsigned long flags; @@ -147,7 +147,8 @@ unsigned long read_persistent_clock(void) rtc_end_op(); spin_unlock_irqrestore(&rtc_lock, flags); - return word; + ts->tv_sec = word; + ts->tv_nsec = 0; } int rtc_mips_set_mmss(unsigned long time) diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 8f88886feb12..3f04d4c406b7 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -92,10 +92,12 @@ static int rtctmp; int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp, loff_t *ppos) { + struct timespec ts; int r; if (!write) { - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; /* check for time < 0 and set to 0 */ if (rtctmp < 0) rtctmp = 0; @@ -134,9 +136,11 @@ int sysctl_lasat_rtc(ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen) { + struct timespec ts; int r; - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; if (rtctmp < 0) rtctmp = 0; r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c index ebd6ceaef2fd..24b355df6127 100644 --- a/arch/mips/lemote/lm2e/setup.c +++ 
b/arch/mips/lemote/lm2e/setup.c @@ -54,9 +54,10 @@ void __init plat_time_init(void) mips_hpt_frequency = cpu_clock_freq / 2; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } void (*__wbflush)(void); diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 0b97d47691fc..3c6f190aa61c 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -100,9 +100,10 @@ static unsigned int __init estimate_cpu_frequency(void) return count; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } static void __init plat_perf_setup(void) diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c index 2d3c0dca275d..3498ac9c35af 100644 --- a/arch/mips/pmc-sierra/yosemite/setup.c +++ b/arch/mips/pmc-sierra/yosemite/setup.c @@ -70,7 +70,7 @@ void __init bus_error_init(void) } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, month, day, hour, min, sec; unsigned long flags; @@ -92,7 +92,8 @@ unsigned long read_persistent_clock(void) m48t37_base->control = 0x00; spin_unlock_irqrestore(&rtc_lock, flags); - return mktime(year, month, day, hour, min, sec); + ts->tv_sec = mktime(year, month, day, hour, min, sec); + ts->tv_nsec = 0; } int rtc_mips_set_time(unsigned long tim) diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index 672e45d495a9..623ffc933c4c 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -87,19 +87,26 @@ enum swarm_rtc_type { enum swarm_rtc_type swarm_rtc_type; -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { + unsigned long sec; + switch (swarm_rtc_type) { case RTC_XICOR: - return xicor_get_time(); + sec = xicor_get_time(); + break; case RTC_M4LT81: - return m41t81_get_time(); + sec = m41t81_get_time(); + break; case RTC_NONE: default: - return mktime(2000, 1, 1, 0, 0, 0); + sec = mktime(2000, 1, 1, 0, 0, 0); + break; } + ts->tv_sec = sec; + tv->tv_nsec = 0; } int rtc_mips_set_time(unsigned long sec) diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index 0d9ec1a5c24a..62df6a598e0a 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -182,7 +182,8 @@ void __init plat_time_init(void) setup_pit_timer(); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return -1; + ts->tv_sec = -1; + ts->tv_nsec = 0; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceeac..ad63f30fe3da 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -769,7 +769,7 @@ int update_persistent_clock(struct timespec now) return ppc_md.set_rtc_time(&tm); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { struct rtc_time tm; static int first = 1; @@ -787,8 +787,9 @@ unsigned long read_persistent_clock(void) if (!ppc_md.get_rtc_time) return 0; ppc_md.get_rtc_time(&tm); - return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_nsec = 0; } /* clocksource code */ diff --git a/arch/s390/kernel/time.c 
b/arch/s390/kernel/time.c index e76c2e7a8b9a..a94ec48587b4 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -182,12 +182,9 @@ static void timing_alert_interrupt(__u16 code) static void etr_reset(void); static void stp_reset(void); -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec ts; - - tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, &ts); - return ts.tv_sec; + tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); } static cycle_t read_tod_clock(struct clocksource *cs) @@ -248,7 +245,6 @@ void __init time_init(void) { struct timespec ts; unsigned long flags; - cycle_t now; /* Reset time synchronization interfaces. */ etr_reset(); @@ -266,20 +262,10 @@ void __init time_init(void) panic("Could not register TOD clock source"); /* - * The TOD clock is an accurate clock. The xtime should be - * initialized in a way that the difference between TOD and - * xtime is reasonably small. Too bad that timekeeping_init - * sets xtime.tv_nsec to zero. In addition the clock source - * change from the jiffies clock source to the TOD clock - * source add another error of up to 1/HZ second. The same - * function sets wall_to_monotonic to a value that is too - * small for /proc/uptime to be accurate. - * Reset xtime and wall_to_monotonic to sane values. + * Reset wall_to_monotonic to the initial timestamp created + * in head.S to get a precise value in /proc/uptime. */ write_seqlock_irqsave(&xtime_lock, flags); - now = get_clock(); - tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); - clocksource_tod.cycle_last = now; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index 9b352a1e3fb4..3f4706aa975e 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -39,11 +39,9 @@ void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time; int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time; #ifdef CONFIG_GENERIC_CMOS_UPDATE -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec tv; - rtc_sh_get_time(&tv); - return tv.tv_sec; + rtc_sh_get_time(&ts); } int update_persistent_clock(struct timespec now) diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 5d465b207e72..bf67dcb4a44c 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -178,7 +178,7 @@ static int set_rtc_mmss(unsigned long nowtime) } /* not static: needed by APM */ -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long retval, flags; @@ -186,7 +186,8 @@ unsigned long read_persistent_clock(void) retval = get_wallclock(); spin_unlock_irqrestore(&rtc_lock, flags); - return retval; + ts->tv_sec = retval; + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 8848120d291b..19085ff0484a 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -59,9 +59,8 @@ static struct irqaction timer_irqaction = { void __init time_init(void) { - xtime.tv_nsec = 0; - xtime.tv_sec = read_persistent_clock(); - + /* FIXME: xtime&wall_to_monotonic are set in timekeeping_init. 
*/ + read_persistent_clock(&xtime); set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); diff --git a/include/linux/time.h b/include/linux/time.h index e7c844558884..53a3216f0d1b 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -101,7 +101,7 @@ extern struct timespec xtime; extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; -extern unsigned long read_persistent_clock(void); +extern void read_persistent_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 41579e7fcf9d..f1a21ce491e6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -154,7 +154,7 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); */ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); -static unsigned long total_sleep_time; /* seconds */ +static struct timespec total_sleep_time; /* * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. @@ -487,17 +487,18 @@ int timekeeping_valid_for_hres(void) } /** - * read_persistent_clock - Return time in seconds from the persistent clock. + * read_persistent_clock - Return time from the persistent clock. * * Weak dummy function for arches that do not yet support it. - * Returns seconds from epoch using the battery backed persistent clock. - * Returns zero if unsupported. + * Reads the time from the battery backed persistent clock. + * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. * * XXX - Do be sure to remove it once all arches implement it. */ -unsigned long __attribute__((weak)) read_persistent_clock(void) +void __attribute__((weak)) read_persistent_clock(struct timespec *ts) { - return 0; + ts->tv_sec = 0; + ts->tv_nsec = 0; } /* @@ -507,7 +508,9 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - unsigned long sec = read_persistent_clock(); + struct timespec now; + + read_persistent_clock(&now); write_seqlock_irqsave(&xtime_lock, flags); @@ -518,19 +521,20 @@ void __init timekeeping_init(void) clock->enable(clock); timekeeper_setup_internals(clock); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; + xtime.tv_sec = now.tv_sec; + xtime.tv_nsec = now.tv_nsec; raw_time.tv_sec = 0; raw_time.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); - total_sleep_time = 0; + total_sleep_time.tv_sec = 0; + total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); } /* time in seconds when suspend began */ -static unsigned long timekeeping_suspend_time; +static struct timespec timekeeping_suspend_time; /** * timekeeping_resume - Resumes the generic timekeeping subsystem. 
@@ -543,18 +547,19 @@ static unsigned long timekeeping_suspend_time; static int timekeeping_resume(struct sys_device *dev) { unsigned long flags; - unsigned long now = read_persistent_clock(); + struct timespec ts; + + read_persistent_clock(&ts); clocksource_resume(); write_seqlock_irqsave(&xtime_lock, flags); - if (now && (now > timekeeping_suspend_time)) { - unsigned long sleep_length = now - timekeeping_suspend_time; - - xtime.tv_sec += sleep_length; - wall_to_monotonic.tv_sec -= sleep_length; - total_sleep_time += sleep_length; + if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { + ts = timespec_sub(ts, timekeeping_suspend_time); + xtime = timespec_add_safe(xtime, ts); + wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); + total_sleep_time = timespec_add_safe(total_sleep_time, ts); } update_xtime_cache(0); /* re-base the last cycle value */ @@ -577,7 +582,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; - timekeeping_suspend_time = read_persistent_clock(); + read_persistent_clock(&timekeeping_suspend_time); write_seqlock_irqsave(&xtime_lock, flags); timekeeping_forward_now(); @@ -801,9 +806,10 @@ void update_wall_time(void) */ void getboottime(struct timespec *ts) { - set_normalized_timespec(ts, - - (wall_to_monotonic.tv_sec + total_sleep_time), - - wall_to_monotonic.tv_nsec); + struct timespec boottime; + + boottime = timespec_add_safe(wall_to_monotonic, total_sleep_time); + set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); } /** @@ -812,7 +818,7 @@ void getboottime(struct timespec *ts) */ void monotonic_to_bootbased(struct timespec *ts) { - ts->tv_sec += total_sleep_time; + *ts = timespec_add_safe(*ts, total_sleep_time); } unsigned long get_seconds(void) -- cgit v1.2.3 From 23970e389e9cee43c4b41023935e1417271708b2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:32 +0200 Subject: timekeeping: Introduce read_boot_clock Add the new function read_boot_clock to get the exact time the system has been started. For architectures without support for exact boot time a new weak function is added that returns 0. Use the exact boot time to initialize wall_to_monotonic, or xtime if the read_boot_clock returned 0. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.296703241@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 17 +++++------------ include/linux/time.h | 1 + kernel/time/timekeeping.c | 24 ++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index a94ec48587b4..6bff1a1d9060 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -187,6 +187,11 @@ void read_persistent_clock(struct timespec *ts) tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); } +void read_boot_clock(struct timespec *ts) +{ + tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); +} + static cycle_t read_tod_clock(struct clocksource *cs) { return get_clock(); @@ -243,9 +248,6 @@ void update_vsyscall_tz(void) */ void __init time_init(void) { - struct timespec ts; - unsigned long flags; - /* Reset time synchronization interfaces. 
*/ etr_reset(); stp_reset(); @@ -261,15 +263,6 @@ void __init time_init(void) if (clocksource_register(&clocksource_tod) != 0) panic("Could not register TOD clock source"); - /* - * Reset wall_to_monotonic to the initial timestamp created - * in head.S to get a precise value in /proc/uptime. - */ - write_seqlock_irqsave(&xtime_lock, flags); - tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); - set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); - write_sequnlock_irqrestore(&xtime_lock, flags); - /* Enable TOD clock interrupts on the boot cpu. */ init_cpu_timer(); diff --git a/include/linux/time.h b/include/linux/time.h index 53a3216f0d1b..f505988398e6 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -102,6 +102,7 @@ extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; extern void read_persistent_clock(struct timespec *ts); +extern void read_boot_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f1a21ce491e6..15e06defca55 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -501,6 +501,21 @@ void __attribute__((weak)) read_persistent_clock(struct timespec *ts) ts->tv_nsec = 0; } +/** + * read_boot_clock - Return time of the system start. + * + * Weak dummy function for arches that do not yet support it. + * Function to read the exact time the system has been started. + * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. + * + * XXX - Do be sure to remove it once all arches implement it. + */ +void __attribute__((weak)) read_boot_clock(struct timespec *ts) +{ + ts->tv_sec = 0; + ts->tv_nsec = 0; +} + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -508,9 +523,10 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - struct timespec now; + struct timespec now, boot; read_persistent_clock(&now); + read_boot_clock(&boot); write_seqlock_irqsave(&xtime_lock, flags); @@ -525,8 +541,12 @@ void __init timekeeping_init(void) xtime.tv_nsec = now.tv_nsec; raw_time.tv_sec = 0; raw_time.tv_nsec = 0; + if (boot.tv_sec == 0 && boot.tv_nsec == 0) { + boot.tv_sec = xtime.tv_sec; + boot.tv_nsec = xtime.tv_nsec; + } set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + -boot.tv_sec, -boot.tv_nsec); update_xtime_cache(0); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; -- cgit v1.2.3 From 799e64f05f4bfaad2bb3165cab95c8c992a1c296 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 15 Aug 2009 09:53:47 -0700 Subject: cpu hotplug: Introduce cpu_notifier() to handle !HOTPLUG_CPU case This patch introduces a new cpu_notifier() API that is similar to hotcpu_notifier(), but which also notifies of CPUs coming online during boot in the !HOTPLUG_CPU case. Reported-by: Ingo Molnar Reported-by: Hugh Dickins Tested-by: Hugh Dickins Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: josht@linux.vnet.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: benh@kernel.crashing.org LKML-Reference: <12503552312611-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 4d668e05d458..47536197ffdd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -48,6 +48,15 @@ struct notifier_block; #ifdef CONFIG_SMP /* Need to know about CPUs going up/down? */ +#if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) +#define cpu_notifier(fn, pri) { \ + static struct notifier_block fn##_nb __cpuinitdata = \ + { .notifier_call = fn, .priority = pri }; \ + register_cpu_notifier(&fn##_nb); \ +} +#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ +#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ #ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); extern void unregister_cpu_notifier(struct notifier_block *nb); @@ -74,6 +83,8 @@ extern void cpu_maps_update_done(void); #else /* CONFIG_SMP */ +#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) + static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; @@ -99,11 +110,7 @@ extern struct sysdev_class cpu_sysdev_class; extern void get_online_cpus(void); extern void put_online_cpus(void); -#define hotcpu_notifier(fn, pri) { \ - static struct notifier_block fn##_nb __cpuinitdata = \ - { .notifier_call = fn, .priority = pri }; \ - register_cpu_notifier(&fn##_nb); \ -} +#define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) int cpu_down(unsigned int cpu); -- cgit v1.2.3 From 2bf49690325b62480a42f7afed5e9f164173c570 Mon Sep 17 00:00:00 2001 From: Thomas Liu Date: Tue, 14 Jul 2009 12:14:09 -0400 Subject: SELinux: Convert avc_audit to use lsm_audit.h Convert avc_audit in security/selinux/avc.c to use lsm_audit.h, for better maintainability. - changed selinux to use common_audit_data instead of avc_audit_data - eliminated code in avc.c and used code from lsm_audit.h instead. Had to add a LSM_AUDIT_NO_AUDIT to lsm_audit.h so that avc_audit can call common_lsm_audit and do the pre and post callbacks without doing the actual dump. This makes it so that the patched version behaves the same way as the unpatched version. Also added a denied field to the selinux_audit_data private space, once again to make it so that the patched version behaves like the unpatched. I've tested and confirmed that AVCs look the same before and after this patch. 
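In outline, avc_audit() now only packages the SELinux-specific fields into the common structure and lets the generic code emit the record; the following sketch is distilled from the avc.c hunks below and is not a complete listing:

	struct common_audit_data stack_data;

	if (!a) {
		/* No caller-supplied audit data: use a stack copy marked
		 * LSM_AUDIT_NO_AUDIT so the generic dump is skipped and
		 * only the pre/post callbacks produce output. */
		a = &stack_data;
		memset(a, 0, sizeof(*a));
		a->type = LSM_AUDIT_NO_AUDIT;
	}
	a->selinux_audit_data.tclass = tclass;
	a->selinux_audit_data.requested = requested;
	a->selinux_audit_data.ssid = ssid;
	a->selinux_audit_data.tsid = tsid;
	a->selinux_audit_data.audited = audited;
	a->selinux_audit_data.denied = denied;
	a->lsm_pre_audit = avc_audit_pre_callback;	/* "avc: denied { ... } for" */
	a->lsm_post_audit = avc_audit_post_callback;	/* ssid/tsid/tclass dump */
	common_lsm_audit(a);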
Signed-off-by: Thomas Liu Acked-by: Stephen Smalley Signed-off-by: James Morris --- include/linux/lsm_audit.h | 2 + security/Makefile | 4 +- security/lsm_audit.c | 2 + security/selinux/avc.c | 197 ++++++++---------------------------- security/selinux/hooks.c | 142 +++++++++++++------------- security/selinux/include/avc.h | 49 +-------- security/selinux/include/netlabel.h | 4 +- security/selinux/include/xfrm.h | 8 +- security/selinux/netlabel.c | 2 +- security/selinux/xfrm.c | 4 +- 10 files changed, 131 insertions(+), 283 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index a5514a3a4f17..190c37854870 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -33,6 +33,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_IPC 4 #define LSM_AUDIT_DATA_TASK 5 #define LSM_AUDIT_DATA_KEY 6 +#define LSM_AUDIT_NO_AUDIT 7 struct task_struct *tsk; union { struct { @@ -86,6 +87,7 @@ struct common_audit_data { u16 tclass; u32 requested; u32 audited; + u32 denied; struct av_decision *avd; int result; } selinux_audit_data; diff --git a/security/Makefile b/security/Makefile index b56e7f9ecbc2..95ecc06392d7 100644 --- a/security/Makefile +++ b/security/Makefile @@ -16,9 +16,7 @@ obj-$(CONFIG_SECURITYFS) += inode.o # Must precede capability.o in order to stack properly. obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o -ifeq ($(CONFIG_AUDIT),y) -obj-$(CONFIG_SECURITY_SMACK) += lsm_audit.o -endif +obj-$(CONFIG_AUDIT) += lsm_audit.o obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/built-in.o obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 94b868494b31..500aad0ebd6a 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -220,6 +220,8 @@ static void dump_common_audit_data(struct audit_buffer *ab, } switch (a->type) { + case LSM_AUDIT_NO_AUDIT: + return; case LSM_AUDIT_DATA_IPC: audit_log_format(ab, " key=%d ", a->u.ipc_id); break; diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 236aaa2ea86d..e3d19014259b 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -492,23 +492,35 @@ out: return node; } -static inline void avc_print_ipv6_addr(struct audit_buffer *ab, - struct in6_addr *addr, __be16 port, - char *name1, char *name2) +/** + * avc_audit_pre_callback - SELinux specific information + * will be called by generic audit code + * @ab: the audit buffer + * @a: audit_data + */ +static void avc_audit_pre_callback(struct audit_buffer *ab, void *a) { - if (!ipv6_addr_any(addr)) - audit_log_format(ab, " %s=%pI6", name1, addr); - if (port) - audit_log_format(ab, " %s=%d", name2, ntohs(port)); + struct common_audit_data *ad = a; + audit_log_format(ab, "avc: %s ", + ad->selinux_audit_data.denied ? 
"denied" : "granted"); + avc_dump_av(ab, ad->selinux_audit_data.tclass, + ad->selinux_audit_data.audited); + audit_log_format(ab, " for "); } -static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr, - __be16 port, char *name1, char *name2) +/** + * avc_audit_post_callback - SELinux specific information + * will be called by generic audit code + * @ab: the audit buffer + * @a: audit_data + */ +static void avc_audit_post_callback(struct audit_buffer *ab, void *a) { - if (addr) - audit_log_format(ab, " %s=%pI4", name1, &addr); - if (port) - audit_log_format(ab, " %s=%d", name2, ntohs(port)); + struct common_audit_data *ad = a; + audit_log_format(ab, " "); + avc_dump_query(ab, ad->selinux_audit_data.ssid, + ad->selinux_audit_data.tsid, + ad->selinux_audit_data.tclass); } /** @@ -532,13 +544,10 @@ static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr, */ void avc_audit(u32 ssid, u32 tsid, u16 tclass, u32 requested, - struct av_decision *avd, int result, struct avc_audit_data *a) + struct av_decision *avd, int result, struct common_audit_data *a) { - struct task_struct *tsk = current; - struct inode *inode = NULL; + struct common_audit_data stack_data; u32 denied, audited; - struct audit_buffer *ab; - denied = requested & ~avd->allowed; if (denied) { audited = denied; @@ -551,144 +560,20 @@ void avc_audit(u32 ssid, u32 tsid, if (!(audited & avd->auditallow)) return; } - - ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC); - if (!ab) - return; /* audit_panic has been called */ - audit_log_format(ab, "avc: %s ", denied ? "denied" : "granted"); - avc_dump_av(ab, tclass, audited); - audit_log_format(ab, " for "); - if (a && a->tsk) - tsk = a->tsk; - if (tsk && tsk->pid) { - audit_log_format(ab, " pid=%d comm=", tsk->pid); - audit_log_untrustedstring(ab, tsk->comm); - } - if (a) { - switch (a->type) { - case AVC_AUDIT_DATA_IPC: - audit_log_format(ab, " key=%d", a->u.ipc_id); - break; - case AVC_AUDIT_DATA_CAP: - audit_log_format(ab, " capability=%d", a->u.cap); - break; - case AVC_AUDIT_DATA_FS: - if (a->u.fs.path.dentry) { - struct dentry *dentry = a->u.fs.path.dentry; - if (a->u.fs.path.mnt) { - audit_log_d_path(ab, "path=", - &a->u.fs.path); - } else { - audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, dentry->d_name.name); - } - inode = dentry->d_inode; - } else if (a->u.fs.inode) { - struct dentry *dentry; - inode = a->u.fs.inode; - dentry = d_find_alias(inode); - if (dentry) { - audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, dentry->d_name.name); - dput(dentry); - } - } - if (inode) - audit_log_format(ab, " dev=%s ino=%lu", - inode->i_sb->s_id, - inode->i_ino); - break; - case AVC_AUDIT_DATA_NET: - if (a->u.net.sk) { - struct sock *sk = a->u.net.sk; - struct unix_sock *u; - int len = 0; - char *p = NULL; - - switch (sk->sk_family) { - case AF_INET: { - struct inet_sock *inet = inet_sk(sk); - - avc_print_ipv4_addr(ab, inet->rcv_saddr, - inet->sport, - "laddr", "lport"); - avc_print_ipv4_addr(ab, inet->daddr, - inet->dport, - "faddr", "fport"); - break; - } - case AF_INET6: { - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *inet6 = inet6_sk(sk); - - avc_print_ipv6_addr(ab, &inet6->rcv_saddr, - inet->sport, - "laddr", "lport"); - avc_print_ipv6_addr(ab, &inet6->daddr, - inet->dport, - "faddr", "fport"); - break; - } - case AF_UNIX: - u = unix_sk(sk); - if (u->dentry) { - struct path path = { - .dentry = u->dentry, - .mnt = u->mnt - }; - audit_log_d_path(ab, "path=", - &path); - break; - } - if 
(!u->addr) - break; - len = u->addr->len-sizeof(short); - p = &u->addr->name->sun_path[0]; - audit_log_format(ab, " path="); - if (*p) - audit_log_untrustedstring(ab, p); - else - audit_log_n_hex(ab, p, len); - break; - } - } - - switch (a->u.net.family) { - case AF_INET: - avc_print_ipv4_addr(ab, a->u.net.v4info.saddr, - a->u.net.sport, - "saddr", "src"); - avc_print_ipv4_addr(ab, a->u.net.v4info.daddr, - a->u.net.dport, - "daddr", "dest"); - break; - case AF_INET6: - avc_print_ipv6_addr(ab, &a->u.net.v6info.saddr, - a->u.net.sport, - "saddr", "src"); - avc_print_ipv6_addr(ab, &a->u.net.v6info.daddr, - a->u.net.dport, - "daddr", "dest"); - break; - } - if (a->u.net.netif > 0) { - struct net_device *dev; - - /* NOTE: we always use init's namespace */ - dev = dev_get_by_index(&init_net, - a->u.net.netif); - if (dev) { - audit_log_format(ab, " netif=%s", - dev->name); - dev_put(dev); - } - } - break; - } + if (!a) { + a = &stack_data; + memset(a, 0, sizeof(*a)); + a->type = LSM_AUDIT_NO_AUDIT; } - audit_log_format(ab, " "); - avc_dump_query(ab, ssid, tsid, tclass); - audit_log_end(ab); + a->selinux_audit_data.tclass = tclass; + a->selinux_audit_data.requested = requested; + a->selinux_audit_data.ssid = ssid; + a->selinux_audit_data.tsid = tsid; + a->selinux_audit_data.audited = audited; + a->selinux_audit_data.denied = denied; + a->lsm_pre_audit = avc_audit_pre_callback; + a->lsm_post_audit = avc_audit_post_callback; + common_lsm_audit(a); } /** @@ -956,7 +841,7 @@ out: * another -errno upon other errors. */ int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, - u32 requested, struct avc_audit_data *auditdata) + u32 requested, struct common_audit_data *auditdata) { struct av_decision avd; int rc; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5aa45b168122..254b7983657d 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1478,14 +1478,14 @@ static int task_has_capability(struct task_struct *tsk, const struct cred *cred, int cap, int audit) { - struct avc_audit_data ad; + struct common_audit_data ad; struct av_decision avd; u16 sclass; u32 sid = cred_sid(cred); u32 av = CAP_TO_MASK(cap); int rc; - AVC_AUDIT_DATA_INIT(&ad, CAP); + COMMON_AUDIT_DATA_INIT(&ad, CAP); ad.tsk = tsk; ad.u.cap = cap; @@ -1524,10 +1524,10 @@ static int task_has_system(struct task_struct *tsk, static int inode_has_perm(const struct cred *cred, struct inode *inode, u32 perms, - struct avc_audit_data *adp) + struct common_audit_data *adp) { struct inode_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid; if (unlikely(IS_PRIVATE(inode))) @@ -1538,7 +1538,7 @@ static int inode_has_perm(const struct cred *cred, if (!adp) { adp = &ad; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.inode = inode; } @@ -1554,9 +1554,9 @@ static inline int dentry_has_perm(const struct cred *cred, u32 av) { struct inode *inode = dentry->d_inode; - struct avc_audit_data ad; + struct common_audit_data ad; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.mnt = mnt; ad.u.fs.path.dentry = dentry; return inode_has_perm(cred, inode, av, &ad); @@ -1576,11 +1576,11 @@ static int file_has_perm(const struct cred *cred, { struct file_security_struct *fsec = file->f_security; struct inode *inode = file->f_path.dentry->d_inode; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = cred_sid(cred); int rc; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path = file->f_path; if (sid != 
fsec->sid) { @@ -1611,7 +1611,7 @@ static int may_create(struct inode *dir, struct inode_security_struct *dsec; struct superblock_security_struct *sbsec; u32 sid, newsid; - struct avc_audit_data ad; + struct common_audit_data ad; int rc; dsec = dir->i_security; @@ -1620,7 +1620,7 @@ static int may_create(struct inode *dir, sid = tsec->sid; newsid = tsec->create_sid; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = dentry; rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, @@ -1664,7 +1664,7 @@ static int may_link(struct inode *dir, { struct inode_security_struct *dsec, *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); u32 av; int rc; @@ -1672,7 +1672,7 @@ static int may_link(struct inode *dir, dsec = dir->i_security; isec = dentry->d_inode->i_security; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = dentry; av = DIR__SEARCH; @@ -1707,7 +1707,7 @@ static inline int may_rename(struct inode *old_dir, struct dentry *new_dentry) { struct inode_security_struct *old_dsec, *new_dsec, *old_isec, *new_isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); u32 av; int old_is_dir, new_is_dir; @@ -1718,7 +1718,7 @@ static inline int may_rename(struct inode *old_dir, old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); new_dsec = new_dir->i_security; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = old_dentry; rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR, @@ -1760,7 +1760,7 @@ static inline int may_rename(struct inode *old_dir, static int superblock_has_perm(const struct cred *cred, struct super_block *sb, u32 perms, - struct avc_audit_data *ad) + struct common_audit_data *ad) { struct superblock_security_struct *sbsec; u32 sid = cred_sid(cred); @@ -2100,7 +2100,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) const struct task_security_struct *old_tsec; struct task_security_struct *new_tsec; struct inode_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; struct inode *inode = bprm->file->f_path.dentry->d_inode; int rc; @@ -2138,7 +2138,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) return rc; } - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path = bprm->file->f_path; if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) @@ -2231,7 +2231,7 @@ extern struct dentry *selinux_null; static inline void flush_unauthorized_files(const struct cred *cred, struct files_struct *files) { - struct avc_audit_data ad; + struct common_audit_data ad; struct file *file, *devnull = NULL; struct tty_struct *tty; struct fdtable *fdt; @@ -2265,7 +2265,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, /* Revalidate access to inherited open files. 
*/ - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); spin_lock(&files->file_lock); for (;;) { @@ -2514,7 +2514,7 @@ out: static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) { const struct cred *cred = current_cred(); - struct avc_audit_data ad; + struct common_audit_data ad; int rc; rc = superblock_doinit(sb, data); @@ -2525,7 +2525,7 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) if (flags & MS_KERNMOUNT) return 0; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = sb->s_root; return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad); } @@ -2533,9 +2533,9 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) static int selinux_sb_statfs(struct dentry *dentry) { const struct cred *cred = current_cred(); - struct avc_audit_data ad; + struct common_audit_data ad; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = dentry->d_sb->s_root; return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad); } @@ -2755,7 +2755,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, struct inode *inode = dentry->d_inode; struct inode_security_struct *isec = inode->i_security; struct superblock_security_struct *sbsec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 newsid, sid = current_sid(); int rc = 0; @@ -2769,7 +2769,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, if (!is_owner_or_cap(inode)) return -EPERM; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = dentry; rc = avc_has_perm(sid, isec->sid, isec->sclass, @@ -3418,7 +3418,7 @@ static void selinux_task_to_inode(struct task_struct *p, /* Returns error only if unable to parse addresses */ static int selinux_parse_skb_ipv4(struct sk_buff *skb, - struct avc_audit_data *ad, u8 *proto) + struct common_audit_data *ad, u8 *proto) { int offset, ihlen, ret = -EINVAL; struct iphdr _iph, *ih; @@ -3499,7 +3499,7 @@ out: /* Returns error only if unable to parse addresses */ static int selinux_parse_skb_ipv6(struct sk_buff *skb, - struct avc_audit_data *ad, u8 *proto) + struct common_audit_data *ad, u8 *proto) { u8 nexthdr; int ret = -EINVAL, offset; @@ -3570,7 +3570,7 @@ out: #endif /* IPV6 */ -static int selinux_parse_skb(struct sk_buff *skb, struct avc_audit_data *ad, +static int selinux_parse_skb(struct sk_buff *skb, struct common_audit_data *ad, char **_addrp, int src, u8 *proto) { char *addrp; @@ -3652,7 +3652,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock, u32 perms) { struct inode_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid; int err = 0; @@ -3662,7 +3662,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock, goto out; sid = task_sid(task); - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.sk = sock->sk; err = avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad); @@ -3749,7 +3749,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in if (family == PF_INET || family == PF_INET6) { char *addrp; struct inode_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; struct sockaddr_in *addr4 = NULL; struct sockaddr_in6 *addr6 = NULL; unsigned short snum; @@ -3778,7 +3778,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in snum, &sid); if 
(err) goto out; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.sport = htons(snum); ad.u.net.family = family; err = avc_has_perm(isec->sid, sid, @@ -3811,7 +3811,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in if (err) goto out; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.sport = htons(snum); ad.u.net.family = family; @@ -3845,7 +3845,7 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, isec = SOCK_INODE(sock)->i_security; if (isec->sclass == SECCLASS_TCP_SOCKET || isec->sclass == SECCLASS_DCCP_SOCKET) { - struct avc_audit_data ad; + struct common_audit_data ad; struct sockaddr_in *addr4 = NULL; struct sockaddr_in6 *addr6 = NULL; unsigned short snum; @@ -3870,7 +3870,7 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, perm = (isec->sclass == SECCLASS_TCP_SOCKET) ? TCP_SOCKET__NAME_CONNECT : DCCP_SOCKET__NAME_CONNECT; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.dport = htons(snum); ad.u.net.family = sk->sk_family; err = avc_has_perm(isec->sid, sid, isec->sclass, perm, &ad); @@ -3960,13 +3960,13 @@ static int selinux_socket_unix_stream_connect(struct socket *sock, struct sk_security_struct *ssec; struct inode_security_struct *isec; struct inode_security_struct *other_isec; - struct avc_audit_data ad; + struct common_audit_data ad; int err; isec = SOCK_INODE(sock)->i_security; other_isec = SOCK_INODE(other)->i_security; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.sk = other->sk; err = avc_has_perm(isec->sid, other_isec->sid, @@ -3992,13 +3992,13 @@ static int selinux_socket_unix_may_send(struct socket *sock, { struct inode_security_struct *isec; struct inode_security_struct *other_isec; - struct avc_audit_data ad; + struct common_audit_data ad; int err; isec = SOCK_INODE(sock)->i_security; other_isec = SOCK_INODE(other)->i_security; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.sk = other->sk; err = avc_has_perm(isec->sid, other_isec->sid, @@ -4011,7 +4011,7 @@ static int selinux_socket_unix_may_send(struct socket *sock, static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family, u32 peer_sid, - struct avc_audit_data *ad) + struct common_audit_data *ad) { int err; u32 if_sid; @@ -4039,10 +4039,10 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, struct sk_security_struct *sksec = sk->sk_security; u32 peer_sid; u32 sk_sid = sksec->sid; - struct avc_audit_data ad; + struct common_audit_data ad; char *addrp; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = skb->iif; ad.u.net.family = family; err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); @@ -4080,7 +4080,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) struct sk_security_struct *sksec = sk->sk_security; u16 family = sk->sk_family; u32 sk_sid = sksec->sid; - struct avc_audit_data ad; + struct common_audit_data ad; char *addrp; u8 secmark_active; u8 peerlbl_active; @@ -4104,7 +4104,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (!secmark_active && !peerlbl_active) return 0; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = skb->iif; ad.u.net.family = family; err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); @@ -4362,7 +4362,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int 
ifindex, int err; char *addrp; u32 peer_sid; - struct avc_audit_data ad; + struct common_audit_data ad; u8 secmark_active; u8 netlbl_active; u8 peerlbl_active; @@ -4379,7 +4379,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0) return NF_DROP; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = ifindex; ad.u.net.family = family; if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0) @@ -4467,7 +4467,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, { struct sock *sk = skb->sk; struct sk_security_struct *sksec; - struct avc_audit_data ad; + struct common_audit_data ad; char *addrp; u8 proto; @@ -4475,7 +4475,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, return NF_ACCEPT; sksec = sk->sk_security; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = ifindex; ad.u.net.family = family; if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto)) @@ -4499,7 +4499,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, u32 secmark_perm; u32 peer_sid; struct sock *sk; - struct avc_audit_data ad; + struct common_audit_data ad; char *addrp; u8 secmark_active; u8 peerlbl_active; @@ -4558,7 +4558,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, secmark_perm = PACKET__SEND; } - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = ifindex; ad.u.net.family = family; if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL)) @@ -4628,13 +4628,13 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb) static int selinux_netlink_recv(struct sk_buff *skb, int capability) { int err; - struct avc_audit_data ad; + struct common_audit_data ad; err = cap_netlink_recv(skb, capability); if (err) return err; - AVC_AUDIT_DATA_INIT(&ad, CAP); + COMMON_AUDIT_DATA_INIT(&ad, CAP); ad.u.cap = capability; return avc_has_perm(NETLINK_CB(skb).sid, NETLINK_CB(skb).sid, @@ -4693,12 +4693,12 @@ static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, u32 perms) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); isec = ipc_perms->security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = ipc_perms->key; return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad); @@ -4718,7 +4718,7 @@ static void selinux_msg_msg_free_security(struct msg_msg *msg) static int selinux_msg_queue_alloc_security(struct msg_queue *msq) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); int rc; @@ -4728,7 +4728,7 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq) isec = msq->q_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = msq->q_perm.key; rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, @@ -4748,12 +4748,12 @@ static void selinux_msg_queue_free_security(struct msg_queue *msq) static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); isec = msq->q_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = msq->q_perm.key; return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, @@ -4792,7 +4792,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct 
msg_msg *msg, { struct ipc_security_struct *isec; struct msg_security_struct *msec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); int rc; @@ -4813,7 +4813,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, return rc; } - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = msq->q_perm.key; /* Can this process write to the queue? */ @@ -4837,14 +4837,14 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, { struct ipc_security_struct *isec; struct msg_security_struct *msec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = task_sid(target); int rc; isec = msq->q_perm.security; msec = msg->security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = msq->q_perm.key; rc = avc_has_perm(sid, isec->sid, @@ -4859,7 +4859,7 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, static int selinux_shm_alloc_security(struct shmid_kernel *shp) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); int rc; @@ -4869,7 +4869,7 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp) isec = shp->shm_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = shp->shm_perm.key; rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM, @@ -4889,12 +4889,12 @@ static void selinux_shm_free_security(struct shmid_kernel *shp) static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); isec = shp->shm_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = shp->shm_perm.key; return avc_has_perm(sid, isec->sid, SECCLASS_SHM, @@ -4951,7 +4951,7 @@ static int selinux_shm_shmat(struct shmid_kernel *shp, static int selinux_sem_alloc_security(struct sem_array *sma) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); int rc; @@ -4961,7 +4961,7 @@ static int selinux_sem_alloc_security(struct sem_array *sma) isec = sma->sem_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = sma->sem_perm.key; rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM, @@ -4981,12 +4981,12 @@ static void selinux_sem_free_security(struct sem_array *sma) static int selinux_sem_associate(struct sem_array *sma, int semflg) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); isec = sma->sem_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = sma->sem_perm.key; return avc_has_perm(sid, isec->sid, SECCLASS_SEM, diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h index ae4c3a0e2c1a..e94e82f73818 100644 --- a/security/selinux/include/avc.h +++ b/security/selinux/include/avc.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -36,48 +37,6 @@ struct inode; struct sock; struct sk_buff; -/* Auxiliary data to use in generating the audit record. 
*/ -struct avc_audit_data { - char type; -#define AVC_AUDIT_DATA_FS 1 -#define AVC_AUDIT_DATA_NET 2 -#define AVC_AUDIT_DATA_CAP 3 -#define AVC_AUDIT_DATA_IPC 4 - struct task_struct *tsk; - union { - struct { - struct path path; - struct inode *inode; - } fs; - struct { - int netif; - struct sock *sk; - u16 family; - __be16 dport; - __be16 sport; - union { - struct { - __be32 daddr; - __be32 saddr; - } v4; - struct { - struct in6_addr daddr; - struct in6_addr saddr; - } v6; - } fam; - } net; - int cap; - int ipc_id; - } u; -}; - -#define v4info fam.v4 -#define v6info fam.v6 - -/* Initialize an AVC audit data structure. */ -#define AVC_AUDIT_DATA_INIT(_d,_t) \ - { memset((_d), 0, sizeof(struct avc_audit_data)); (_d)->type = AVC_AUDIT_DATA_##_t; } - /* * AVC statistics */ @@ -98,7 +57,9 @@ void __init avc_init(void); void avc_audit(u32 ssid, u32 tsid, u16 tclass, u32 requested, - struct av_decision *avd, int result, struct avc_audit_data *auditdata); + struct av_decision *avd, + int result, + struct common_audit_data *a); #define AVC_STRICT 1 /* Ignore permissive mode. */ int avc_has_perm_noaudit(u32 ssid, u32 tsid, @@ -108,7 +69,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, u32 requested, - struct avc_audit_data *auditdata); + struct common_audit_data *auditdata); u32 avc_policy_seqno(void); diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index b4b5b9b2f0be..8d7384280a7a 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -59,7 +59,7 @@ int selinux_netlbl_socket_post_create(struct sock *sk, u16 family); int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, struct sk_buff *skb, u16 family, - struct avc_audit_data *ad); + struct common_audit_data *ad); int selinux_netlbl_socket_setsockopt(struct socket *sock, int level, int optname); @@ -129,7 +129,7 @@ static inline int selinux_netlbl_socket_post_create(struct sock *sk, static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, struct sk_buff *skb, u16 family, - struct avc_audit_data *ad) + struct common_audit_data *ad) { return 0; } diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 289e24b39e3e..13128f9a3e5a 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -41,9 +41,9 @@ static inline int selinux_xfrm_enabled(void) } int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, - struct avc_audit_data *ad); + struct common_audit_data *ad); int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad, u8 proto); + struct common_audit_data *ad, u8 proto); int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall); static inline void selinux_xfrm_notify_policyload(void) @@ -57,13 +57,13 @@ static inline int selinux_xfrm_enabled(void) } static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad) + struct common_audit_data *ad) { return 0; } static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad, u8 proto) + struct common_audit_data *ad, u8 proto) { return 0; } diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index 2e984413c7b2..e68823741ad5 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -342,7 +342,7 @@ int selinux_netlbl_socket_post_create(struct sock *sk, u16 family) int selinux_netlbl_sock_rcv_skb(struct 
sk_security_struct *sksec, struct sk_buff *skb, u16 family, - struct avc_audit_data *ad) + struct common_audit_data *ad) { int rc; u32 nlbl_sid; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 72b18452e1a1..f3cb9ed731a9 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -401,7 +401,7 @@ int selinux_xfrm_state_delete(struct xfrm_state *x) * gone thru the IPSec process. */ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad) + struct common_audit_data *ad) { int i, rc = 0; struct sec_path *sp; @@ -442,7 +442,7 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, * checked in the selinux_xfrm_state_pol_flow_match hook above. */ int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad, u8 proto) + struct common_audit_data *ad, u8 proto) { struct dst_entry *dst; int rc = 0; -- cgit v1.2.3 From b25c340c195447afb1860da580fe2a85a6b652c5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Aug 2009 12:17:22 +0200 Subject: genirq: Add oneshot support For threaded interrupt handlers we expect the hard interrupt handler part to mask the interrupt on the originating device. The interrupt line itself is reenabled after the hard interrupt handler has executed. This requires access to the originating device from hard interrupt context which is not always possible. There are devices which can only be accessed via a bus (i2c, spi, ...). The bus access requires thread context. For such devices we need to keep the interrupt line masked until the threaded handler has executed. Add a new flag IRQF_ONESHOT which allows drivers to request that the interrupt is not unmasked after the hard interrupt context handler has been executed and the thread has been woken. The interrupt line is unmasked after the thread handler function has been executed. Note that for now IRQF_ONESHOT cannot be used with IRQF_SHARED to avoid complex accounting mechanisms. For oneshot interrupts the primary handler simply returns IRQ_WAKE_THREAD and does nothing else. A generic implementation irq_default_primary_handler() is provided to avoid useless copies all over the place. It is automatically installed when request_threaded_irq() is called with handler=NULL and thread_fn!=NULL. Signed-off-by: Thomas Gleixner Cc: Mark Brown Cc: Dmitry Torokhov Cc: Trilok Soni Cc: Pavel Machek Cc: Brian Swetland Cc: Joonyoung Shim Cc: m.szyprowski@samsung.com Cc: t.fujak@samsung.com Cc: kyungmin.park@samsung.com, Cc: David Brownell Cc: Daniel Ribeiro Cc: arve@android.com Cc: Barry Song <21cnbao@gmail.com> --- include/linux/interrupt.h | 4 ++++ include/linux/irq.h | 1 + kernel/irq/chip.c | 14 +++++++++++--- kernel/irq/manage.c | 49 +++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 61 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 35e7df1e9f30..1ac57e522a1f 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -50,6 +50,9 @@ * IRQF_IRQPOLL - Interrupt is used for polling (only the interrupt that is * registered first in an shared interrupt is considered for * performance reasons) + * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished. + * Used by threaded interrupts which need to keep the + * irq line disabled until the threaded handler has been run. 
*/ #define IRQF_DISABLED 0x00000020 #define IRQF_SAMPLE_RANDOM 0x00000040 @@ -59,6 +62,7 @@ #define IRQF_PERCPU 0x00000400 #define IRQF_NOBALANCING 0x00000800 #define IRQF_IRQPOLL 0x00001000 +#define IRQF_ONESHOT 0x00002000 /* * Bits used by threaded handlers: diff --git a/include/linux/irq.h b/include/linux/irq.h index cb2e77a3f7f7..5e7c6ee8c35c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -69,6 +69,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ #define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ #define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */ +#define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 13c68e71b726..b08c0d24f202 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -382,7 +382,10 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; - if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) + + if (unlikely(desc->status & IRQ_ONESHOT)) + desc->status |= IRQ_MASKED; + else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) desc->chip->unmask(irq); out_unlock: spin_unlock(&desc->lock); @@ -478,8 +481,13 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) kstat_incr_irqs_this_cpu(irq, desc); /* Start handling the irq */ - if (desc->chip->ack) - desc->chip->ack(irq); + if (unlikely(desc->status & IRQ_ONESHOT)) { + desc->status |= IRQ_MASKED; + mask_ack_irq(desc, irq); + } else { + if (desc->chip->ack) + desc->chip->ack(irq); + } /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d222515a5a06..d7f7b5fd2476 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -436,6 +436,16 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, return ret; } +/* + * Default primary interrupt handler for threaded interrupts. Is + * assigned as primary handler when request_threaded_irq is called + * with handler == NULL. Useful for oneshot interrupts. + */ +static irqreturn_t irq_default_primary_handler(int irq, void *dev_id) +{ + return IRQ_WAKE_THREAD; +} + static int irq_wait_for_interrupt(struct irqaction *action) { while (!kthread_should_stop()) { @@ -451,6 +461,21 @@ static int irq_wait_for_interrupt(struct irqaction *action) return -1; } +/* + * Oneshot interrupts keep the irq line masked until the threaded + * handler finished. unmask if the interrupt has not been disabled and + * is marked MASKED. + */ +static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) +{ + spin_lock_irq(&desc->lock); + if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { + desc->status &= ~IRQ_MASKED; + desc->chip->unmask(irq); + } + spin_unlock_irq(&desc->lock); +} + #ifdef CONFIG_SMP /* * Check whether we need to change the affinity of the interrupt thread. 
@@ -492,7 +517,7 @@ static int irq_thread(void *data) struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; struct irqaction *action = data; struct irq_desc *desc = irq_to_desc(action->irq); - int wake; + int wake, oneshot = desc->status & IRQ_ONESHOT; sched_setscheduler(current, SCHED_FIFO, &param); current->irqaction = action; @@ -518,6 +543,9 @@ static int irq_thread(void *data) spin_unlock_irq(&desc->lock); action->thread_fn(action->irq, action->dev_id); + + if (oneshot) + irq_finalize_oneshot(action->irq, desc); } wake = atomic_dec_and_test(&desc->threads_active); @@ -590,6 +618,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) rand_initialize_irq(irq); } + /* Oneshot interrupts are not allowed with shared */ + if ((new->flags & IRQF_ONESHOT) && (new->flags & IRQF_SHARED)) + return -EINVAL; + /* * Threaded handler ? */ @@ -663,9 +695,12 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) desc->status |= IRQ_PER_CPU; #endif - desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | + desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | IRQ_ONESHOT | IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED); + if (new->flags & IRQF_ONESHOT) + desc->status |= IRQ_ONESHOT; + if (!(desc->status & IRQ_NOAUTOEN)) { desc->depth = 0; desc->status &= ~IRQ_DISABLED; @@ -878,6 +913,8 @@ EXPORT_SYMBOL(free_irq); * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs. * Primary handler for threaded interrupts + * If NULL and thread_fn != NULL the default + * primary handler is installed * @thread_fn: Function called from the irq handler thread * If NULL, no irq thread is created * @irqflags: Interrupt type flags @@ -957,8 +994,12 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, if (desc->status & IRQ_NOREQUEST) return -EINVAL; - if (!handler) - return -EINVAL; + + if (!handler) { + if (!thread_fn) + return -EINVAL; + handler = irq_default_primary_handler; + } action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); if (!action) -- cgit v1.2.3 From 70aedd24d20e75198f5a0b11750faabbb56924e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Aug 2009 12:17:48 +0200 Subject: genirq: Add buslock support Some interrupt chips are connected to a "slow" bus (i2c, spi ...). The bus access needs to sleep and therefore cannot be called in atomic contexts. Some of the generic interrupt management functions like disable_irq(), enable_irq() ... call interrupt chip functions with the irq_desc->lock held and interrupts disabled. This does not work for such devices. Provide a separate synchronization mechanism for such interrupt chips. The irq_chip structure is extended by two optional functions (bus_lock and bus_sync_unlock). The idea is to serialize the bus access for those operations in the core code so that drivers which are behind that bus-operated interrupt controller do not have to worry about it and can just use the normal interfaces. To achieve this we add two function pointers to the irq_chip: bus_lock and bus_sync_unlock. bus_lock() is called to serialize access to the interrupt controller bus. Now the core code can issue chip->mask/unmask ... commands without changing the fast path code at all. The chip implementation merely stores that information in a chip private data structure and returns. No bus interaction as these functions are called from atomic context. After that bus_sync_unlock() is called outside the atomic context. 
Now the chip implementation issues the bus commands, waits for completion and unlocks the interrupt controller bus. The irq_chip implementation as pseudo code:

	struct irq_chip_data {
		struct mutex	mutex;
		unsigned int	irq_offset;
		unsigned long	mask;
		unsigned long	mask_status;
	};

	static void bus_lock(unsigned int irq)
	{
		struct irq_chip_data *data = get_irq_desc_chip_data(irq);

		mutex_lock(&data->mutex);
	}

	static void mask(unsigned int irq)
	{
		struct irq_chip_data *data = get_irq_desc_chip_data(irq);

		irq -= data->irq_offset;
		data->mask |= (1 << irq);
	}

	static void unmask(unsigned int irq)
	{
		struct irq_chip_data *data = get_irq_desc_chip_data(irq);

		irq -= data->irq_offset;
		data->mask &= ~(1 << irq);
	}

	static void bus_sync_unlock(unsigned int irq)
	{
		struct irq_chip_data *data = get_irq_desc_chip_data(irq);

		if (data->mask != data->mask_status) {
			do_bus_magic_to_set_mask(data->mask);
			data->mask_status = data->mask;
		}
		mutex_unlock(&data->mutex);
	}

Device drivers can use request_threaded_irq(), free_irq(), disable_irq() and enable_irq() as usual with the only restriction that the calls need to come from non-atomic context. Signed-off-by: Thomas Gleixner Cc: Mark Brown Cc: Dmitry Torokhov Cc: Trilok Soni Cc: Pavel Machek Cc: Brian Swetland Cc: Joonyoung Shim Cc: m.szyprowski@samsung.com Cc: t.fujak@samsung.com Cc: kyungmin.park@samsung.com, Cc: David Brownell Cc: Daniel Ribeiro Cc: arve@android.com Cc: Barry Song <21cnbao@gmail.com> --- include/linux/irq.h | 6 ++++++ kernel/irq/chip.c | 2 ++ kernel/irq/internals.h | 13 +++++++++++++ kernel/irq/manage.c | 19 ++++++++++++++++++- 4 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 5e7c6ee8c35c..ce8171bc6fac 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -101,6 +101,9 @@ struct msi_desc; * @set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ * @set_wake: enable/disable power-management wake-on of an IRQ * + * @bus_lock: function to lock access to slow bus (i2c) chips + * @bus_sync_unlock: function to sync and unlock slow bus (i2c) chips + * * @release: release function solely used by UML * @typename: obsoleted by name, kept as migration helper */ @@ -124,6 +127,9 @@ struct irq_chip { int (*set_type)(unsigned int irq, unsigned int flow_type); int (*set_wake)(unsigned int irq, unsigned int on); + void (*bus_lock)(unsigned int irq); + void (*bus_sync_unlock)(unsigned int irq); + /* Currently used only by UML, might disappear one day.*/ #ifdef CONFIG_IRQ_RELEASE_METHOD void (*release)(unsigned int irq, void *dev_id); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b08c0d24f202..f856330e684a 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -580,6 +580,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, desc->chip = &dummy_irq_chip; } + chip_bus_lock(irq, desc); spin_lock_irqsave(&desc->lock, flags); /* Uninstall? 
*/ @@ -599,6 +600,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, desc->chip->startup(irq); } spin_unlock_irqrestore(&desc->lock, flags); + chip_bus_sync_unlock(irq, desc); } EXPORT_SYMBOL_GPL(__set_irq_handler); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index e70ed5592eb9..1b5d742c6a77 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -44,6 +44,19 @@ extern int irq_select_affinity_usr(unsigned int irq); extern void irq_set_thread_affinity(struct irq_desc *desc); +/* Inline functions for support of irq chips on slow busses */ +static inline void chip_bus_lock(unsigned int irq, struct irq_desc *desc) +{ + if (unlikely(desc->chip->bus_lock)) + desc->chip->bus_lock(irq); +} + +static inline void chip_bus_sync_unlock(unsigned int irq, struct irq_desc *desc) +{ + if (unlikely(desc->chip->bus_sync_unlock)) + desc->chip->bus_sync_unlock(irq); +} + /* * Debugging printout: */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d7f7b5fd2476..0a3fd5b524c9 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -230,9 +230,11 @@ void disable_irq_nosync(unsigned int irq) if (!desc) return; + chip_bus_lock(irq, desc); spin_lock_irqsave(&desc->lock, flags); __disable_irq(desc, irq, false); spin_unlock_irqrestore(&desc->lock, flags); + chip_bus_sync_unlock(irq, desc); } EXPORT_SYMBOL(disable_irq_nosync); @@ -294,7 +296,8 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) * matches the last disable, processing of interrupts on this * IRQ line is re-enabled. * - * This function may be called from IRQ context. + * This function may be called from IRQ context only when + * desc->chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! */ void enable_irq(unsigned int irq) { @@ -304,9 +307,11 @@ void enable_irq(unsigned int irq) if (!desc) return; + chip_bus_lock(irq, desc); spin_lock_irqsave(&desc->lock, flags); __enable_irq(desc, irq, false); spin_unlock_irqrestore(&desc->lock, flags); + chip_bus_sync_unlock(irq, desc); } EXPORT_SYMBOL(enable_irq); @@ -468,12 +473,14 @@ static int irq_wait_for_interrupt(struct irqaction *action) */ static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) { + chip_bus_lock(irq, desc); spin_lock_irq(&desc->lock); if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { desc->status &= ~IRQ_MASKED; desc->chip->unmask(irq); } spin_unlock_irq(&desc->lock); + chip_bus_sync_unlock(irq, desc); } #ifdef CONFIG_SMP @@ -904,7 +911,14 @@ EXPORT_SYMBOL_GPL(remove_irq); */ void free_irq(unsigned int irq, void *dev_id) { + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc) + return; + + chip_bus_lock(irq, desc); kfree(__free_irq(irq, dev_id)); + chip_bus_sync_unlock(irq, desc); } EXPORT_SYMBOL(free_irq); @@ -1011,7 +1025,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, action->name = devname; action->dev_id = dev_id; + chip_bus_lock(irq, desc); retval = __setup_irq(irq, desc, action); + chip_bus_sync_unlock(irq, desc); + if (retval) kfree(action); -- cgit v1.2.3 From 399b5da29b9f851eb7b96e2882097127f003e87c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Aug 2009 13:21:38 +0200 Subject: genirq: Support nested threaded irq handling Interrupt chips which are behind a slow bus (i2c, spi ...) and demultiplex other interrupt sources need to run their interrupt handler in a thread. 
The demultiplexed interrupt handlers need to run in thread context as well and need to finish before the demux handler thread can reenable the interrupt line. So the easiest way is to run the sub device handlers in the context of the demultiplexing handler thread. To avoid creating a separate thread for the subdevices, the function set_irq_nested_thread() is provided, which sets the IRQ_NESTED_THREAD flag in the interrupt descriptor. A driver which calls request_threaded_irq() need not be aware of the fact that the threaded handler is called in the context of the demultiplexing handler thread. The setup code checks the IRQ_NESTED_THREAD flag which was set from the irq chip setup code and does not set up a separate thread for the interrupt. The primary function which is provided by the device driver is replaced by an internal dummy function which warns when it is called. For the demultiplexing handler a helper function handle_nested_irq() is provided which calls the demux interrupt thread function in the context of the caller and does the proper interrupt accounting and takes the interrupt disabled status of the demultiplexed subdevice into account. Signed-off-by: Thomas Gleixner Cc: Mark Brown Cc: Dmitry Torokhov Cc: Trilok Soni Cc: Pavel Machek Cc: Brian Swetland Cc: Joonyoung Shim Cc: m.szyprowski@samsung.com Cc: t.fujak@samsung.com Cc: kyungmin.park@samsung.com, Cc: David Brownell Cc: Daniel Ribeiro Cc: arve@android.com Cc: Barry Song <21cnbao@gmail.com> --- include/linux/irq.h | 3 +++ kernel/irq/chip.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/irq/manage.c | 34 ++++++++++++++++++++++++--- 3 files changed, 101 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index ce8171bc6fac..8778ee993937 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -70,6 +70,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ #define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */ #define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ +#define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) @@ -386,6 +387,8 @@ set_irq_chained_handler(unsigned int irq, __set_irq_handler(irq, handle, 1, NULL); } +extern void set_irq_nested_thread(unsigned int irq, int nest); + extern void set_irq_noprobe(unsigned int irq); extern void set_irq_probe(unsigned int irq); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f856330e684a..5765aad94998 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -222,6 +222,34 @@ int set_irq_chip_data(unsigned int irq, void *data) } EXPORT_SYMBOL(set_irq_chip_data); +/** + * set_irq_nested_thread - Set/Reset the IRQ_NESTED_THREAD flag of an irq + * + * @irq: Interrupt number + * @nest: 0 to clear / 1 to set the IRQ_NESTED_THREAD flag + * + * The IRQ_NESTED_THREAD flag indicates that on + * request_threaded_irq() no separate interrupt thread should be + * created for the irq as the handler are called nested in the + * context of a demultiplexing interrupt handler thread. 
+ */ +void set_irq_nested_thread(unsigned int irq, int nest) +{ + struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; + + if (!desc) + return; + + spin_lock_irqsave(&desc->lock, flags); + if (nest) + desc->status |= IRQ_NESTED_THREAD; + else + desc->status &= ~IRQ_NESTED_THREAD; + spin_unlock_irqrestore(&desc->lock, flags); +} +EXPORT_SYMBOL_GPL(set_irq_nested_thread); + /* * default enable function */ @@ -299,6 +327,45 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq) } } +/* + * handle_nested_irq - Handle a nested irq from a irq thread + * @irq: the interrupt number + * + * Handle interrupts which are nested into a threaded interrupt + * handler. The handler function is called inside the calling + * threads context. + */ +void handle_nested_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + irqreturn_t action_ret; + + might_sleep(); + + spin_lock_irq(&desc->lock); + + kstat_incr_irqs_this_cpu(irq, desc); + + action = desc->action; + if (unlikely(!action || (desc->status & IRQ_DISABLED))) + goto out_unlock; + + desc->status |= IRQ_INPROGRESS; + spin_unlock_irq(&desc->lock); + + action_ret = action->thread_fn(action->irq, action->dev_id); + if (!noirqdebug) + note_interrupt(irq, desc, action_ret); + + spin_lock_irq(&desc->lock); + desc->status &= ~IRQ_INPROGRESS; + +out_unlock: + spin_unlock_irq(&desc->lock); +} +EXPORT_SYMBOL_GPL(handle_nested_irq); + /** * handle_simple_irq - Simple and software-decoded IRQs. * @irq: the interrupt number diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a3fd5b524c9..e485cea04bf1 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -451,6 +451,16 @@ static irqreturn_t irq_default_primary_handler(int irq, void *dev_id) return IRQ_WAKE_THREAD; } +/* + * Primary handler for nested threaded interrupts. Should never be + * called. + */ +static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id) +{ + WARN(1, "Primary handler called for nested irq %d\n", irq); + return IRQ_NONE; +} + static int irq_wait_for_interrupt(struct irqaction *action) { while (!kthread_should_stop()) { @@ -600,7 +610,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) struct irqaction *old, **old_ptr; const char *old_name = NULL; unsigned long flags; - int shared = 0; + int nested, shared = 0; int ret; if (!desc) @@ -630,9 +640,27 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) return -EINVAL; /* - * Threaded handler ? + * Check whether the interrupt nests into another interrupt + * thread. + */ + nested = desc->status & IRQ_NESTED_THREAD; + if (nested) { + if (!new->thread_fn) + return -EINVAL; + /* + * Replace the primary handler which was provided from + * the driver for non nested interrupt handling by the + * dummy function which warns when called. + */ + new->handler = irq_nested_primary_handler; + } + + /* + * Create a handler thread when a thread function is supplied + * and the interrupt does not nest into another interrupt + * thread. 
*/ - if (new->thread_fn) { + if (new->thread_fn && !nested) { struct task_struct *t; t = kthread_create(irq_thread, new, "irq/%d-%s", irq, -- cgit v1.2.3 From 0ccff1a49def92d6b838a6da166c89004b3a4d0c Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 17 Aug 2009 22:38:04 -0400 Subject: jbd2: bitfields should be unsigned This fixes sparse noise: error: dubious one-bit signed bitfield Signed-off-by: H Hartley Sweeten Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" Cc: Jan Kara --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d97eb652d6ca..52695d3dfd0b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -652,7 +652,7 @@ struct transaction_s * This transaction is being forced and some process is * waiting for it to finish. */ - int t_synchronous_commit:1; + unsigned int t_synchronous_commit:1; /* * For use by the filesystem to store fs-specific data -- cgit v1.2.3 From 1314562a9ae5f39f6f595656023c1baf970831ef Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 18 Aug 2009 15:06:02 +0900 Subject: sched, task_struct: stack_canary is not needed without CC_STACKPROTECTOR The field stack_canary is only used with CC_STACKPROTECTOR. This patch reduces task_struct size without CC_STACKPROTECTOR. Signed-off-by: Hiroshi Shimamoto LKML-Reference: <4A8A44CA.2020701@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 195d72d5c102..7bc2d9290837 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1237,8 +1237,10 @@ struct task_struct { pid_t pid; pid_t tgid; +#ifdef CONFIG_CC_STACKPROTECTOR /* Canary value for the -fstack-protector gcc feature */ unsigned long stack_canary; +#endif /* * pointers to (original) parent process, youngest child, younger sibling, -- cgit v1.2.3 From 23428e6b4649adfbdaa6a0c93fc6d652bf5f9d44 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 18 Aug 2009 20:13:03 -0700 Subject: mdio: mdio_if_info::mmds should not be __bitwise I misunderstood the meaning of __bitwise. In practice it makes sparse warn about every use of mmds, which is certainly not what we want. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/mdio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index cfdf1df2875e..c779b49a1fda 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -304,7 +304,7 @@ static inline __u16 mdio_phy_id_devad(int phy_id) */ struct mdio_if_info { int prtad; - u32 __bitwise mmds; + u32 mmds; unsigned mode_support; struct net_device *dev; -- cgit v1.2.3 From 16eea493da563b5a3356a77c6d8776dffc29d3b6 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Wed, 19 Aug 2009 19:32:24 +0400 Subject: ieee802154: add support for channel pages from IEEE 802.15.4-2006 IEEE 802.15.4-2006 adds a new concept: channel pages, which can contain several channels. Add support for channel pages in the API and in the fakehard driver. 
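As a rough illustration of the widened MLME callbacks (a sketch, not part of this patch), a driver-side scan_req implementation honouring the new page argument could look like this; the my_scan_req name and the 0..31 range check are assumptions, while net_to_phy(), wpan_phy and ieee802154_nl_scan_confirm() are used as in the fakehard driver below:

	static int my_scan_req(struct net_device *dev, u8 type, u32 channels,
			       u8 page, u8 duration)
	{
		struct wpan_phy *phy = net_to_phy(dev);
		u8 edl[27] = {};

		/* Channel pages are a 5-bit quantity in IEEE 802.15.4-2006;
		 * rejecting anything above 31 is an assumed sanity check. */
		if (page > 31)
			return -EINVAL;

		/* Record the requested page in the PIB, as fakehard does. */
		mutex_lock(&phy->pib_lock);
		phy->current_page = page;
		mutex_unlock(&phy->pib_lock);

		return ieee802154_nl_scan_confirm(dev, IEEE802154_SUCCESS, type,
						  channels, page,
						  type == IEEE802154_MAC_SCAN_ED ?
							edl : NULL);
	}

Userspace that never sets IEEE802154_ATTR_PAGE keeps working, since the netlink handlers default the page to 0.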
Signed-off-by: Dmitry Eremin-Solenikov --- drivers/ieee802154/fakehard.c | 10 ++++++---- include/linux/nl802154.h | 2 ++ include/net/ieee802154_netdev.h | 6 +++--- include/net/nl802154.h | 2 +- net/ieee802154/netlink.c | 28 +++++++++++++++++++++++++--- net/ieee802154/nl_policy.c | 1 + 6 files changed, 38 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c index 22a93bc764c5..c1c9697f9fde 100644 --- a/drivers/ieee802154/fakehard.c +++ b/drivers/ieee802154/fakehard.c @@ -119,12 +119,13 @@ static u8 fake_get_bsn(struct net_device *dev) * 802.15.4-2006 document. */ static int fake_assoc_req(struct net_device *dev, - struct ieee802154_addr *addr, u8 channel, u8 cap) + struct ieee802154_addr *addr, u8 channel, u8 page, u8 cap) { struct wpan_phy *phy = net_to_phy(dev); mutex_lock(&phy->pib_lock); phy->current_channel = channel; + phy->current_page = page; mutex_unlock(&phy->pib_lock); /* We simply emulate it here */ @@ -191,7 +192,7 @@ static int fake_disassoc_req(struct net_device *dev, * document, with 7.3.8 describing coordinator realignment. */ static int fake_start_req(struct net_device *dev, struct ieee802154_addr *addr, - u8 channel, + u8 channel, u8 page, u8 bcn_ord, u8 sf_ord, u8 pan_coord, u8 blx, u8 coord_realign) { @@ -199,6 +200,7 @@ static int fake_start_req(struct net_device *dev, struct ieee802154_addr *addr, mutex_lock(&phy->pib_lock); phy->current_channel = channel; + phy->current_page = page; mutex_unlock(&phy->pib_lock); /* We don't emulate beacons here at all, so START should fail */ @@ -222,11 +224,11 @@ static int fake_start_req(struct net_device *dev, struct ieee802154_addr *addr, * Note: This is in section 7.5.2.1 of the IEEE 802.15.4-2006 document. */ static int fake_scan_req(struct net_device *dev, u8 type, u32 channels, - u8 duration) + u8 page, u8 duration) { u8 edl[27] = {}; return ieee802154_nl_scan_confirm(dev, IEEE802154_SUCCESS, type, - channels, + channels, page, type == IEEE802154_MAC_SCAN_ED ? edl : NULL); } diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 9a1af5f871a3..b7d9435d5a9f 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -64,6 +64,8 @@ enum { IEEE802154_ATTR_COORD_REALIGN, IEEE802154_ATTR_SEC, + IEEE802154_ATTR_PAGE, + __IEEE802154_ATTR_MAX, }; diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index e2506af3e7c8..5dc6a61952de 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -80,7 +80,7 @@ static inline int mac_cb_type(struct sk_buff *skb) struct ieee802154_mlme_ops { int (*assoc_req)(struct net_device *dev, struct ieee802154_addr *addr, - u8 channel, u8 cap); + u8 channel, u8 page, u8 cap); int (*assoc_resp)(struct net_device *dev, struct ieee802154_addr *addr, u16 short_addr, u8 status); @@ -89,10 +89,10 @@ struct ieee802154_mlme_ops { u8 reason); int (*start_req)(struct net_device *dev, struct ieee802154_addr *addr, - u8 channel, u8 bcn_ord, u8 sf_ord, + u8 channel, u8 page, u8 bcn_ord, u8 sf_ord, u8 pan_coord, u8 blx, u8 coord_realign); int (*scan_req)(struct net_device *dev, - u8 type, u32 channels, u8 duration); + u8 type, u32 channels, u8 page, u8 duration); /* * FIXME: these should become the part of PIB/MIB interface. 
diff --git a/include/net/nl802154.h b/include/net/nl802154.h index e554ecd3727a..99d2ba1c7e03 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -95,7 +95,7 @@ int ieee802154_nl_disassoc_confirm(struct net_device *dev, * Note: This API does not permit the return of an active scan result. */ int ieee802154_nl_scan_confirm(struct net_device *dev, - u8 status, u8 scan_type, u32 unscanned, + u8 status, u8 scan_type, u32 unscanned, u8 page, u8 *edl/*, struct list_head *pan_desc_list */); /** diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index cd0567f06716..2106ecbf0308 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -232,7 +232,7 @@ nla_put_failure: EXPORT_SYMBOL(ieee802154_nl_beacon_indic); int ieee802154_nl_scan_confirm(struct net_device *dev, - u8 status, u8 scan_type, u32 unscanned, + u8 status, u8 scan_type, u32 unscanned, u8 page, u8 *edl/* , struct list_head *pan_desc_list */) { struct sk_buff *msg; @@ -251,6 +251,7 @@ int ieee802154_nl_scan_confirm(struct net_device *dev, NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); NLA_PUT_U8(msg, IEEE802154_ATTR_SCAN_TYPE, scan_type); NLA_PUT_U32(msg, IEEE802154_ATTR_CHANNELS, unscanned); + NLA_PUT_U8(msg, IEEE802154_ATTR_PAGE, page); if (edl) NLA_PUT(msg, IEEE802154_ATTR_ED_LIST, 27, edl); @@ -349,6 +350,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; + u8 page; int ret = -EINVAL; if (!info->attrs[IEEE802154_ATTR_CHANNEL] || @@ -374,8 +376,14 @@ static int ieee802154_associate_req(struct sk_buff *skb, } addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); + if (info->attrs[IEEE802154_ATTR_PAGE]) + page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); + else + page = 0; + ret = ieee802154_mlme_ops(dev)->assoc_req(dev, &addr, nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]), + page, nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); dev_put(dev); @@ -458,6 +466,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) struct ieee802154_addr addr; u8 channel, bcn_ord, sf_ord; + u8 page; int pan_coord, blx, coord_realign; int ret; @@ -488,13 +497,19 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) blx = nla_get_u8(info->attrs[IEEE802154_ATTR_BAT_EXT]); coord_realign = nla_get_u8(info->attrs[IEEE802154_ATTR_COORD_REALIGN]); + if (info->attrs[IEEE802154_ATTR_PAGE]) + page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); + else + page = 0; + + if (addr.short_addr == IEEE802154_ADDR_BROADCAST) { ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS); dev_put(dev); return -EINVAL; } - ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, + ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, bcn_ord, sf_ord, pan_coord, blx, coord_realign); dev_put(dev); @@ -508,6 +523,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) u8 type; u32 channels; u8 duration; + u8 page; if (!info->attrs[IEEE802154_ATTR_SCAN_TYPE] || !info->attrs[IEEE802154_ATTR_CHANNELS] || @@ -522,7 +538,13 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); duration = nla_get_u8(info->attrs[IEEE802154_ATTR_DURATION]); - ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, + if (info->attrs[IEEE802154_ATTR_PAGE]) + page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); + else + page = 0; + + + ret = 
ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, duration); dev_put(dev); diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 83cb4ccef90d..2363ebee02e7 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -33,6 +33,7 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, }, [IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_PAGE] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, }, [IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, }, -- cgit v1.2.3 From 929122cdd5d4c344e59f9b55f870a8fcf7aa0d27 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 14 Aug 2009 20:00:20 +0400 Subject: Drop ARPHRD_IEEE802154_PHY There are no master devices in mac802154 anymore, so drop the ARPHRD_IEEE802154_PHY definition. Signed-off-by: Dmitry Eremin-Solenikov --- include/linux/if_arp.h | 1 - net/core/dev.c | 4 ++-- net/ieee802154/af_ieee802154.c | 4 +--- net/ieee802154/raw.c | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index b554300ef8bf..282eb37e2dec 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -87,7 +87,6 @@ #define ARPHRD_IEEE80211_PRISM 802 /* IEEE 802.11 + Prism2 header */ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ #define ARPHRD_IEEE802154 804 -#define ARPHRD_IEEE802154_PHY 805 #define ARPHRD_PHONET 820 /* PhoNet media type */ #define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */ diff --git a/net/core/dev.c b/net/core/dev.c index 09fb03fa1ae6..4b837896ebd2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -269,7 +269,7 @@ static const unsigned short netdev_lock_type[] = ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, - ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY, + ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; static const char *const netdev_lock_name[] = @@ -287,7 +287,7 @@ static const char *const netdev_lock_name[] = "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", - "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY", + "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index d504c349cb0c..cd949d5e451b 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -147,9 +147,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, dev_load(sock_net(sk), ifr.ifr_name); dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); - if ((dev->type == ARPHRD_IEEE802154 || - dev->type == ARPHRD_IEEE802154_PHY) && - dev->netdev_ops->ndo_do_ioctl) + if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 60dee69a1d04..4681501aae93 100644 --- a/net/ieee802154/raw.c 
+++ b/net/ieee802154/raw.c @@ -74,8 +74,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int len) goto out; } - if (dev->type != ARPHRD_IEEE802154_PHY && - dev->type != ARPHRD_IEEE802154) { + if (dev->type != ARPHRD_IEEE802154) { err = -ENODEV; goto out_put; } -- cgit v1.2.3 From fc69f4a6af49ee69475dc4217924d9edf77760e0 Mon Sep 17 00:00:00 2001 From: Tai-hwa Liang Date: Sun, 10 May 2009 18:15:39 -0700 Subject: Input: add new driver for Sentelic Finger Sensing Pad This is the driver for Sentelic Finger Sensing Pad which can be found on MSI WIND Netbook. Signed-off-by: Tai-hwa Liang Signed-off-by: Dmitry Torokhov --- Documentation/input/sentelic.txt | 475 ++++++++++++++++++++ drivers/input/mouse/Kconfig | 8 + drivers/input/mouse/Makefile | 1 + drivers/input/mouse/psmouse-base.c | 26 +- drivers/input/mouse/psmouse.h | 1 + drivers/input/mouse/sentelic.c | 867 +++++++++++++++++++++++++++++++++++++ drivers/input/mouse/sentelic.h | 98 +++++ drivers/input/serio/libps2.c | 15 +- include/linux/libps2.h | 1 + 9 files changed, 1488 insertions(+), 4 deletions(-) create mode 100644 Documentation/input/sentelic.txt create mode 100644 drivers/input/mouse/sentelic.c create mode 100644 drivers/input/mouse/sentelic.h (limited to 'include/linux') diff --git a/Documentation/input/sentelic.txt b/Documentation/input/sentelic.txt new file mode 100644 index 000000000000..f7160a2fb6a2 --- /dev/null +++ b/Documentation/input/sentelic.txt @@ -0,0 +1,475 @@ +Copyright (C) 2002-2008 Sentelic Corporation. +Last update: Oct-31-2008 + +============================================================================== +* Finger Sensing Pad Intellimouse Mode(scrolling wheel, 4th and 5th buttons) +============================================================================== +A) MSID 4: Scrolling wheel mode plus Forward page(4th button) and Backward + page (5th button) +@1. Set sample rate to 200; +@2. Set sample rate to 200; +@3. Set sample rate to 80; +@4. Issuing the "Get device ID" command (0xF2) and waits for the response; +@5. FSP will respond 0x04. + +Packet 1 + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|W|W|W|W| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7 => Y overflow + Bit6 => X overflow + Bit5 => Y sign bit + Bit4 => X sign bit + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. +Byte 2: X Movement(9-bit 2's complement integers) +Byte 3: Y Movement(9-bit 2's complement integers) +Byte 4: Bit3~Bit0 => the scrolling wheel's movement since the last data report. + valid values, -8 ~ +7 + Bit4 => 1 = 4th mouse button is pressed, Forward one page. + 0 = 4th mouse button is not pressed. + Bit5 => 1 = 5th mouse button is pressed, Backward one page. + 0 = 5th mouse button is not pressed. + +B) MSID 6: Horizontal and Vertical scrolling. +@ Set bit 1 in register 0x40 to 1 + +# FSP replaces scrolling wheel's movement as 4 bits to show horizontal and + vertical scrolling. 
+ +Packet 1 + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|l|r|u|d| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7 => Y overflow + Bit6 => X overflow + Bit5 => Y sign bit + Bit4 => X sign bit + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. +Byte 2: X Movement(9-bit 2's complement integers) +Byte 3: Y Movement(9-bit 2's complement integers) +Byte 4: Bit0 => the Vertical scrolling movement downward. + Bit1 => the Vertical scrolling movement upward. + Bit2 => the Vertical scrolling movement rightward. + Bit3 => the Vertical scrolling movement leftward. + Bit4 => 1 = 4th mouse button is pressed, Forward one page. + 0 = 4th mouse button is not pressed. + Bit5 => 1 = 5th mouse button is pressed, Backward one page. + 0 = 5th mouse button is not pressed. + +C) MSID 7: +# FSP uses 2 packets(8 Bytes) data to represent Absolute Position + so we have PACKET NUMBER to identify packets. + If PACKET NUMBER is 0, the packet is Packet 1. + If PACKET NUMBER is 1, the packet is Packet 2. + Please count this number in program. + +# MSID6 special packet will be enable at the same time when enable MSID 7. + +============================================================================== +* Absolute position for STL3886-G0. +============================================================================== +@ Set bit 2 or 3 in register 0x40 to 1 +@ Set bit 6 in register 0x40 to 1 + +Packet 1 (ABSOLUTE POSITION) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|V|1|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|d|u|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + Bit5 => valid bit + Bit4 => 1 + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. +Byte 2: X coordinate (xpos[9:2]) +Byte 3: Y coordinate (ypos[9:2]) +Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit4 => scroll up + Bit5 => scroll down + Bit6 => scroll left + Bit7 => scroll right + +Notify Packet for G0 + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |1|0|0|1|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |M|M|M|M|M|M|M|M| 4 |0|0|0|0|0|0|0|0| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + Bit5 => 0 + Bit4 => 1 + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. 
+Byte 2: Message Type => 0x5A (Enable/Disable status packet) + Mode Type => 0xA5 (Normal/Icon mode status) +Byte 3: Message Type => 0x00 (Disabled) + => 0x01 (Enabled) + Mode Type => 0x00 (Normal) + => 0x01 (Icon) +Byte 4: Bit7~Bit0 => Don't Care + +============================================================================== +* Absolute position for STL3888-A0. +============================================================================== +Packet 1 (ABSOLUTE POSITION) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|V|A|1|L|0|1| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. + When both fingers are up, the last two reports have zero valid + bit. + Bit4 => arc + Bit3 => 1 + Bit2 => Left Button, 1 is pressed, 0 is released. + Bit1 => 0 + Bit0 => 1 +Byte 2: X coordinate (xpos[9:2]) +Byte 3: Y coordinate (ypos[9:2]) +Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit5~Bit4 => y1_g + Bit7~Bit6 => x1_g + +Packet 2 (ABSOLUTE POSITION) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|V|A|1|R|1|0| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordinates packet + => 10, Notify packet + Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. + When both fingers are up, the last two reports have zero valid + bit. + Bit4 => arc + Bit3 => 1 + Bit2 => Right Button, 1 is pressed, 0 is released. + Bit1 => 1 + Bit0 => 0 +Byte 2: X coordinate (xpos[9:2]) +Byte 3: Y coordinate (ypos[9:2]) +Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit5~Bit4 => y2_g + Bit7~Bit6 => x2_g + +Notify Packet for STL3888-A0 + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |1|0|1|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|d|u|0|0|0|0| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + Bit5 => 1 + Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1): + 0: left button is generated by the on-pad command + 1: left button is generated by the external button + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. +Byte 2: Message Type => 0xB7 (Multi Finger, Multi Coordinate mode) +Byte 3: Bit7~Bit6 => Don't care + Bit5~Bit4 => Number of fingers + Bit3~Bit1 => Reserved + Bit0 => 1: enter gesture mode; 0: leaving gesture mode +Byte 4: Bit7 => scroll right button + Bit6 => scroll left button + Bit5 => scroll down button + Bit4 => scroll up button + * Note that if gesture and additional button (Bit4~Bit7) + happen at the same time, the button information will not + be sent. 
+ Bit3~Bit0 => Reserved + +Sample sequence of Multi-finger, Multi-coordinate mode: + + notify packet (valid bit == 1), abs pkt 1, abs pkt 2, abs pkt 1, + abs pkt 2, ..., notify packet(valid bit == 0) + +============================================================================== +* FSP Enable/Disable packet +============================================================================== + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |Y|X|0|0|1|M|R|L| 2 |0|1|0|1|1|0|1|E| 3 | | | | | | | | | 4 | | | | | | | | | + |---------------| |---------------| |---------------| |---------------| + +FSP will send out enable/disable packet when FSP receive PS/2 enable/disable +command. Host will receive the packet which Middle, Right, Left button will +be set. The packet only use byte 0 and byte 1 as a pattern of original packet. +Ignore the other bytes of the packet. + +Byte 1: Bit7 => 0, Y overflow + Bit6 => 0, X overflow + Bit5 => 0, Y sign bit + Bit4 => 0, X sign bit + Bit3 => 1 + Bit2 => 1, Middle Button + Bit1 => 1, Right Button + Bit0 => 1, Left Button +Byte 2: Bit7~1 => (0101101b) + Bit0 => 1 = Enable + 0 = Disable +Byte 3: Don't care +Byte 4: Don't care (MOUSE ID 3, 4) +Byte 5~8: Don't care (Absolute packet) + +============================================================================== +* PS/2 Command Set +============================================================================== + +FSP supports basic PS/2 commanding set and modes, refer to following URL for +details about PS/2 commands: + +http://www.computer-engineering.org/index.php?title=PS/2_Mouse_Interface + +============================================================================== +* Programming Sequence for Determining Packet Parsing Flow +============================================================================== +1. Identify FSP by reading device ID(0x00) and version(0x01) register + +2. Determine number of buttons by reading status2 (0x0b) register + + buttons = reg[0x0b] & 0x30 + + if buttons == 0x30 or buttons == 0x20: + # two/four buttons + Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' + section A for packet parsing detail(ignore byte 4, bit ~ 7) + elif buttons == 0x10: + # 6 buttons + Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' + section B for packet parsing detail + elif buttons == 0x00: + # 6 buttons + Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' + section A for packet parsing detail + +============================================================================== +* Programming Sequence for Register Reading/Writing +============================================================================== + +Register inversion requirement: + + Following values needed to be inverted(the '~' operator in C) before being +sent to FSP: + + 0xe9, 0xee, 0xf2 and 0xff. + +Register swapping requirement: + + Following values needed to have their higher 4 bits and lower 4 bits being +swapped before being sent to FSP: + + 10, 20, 40, 60, 80, 100 and 200. + +Register reading sequence: + + 1. send 0xf3 PS/2 command to FSP; + + 2. send 0x66 PS/2 command to FSP; + + 3. send 0x88 PS/2 command to FSP; + + 4. send 0xf3 PS/2 command to FSP; + + 5. if the register address being to read is not required to be + inverted(refer to the 'Register inversion requirement' section), + goto step 6 + + 5a. send 0x68 PS/2 command to FSP; + + 5b. send the inverted register address to FSP and goto step 8; + + 6. 
if the register address being to read is not required to be + swapped(refer to the 'Register swapping requirement' section), + goto step 7 + + 6a. send 0xcc PS/2 command to FSP; + + 6b. send the swapped register address to FSP and goto step 8; + + 7. send 0x66 PS/2 command to FSP; + + 7a. send the original register address to FSP and goto step 8; + + 8. send 0xe9(status request) PS/2 command to FSP; + + 9. the response read from FSP should be the requested register value. + +Register writing sequence: + + 1. send 0xf3 PS/2 command to FSP; + + 2. if the register address being to write is not required to be + inverted(refer to the 'Register inversion requirement' section), + goto step 3 + + 2a. send 0x74 PS/2 command to FSP; + + 2b. send the inverted register address to FSP and goto step 5; + + 3. if the register address being to write is not required to be + swapped(refer to the 'Register swapping requirement' section), + goto step 4 + + 3a. send 0x77 PS/2 command to FSP; + + 3b. send the swapped register address to FSP and goto step 5; + + 4. send 0x55 PS/2 command to FSP; + + 4a. send the register address to FSP and goto step 5; + + 5. send 0xf3 PS/2 command to FSP; + + 6. if the register value being to write is not required to be + inverted(refer to the 'Register inversion requirement' section), + goto step 7 + + 6a. send 0x47 PS/2 command to FSP; + + 6b. send the inverted register value to FSP and goto step 9; + + 7. if the register value being to write is not required to be + swapped(refer to the 'Register swapping requirement' section), + goto step 8 + + 7a. send 0x44 PS/2 command to FSP; + + 7b. send the swapped register value to FSP and goto step 9; + + 8. send 0x33 PS/2 command to FSP; + + 8a. send the register value to FSP; + + 9. the register writing sequence is completed. + +============================================================================== +* Register Listing +============================================================================== + +offset width default r/w name +0x00 bit7~bit0 0x01 RO device ID + +0x01 bit7~bit0 0xc0 RW version ID + +0x02 bit7~bit0 0x01 RO vendor ID + +0x03 bit7~bit0 0x01 RO product ID + +0x04 bit3~bit0 0x01 RW revision ID + +0x0b RO test mode status 1 + bit3 1 RO 0: rotate 180 degree, 1: no rotation + + bit5~bit4 RO number of buttons + 11 => 2, lbtn/rbtn + 10 => 4, lbtn/rbtn/scru/scrd + 01 => 6, lbtn/rbtn/scru/scrd/scrl/scrr + 00 => 6, lbtn/rbtn/scru/scrd/fbtn/bbtn + +0x0f RW register file page control + bit0 0 RW 1 to enable page 1 register files + +0x10 RW system control 1 + bit0 1 RW Reserved, must be 1 + bit1 0 RW Reserved, must be 0 + bit4 1 RW Reserved, must be 0 + bit5 0 RW register clock gating enable + 0: read only, 1: read/write enable + (Note that following registers does not require clock gating being + enabled prior to write: 05 06 07 08 09 0c 0f 10 11 12 16 17 18 23 2e + 40 41 42 43.) 
+ +0x31 RW on-pad command detection + bit7 0 RW on-pad command left button down tag + enable + 0: disable, 1: enable + +0x34 RW on-pad command control 5 + bit4~bit0 0x05 RW XLO in 0s/4/1, so 03h = 0010.1b = 2.5 + (Note that position unit is in 0.5 scanline) + + bit7 0 RW on-pad tap zone enable + 0: disable, 1: enable + +0x35 RW on-pad command control 6 + bit4~bit0 0x1d RW XHI in 0s/4/1, so 19h = 1100.1b = 12.5 + (Note that position unit is in 0.5 scanline) + +0x36 RW on-pad command control 7 + bit4~bit0 0x04 RW YLO in 0s/4/1, so 03h = 0010.1b = 2.5 + (Note that position unit is in 0.5 scanline) + +0x37 RW on-pad command control 8 + bit4~bit0 0x13 RW YHI in 0s/4/1, so 11h = 1000.1b = 8.5 + (Note that position unit is in 0.5 scanline) + +0x40 RW system control 5 + bit1 0 RW FSP Intellimouse mode enable + 0: disable, 1: enable + + bit2 0 RW movement + abs. coordinate mode enable + 0: disable, 1: enable + (Note that this function has the functionality of bit 1 even when + bit 1 is not set. However, the format is different from that of bit 1. + In addition, when bit 1 and bit 2 are set at the same time, bit 2 will + override bit 1.) + + bit3 0 RW abs. coordinate only mode enable + 0: disable, 1: enable + (Note that this function has the functionality of bit 1 even when + bit 1 is not set. However, the format is different from that of bit 1. + In addition, when bit 1, bit 2 and bit 3 are set at the same time, + bit 3 will override bit 1 and 2.) + + bit5 0 RW auto switch enable + 0: disable, 1: enable + + bit6 0 RW G0 abs. + notify packet format enable + 0: disable, 1: enable + (Note that the absolute/relative coordinate output still depends on + bit 2 and 3. That is, if any of those bit is 1, host will receive + absolute coordinates; otherwise, host only receives packets with + relative coordinate.) + +0x43 RW on-pad control + bit0 0 RW on-pad control enable + 0: disable, 1: enable + (Note that if this bit is cleared, bit 3/5 will be ineffective) + + bit3 0 RW on-pad fix vertical scrolling enable + 0: disable, 1: enable + + bit5 0 RW on-pad fix horizontal scrolling enable + 0: disable, 1: enable diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index 90bef5d498f0..3feeb3af8abd 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -107,6 +107,14 @@ config MOUSE_PS2_ELANTECH entries. For further information, see . +config MOUSE_PS2_SENTELIC + bool "Sentelic Finger Sensing Pad PS/2 protocol extension" + depends on MOUSE_PS2 + help + Say Y here if you have a laptop (such as MSI WIND Netbook) + with Sentelic Finger Sensing Pad touchpad. + + If unsure, say N. 
config MOUSE_PS2_TOUCHKIT bool "eGalax TouchKit PS/2 protocol extension" diff --git a/drivers/input/mouse/Makefile b/drivers/input/mouse/Makefile index ea58c9a372b6..570c84a4a654 100644 --- a/drivers/input/mouse/Makefile +++ b/drivers/input/mouse/Makefile @@ -27,5 +27,6 @@ psmouse-$(CONFIG_MOUSE_PS2_ELANTECH) += elantech.o psmouse-$(CONFIG_MOUSE_PS2_OLPC) += hgpk.o psmouse-$(CONFIG_MOUSE_PS2_LOGIPS2PP) += logips2pp.o psmouse-$(CONFIG_MOUSE_PS2_LIFEBOOK) += lifebook.o +psmouse-$(CONFIG_MOUSE_PS2_SENTELIC) += sentelic.o psmouse-$(CONFIG_MOUSE_PS2_TRACKPOINT) += trackpoint.o psmouse-$(CONFIG_MOUSE_PS2_TOUCHKIT) += touchkit_ps2.o diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index b407b355dceb..df318887ca09 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -30,6 +30,7 @@ #include "trackpoint.h" #include "touchkit_ps2.h" #include "elantech.h" +#include "sentelic.h" #define DRIVER_DESC "PS/2 mouse driver" @@ -666,6 +667,20 @@ static int psmouse_extensions(struct psmouse *psmouse, max_proto = PSMOUSE_IMEX; } +/* + * Try Finger Sensing Pad + */ + if (max_proto > PSMOUSE_IMEX) { + if (fsp_detect(psmouse, set_properties) == 0) { + if (!set_properties || fsp_init(psmouse) == 0) + return PSMOUSE_FSP; +/* + * Init failed, try basic relative protocols + */ + max_proto = PSMOUSE_IMEX; + } + } + if (max_proto > PSMOUSE_IMEX) { if (genius_detect(psmouse, set_properties) == 0) return PSMOUSE_GENPS; @@ -813,7 +828,16 @@ static const struct psmouse_protocol psmouse_protocols[] = { .detect = elantech_detect, .init = elantech_init, }, - #endif +#endif +#ifdef CONFIG_MOUSE_PS2_SENTELIC + { + .type = PSMOUSE_FSP, + .name = "FSPPS/2", + .alias = "fsp", + .detect = fsp_detect, + .init = fsp_init, + }, +#endif { .type = PSMOUSE_CORTRON, .name = "CortronPS/2", diff --git a/drivers/input/mouse/psmouse.h b/drivers/input/mouse/psmouse.h index e3562daeb2ed..cca1744c2a08 100644 --- a/drivers/input/mouse/psmouse.h +++ b/drivers/input/mouse/psmouse.h @@ -91,6 +91,7 @@ enum psmouse_type { PSMOUSE_CORTRON, PSMOUSE_HGPK, PSMOUSE_ELANTECH, + PSMOUSE_FSP, PSMOUSE_AUTO /* This one should always be last */ }; diff --git a/drivers/input/mouse/sentelic.c b/drivers/input/mouse/sentelic.c new file mode 100644 index 000000000000..97b1e72855a0 --- /dev/null +++ b/drivers/input/mouse/sentelic.c @@ -0,0 +1,867 @@ +/*- + * Finger Sensing Pad PS/2 mouse driver. + * + * Copyright (C) 2005-2007 Asia Vital Components Co., Ltd. + * Copyright (C) 2005-2009 Tai-hwa Liang, Sentelic Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "psmouse.h" +#include "sentelic.h" + +/* + * Timeout for FSP PS/2 command only (in milliseconds). + */ +#define FSP_CMD_TIMEOUT 200 +#define FSP_CMD_TIMEOUT2 30 + +/** Driver version. 
*/ +static const char fsp_drv_ver[] = "1.0.0-K"; + +/* + * Make sure that the value being sent to FSP will not conflict with + * possible sample rate values. + */ +static unsigned char fsp_test_swap_cmd(unsigned char reg_val) +{ + switch (reg_val) { + case 10: case 20: case 40: case 60: case 80: case 100: case 200: + /* + * The requested value being sent to FSP matched to possible + * sample rates, swap the given value such that the hardware + * wouldn't get confused. + */ + return (reg_val >> 4) | (reg_val << 4); + default: + return reg_val; /* swap isn't necessary */ + } +} + +/* + * Make sure that the value being sent to FSP will not conflict with certain + * commands. + */ +static unsigned char fsp_test_invert_cmd(unsigned char reg_val) +{ + switch (reg_val) { + case 0xe9: case 0xee: case 0xf2: case 0xff: + /* + * The requested value being sent to FSP matched to certain + * commands, inverse the given value such that the hardware + * wouldn't get confused. + */ + return ~reg_val; + default: + return reg_val; /* inversion isn't necessary */ + } +} + +static int fsp_reg_read(struct psmouse *psmouse, int reg_addr, int *reg_val) +{ + struct ps2dev *ps2dev = &psmouse->ps2dev; + unsigned char param[3]; + unsigned char addr; + int rc = -1; + + /* + * We need to shut off the device and switch it into command + * mode so we don't confuse our protocol handler. We don't need + * to do that for writes because sysfs set helper does this for + * us. + */ + ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE); + psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); + mutex_lock(&ps2dev->cmd_mutex); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + /* should return 0xfe(request for resending) */ + ps2_sendbyte(ps2dev, 0x66, FSP_CMD_TIMEOUT2); + /* should return 0xfc(failed) */ + ps2_sendbyte(ps2dev, 0x88, FSP_CMD_TIMEOUT2); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + if ((addr = fsp_test_invert_cmd(reg_addr)) != reg_addr) { + ps2_sendbyte(ps2dev, 0x68, FSP_CMD_TIMEOUT2); + } else if ((addr = fsp_test_swap_cmd(reg_addr)) != reg_addr) { + /* swapping is required */ + ps2_sendbyte(ps2dev, 0xcc, FSP_CMD_TIMEOUT2); + /* expect 0xfe */ + } else { + /* swapping isn't necessary */ + ps2_sendbyte(ps2dev, 0x66, FSP_CMD_TIMEOUT2); + /* expect 0xfe */ + } + /* should return 0xfc(failed) */ + ps2_sendbyte(ps2dev, addr, FSP_CMD_TIMEOUT); + + if (__ps2_command(ps2dev, param, PSMOUSE_CMD_GETINFO) < 0) + goto out; + + *reg_val = param[2]; + rc = 0; + + out: + mutex_unlock(&ps2dev->cmd_mutex); + ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE); + psmouse_set_state(psmouse, PSMOUSE_ACTIVATED); + dev_dbg(&ps2dev->serio->dev, "READ REG: 0x%02x is 0x%02x (rc = %d)\n", + reg_addr, *reg_val, rc); + return rc; +} + +static int fsp_reg_write(struct psmouse *psmouse, int reg_addr, int reg_val) +{ + struct ps2dev *ps2dev = &psmouse->ps2dev; + unsigned char v; + int rc = -1; + + mutex_lock(&ps2dev->cmd_mutex); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + if ((v = fsp_test_invert_cmd(reg_addr)) != reg_addr) { + /* inversion is required */ + ps2_sendbyte(ps2dev, 0x74, FSP_CMD_TIMEOUT2); + } else { + if ((v = fsp_test_swap_cmd(reg_addr)) != reg_addr) { + /* swapping is required */ + ps2_sendbyte(ps2dev, 0x77, FSP_CMD_TIMEOUT2); + } else { + /* swapping isn't necessary */ + ps2_sendbyte(ps2dev, 0x55, FSP_CMD_TIMEOUT2); + } + } + /* write the register address in correct order */ + ps2_sendbyte(ps2dev, v, FSP_CMD_TIMEOUT2); + + if (ps2_sendbyte(ps2dev, 0xf3, 
FSP_CMD_TIMEOUT) < 0) + return -1; + + if ((v = fsp_test_invert_cmd(reg_val)) != reg_val) { + /* inversion is required */ + ps2_sendbyte(ps2dev, 0x47, FSP_CMD_TIMEOUT2); + } else if ((v = fsp_test_swap_cmd(reg_val)) != reg_val) { + /* swapping is required */ + ps2_sendbyte(ps2dev, 0x44, FSP_CMD_TIMEOUT2); + } else { + /* swapping isn't necessary */ + ps2_sendbyte(ps2dev, 0x33, FSP_CMD_TIMEOUT2); + } + + /* write the register value in correct order */ + ps2_sendbyte(ps2dev, v, FSP_CMD_TIMEOUT2); + rc = 0; + + out: + mutex_unlock(&ps2dev->cmd_mutex); + dev_dbg(&ps2dev->serio->dev, "WRITE REG: 0x%02x to 0x%02x (rc = %d)\n", + reg_addr, reg_val, rc); + return rc; +} + +/* Enable register clock gating for writing certain registers */ +static int fsp_reg_write_enable(struct psmouse *psmouse, bool enable) +{ + int v, nv; + + if (fsp_reg_read(psmouse, FSP_REG_SYSCTL1, &v) == -1) + return -1; + + if (enable) + nv = v | FSP_BIT_EN_REG_CLK; + else + nv = v & ~FSP_BIT_EN_REG_CLK; + + /* only write if necessary */ + if (nv != v) + if (fsp_reg_write(psmouse, FSP_REG_SYSCTL1, nv) == -1) + return -1; + + return 0; +} + +static int fsp_page_reg_read(struct psmouse *psmouse, int *reg_val) +{ + struct ps2dev *ps2dev = &psmouse->ps2dev; + unsigned char param[3]; + int rc = -1; + + ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE); + psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); + mutex_lock(&ps2dev->cmd_mutex); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + ps2_sendbyte(ps2dev, 0x66, FSP_CMD_TIMEOUT2); + ps2_sendbyte(ps2dev, 0x88, FSP_CMD_TIMEOUT2); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + ps2_sendbyte(ps2dev, 0x83, FSP_CMD_TIMEOUT2); + ps2_sendbyte(ps2dev, 0x88, FSP_CMD_TIMEOUT2); + + /* get the returned result */ + if (__ps2_command(ps2dev, param, PSMOUSE_CMD_GETINFO)) + goto out; + + *reg_val = param[2]; + rc = 0; + + out: + mutex_unlock(&ps2dev->cmd_mutex); + ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE); + psmouse_set_state(psmouse, PSMOUSE_ACTIVATED); + dev_dbg(&ps2dev->serio->dev, "READ PAGE REG: 0x%02x (rc = %d)\n", + *reg_val, rc); + return rc; +} + +static int fsp_page_reg_write(struct psmouse *psmouse, int reg_val) +{ + struct ps2dev *ps2dev = &psmouse->ps2dev; + unsigned char v; + int rc = -1; + + mutex_lock(&ps2dev->cmd_mutex); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + ps2_sendbyte(ps2dev, 0x38, FSP_CMD_TIMEOUT2); + ps2_sendbyte(ps2dev, 0x88, FSP_CMD_TIMEOUT2); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + return -1; + + if ((v = fsp_test_invert_cmd(reg_val)) != reg_val) { + ps2_sendbyte(ps2dev, 0x47, FSP_CMD_TIMEOUT2); + } else if ((v = fsp_test_swap_cmd(reg_val)) != reg_val) { + /* swapping is required */ + ps2_sendbyte(ps2dev, 0x44, FSP_CMD_TIMEOUT2); + } else { + /* swapping isn't necessary */ + ps2_sendbyte(ps2dev, 0x33, FSP_CMD_TIMEOUT2); + } + + ps2_sendbyte(ps2dev, v, FSP_CMD_TIMEOUT2); + rc = 0; + + out: + mutex_unlock(&ps2dev->cmd_mutex); + dev_dbg(&ps2dev->serio->dev, "WRITE PAGE REG: to 0x%02x (rc = %d)\n", + reg_val, rc); + return rc; +} + +static int fsp_get_version(struct psmouse *psmouse, int *version) +{ + if (fsp_reg_read(psmouse, FSP_REG_VERSION, version)) + return -EIO; + + return 0; +} + +static int fsp_get_revision(struct psmouse *psmouse, int *rev) +{ + if (fsp_reg_read(psmouse, FSP_REG_REVISION, rev)) + return -EIO; + + return 0; +} + +static int fsp_get_buttons(struct psmouse *psmouse, int *btn) +{ + static const int buttons[] = { + 0x16, /* 
Left/Middle/Right/Forward/Backward & Scroll Up/Down */ + 0x06, /* Left/Middle/Right & Scroll Up/Down/Right/Left */ + 0x04, /* Left/Middle/Right & Scroll Up/Down */ + 0x02, /* Left/Middle/Right */ + }; + int val; + + if (fsp_reg_read(psmouse, FSP_REG_TMOD_STATUS1, &val) == -1) + return -EIO; + + *btn = buttons[(val & 0x30) >> 4]; + return 0; +} + +/* Enable on-pad command tag output */ +static int fsp_opc_tag_enable(struct psmouse *psmouse, bool enable) +{ + int v, nv; + int res = 0; + + if (fsp_reg_read(psmouse, FSP_REG_OPC_QDOWN, &v) == -1) { + dev_err(&psmouse->ps2dev.serio->dev, "Unable get OPC state.\n"); + return -EIO; + } + + if (enable) + nv = v | FSP_BIT_EN_OPC_TAG; + else + nv = v & ~FSP_BIT_EN_OPC_TAG; + + /* only write if necessary */ + if (nv != v) { + fsp_reg_write_enable(psmouse, true); + res = fsp_reg_write(psmouse, FSP_REG_OPC_QDOWN, nv); + fsp_reg_write_enable(psmouse, false); + } + + if (res != 0) { + dev_err(&psmouse->ps2dev.serio->dev, + "Unable to enable OPC tag.\n"); + res = -EIO; + } + + return res; +} + +static int fsp_onpad_vscr(struct psmouse *psmouse, bool enable) +{ + struct fsp_data *pad = psmouse->private; + int val; + + if (fsp_reg_read(psmouse, FSP_REG_ONPAD_CTL, &val)) + return -EIO; + + pad->vscroll = enable; + + if (enable) + val |= (FSP_BIT_FIX_VSCR | FSP_BIT_ONPAD_ENABLE); + else + val &= ~FSP_BIT_FIX_VSCR; + + if (fsp_reg_write(psmouse, FSP_REG_ONPAD_CTL, val)) + return -EIO; + + return 0; +} + +static int fsp_onpad_hscr(struct psmouse *psmouse, bool enable) +{ + struct fsp_data *pad = psmouse->private; + int val, v2; + + if (fsp_reg_read(psmouse, FSP_REG_ONPAD_CTL, &val)) + return -EIO; + + if (fsp_reg_read(psmouse, FSP_REG_SYSCTL5, &v2)) + return -EIO; + + pad->hscroll = enable; + + if (enable) { + val |= (FSP_BIT_FIX_HSCR | FSP_BIT_ONPAD_ENABLE); + v2 |= FSP_BIT_EN_MSID6; + } else { + val &= ~FSP_BIT_FIX_HSCR; + v2 &= ~(FSP_BIT_EN_MSID6 | FSP_BIT_EN_MSID7 | FSP_BIT_EN_MSID8); + } + + if (fsp_reg_write(psmouse, FSP_REG_ONPAD_CTL, val)) + return -EIO; + + /* reconfigure horizontal scrolling packet output */ + if (fsp_reg_write(psmouse, FSP_REG_SYSCTL5, v2)) + return -EIO; + + return 0; +} + +/* + * Write device specific initial parameters. + * + * ex: 0xab 0xcd - write oxcd into register 0xab + */ +static ssize_t fsp_attr_set_setreg(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + unsigned long reg, val; + char *rest; + ssize_t retval; + + reg = simple_strtoul(buf, &rest, 16); + if (rest == buf || *rest != ' ' || reg > 0xff) + return -EINVAL; + + if (strict_strtoul(rest + 1, 16, &val) || val > 0xff) + return -EINVAL; + + if (fsp_reg_write_enable(psmouse, true)) + return -EIO; + + retval = fsp_reg_write(psmouse, reg, val) < 0 ? -EIO : count; + + fsp_reg_write_enable(psmouse, false); + + return count; +} + +PSMOUSE_DEFINE_WO_ATTR(setreg, S_IWUSR, NULL, fsp_attr_set_setreg); + +static ssize_t fsp_attr_show_getreg(struct psmouse *psmouse, + void *data, char *buf) +{ + struct fsp_data *pad = psmouse->private; + + return sprintf(buf, "%02x%02x\n", pad->last_reg, pad->last_val); +} + +/* + * Read a register from device. 
+ * + * ex: 0xab -- read content from register 0xab + */ +static ssize_t fsp_attr_set_getreg(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + struct fsp_data *pad = psmouse->private; + unsigned long reg; + int val; + + if (strict_strtoul(buf, 16, &reg) || reg > 0xff) + return -EINVAL; + + if (fsp_reg_read(psmouse, reg, &val)) + return -EIO; + + pad->last_reg = reg; + pad->last_val = val; + + return count; +} + +PSMOUSE_DEFINE_ATTR(getreg, S_IWUSR | S_IRUGO, NULL, + fsp_attr_show_getreg, fsp_attr_set_getreg); + +static ssize_t fsp_attr_show_pagereg(struct psmouse *psmouse, + void *data, char *buf) +{ + int val = 0; + + if (fsp_page_reg_read(psmouse, &val)) + return -EIO; + + return sprintf(buf, "%02x\n", val); +} + +static ssize_t fsp_attr_set_pagereg(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + unsigned long val; + + if (strict_strtoul(buf, 16, &val) || val > 0xff) + return -EINVAL; + + if (fsp_page_reg_write(psmouse, val)) + return -EIO; + + return count; +} + +PSMOUSE_DEFINE_ATTR(page, S_IWUSR | S_IRUGO, NULL, + fsp_attr_show_pagereg, fsp_attr_set_pagereg); + +static ssize_t fsp_attr_show_vscroll(struct psmouse *psmouse, + void *data, char *buf) +{ + struct fsp_data *pad = psmouse->private; + + return sprintf(buf, "%d\n", pad->vscroll); +} + +static ssize_t fsp_attr_set_vscroll(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + unsigned long val; + + if (strict_strtoul(buf, 10, &val) || val > 1) + return -EINVAL; + + fsp_onpad_vscr(psmouse, val); + + return count; +} + +PSMOUSE_DEFINE_ATTR(vscroll, S_IWUSR | S_IRUGO, NULL, + fsp_attr_show_vscroll, fsp_attr_set_vscroll); + +static ssize_t fsp_attr_show_hscroll(struct psmouse *psmouse, + void *data, char *buf) +{ + struct fsp_data *pad = psmouse->private; + + return sprintf(buf, "%d\n", pad->hscroll); +} + +static ssize_t fsp_attr_set_hscroll(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + unsigned long val; + + if (strict_strtoul(buf, 10, &val) || val > 1) + return -EINVAL; + + fsp_onpad_hscr(psmouse, val); + + return count; +} + +PSMOUSE_DEFINE_ATTR(hscroll, S_IWUSR | S_IRUGO, NULL, + fsp_attr_show_hscroll, fsp_attr_set_hscroll); + +static ssize_t fsp_attr_show_flags(struct psmouse *psmouse, + void *data, char *buf) +{ + struct fsp_data *pad = psmouse->private; + + return sprintf(buf, "%c\n", + pad->flags & FSPDRV_FLAG_EN_OPC ?
'C' : 'c'); +} + +static ssize_t fsp_attr_set_flags(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + struct fsp_data *pad = psmouse->private; + size_t i; + + for (i = 0; i < count; i++) { + switch (buf[i]) { + case 'C': + pad->flags |= FSPDRV_FLAG_EN_OPC; + break; + case 'c': + pad->flags &= ~FSPDRV_FLAG_EN_OPC; + break; + default: + return -EINVAL; + } + } + return count; +} + +PSMOUSE_DEFINE_ATTR(flags, S_IWUSR | S_IRUGO, NULL, + fsp_attr_show_flags, fsp_attr_set_flags); + +static ssize_t fsp_attr_show_ver(struct psmouse *psmouse, + void *data, char *buf) +{ + return sprintf(buf, "Sentelic FSP kernel module %s\n", fsp_drv_ver); +} + +PSMOUSE_DEFINE_RO_ATTR(ver, S_IRUGO, NULL, fsp_attr_show_ver); + +static struct attribute *fsp_attributes[] = { + &psmouse_attr_setreg.dattr.attr, + &psmouse_attr_getreg.dattr.attr, + &psmouse_attr_page.dattr.attr, + &psmouse_attr_vscroll.dattr.attr, + &psmouse_attr_hscroll.dattr.attr, + &psmouse_attr_flags.dattr.attr, + &psmouse_attr_ver.dattr.attr, + NULL +}; + +static struct attribute_group fsp_attribute_group = { + .attrs = fsp_attributes, +}; + +#ifdef FSP_DEBUG +static void fsp_packet_debug(unsigned char packet[]) +{ + static unsigned int ps2_packet_cnt; + static unsigned int ps2_last_second; + unsigned int jiffies_msec; + + ps2_packet_cnt++; + jiffies_msec = jiffies_to_msecs(jiffies); + printk(KERN_DEBUG "%08dms PS/2 packets: %02x, %02x, %02x, %02x\n", + jiffies_msec, packet[0], packet[1], packet[2], packet[3]); + + if (jiffies_msec - ps2_last_second > 1000) { + printk(KERN_DEBUG "PS/2 packets/sec = %d\n", ps2_packet_cnt); + ps2_packet_cnt = 0; + ps2_last_second = jiffies_msec; + } +} +#else +static void fsp_packet_debug(unsigned char packet[]) +{ +} +#endif + +static psmouse_ret_t fsp_process_byte(struct psmouse *psmouse) +{ + struct input_dev *dev = psmouse->dev; + struct fsp_data *ad = psmouse->private; + unsigned char *packet = psmouse->packet; + unsigned char button_status = 0, lscroll = 0, rscroll = 0; + int rel_x, rel_y; + + if (psmouse->pktcnt < 4) + return PSMOUSE_GOOD_DATA; + + /* + * Full packet accumulated, process it + */ + + switch (psmouse->packet[0] >> FSP_PKT_TYPE_SHIFT) { + case FSP_PKT_TYPE_ABS: + dev_warn(&psmouse->ps2dev.serio->dev, + "Unexpected absolute mode packet, ignored.\n"); + break; + + case FSP_PKT_TYPE_NORMAL_OPC: + /* on-pad click, filter it if necessary */ + if ((ad->flags & FSPDRV_FLAG_EN_OPC) != FSPDRV_FLAG_EN_OPC) + packet[0] &= ~BIT(0); + /* fall through */ + + case FSP_PKT_TYPE_NORMAL: + /* normal packet */ + /* special packet data translation from on-pad packets */ + if (packet[3] != 0) { + if (packet[3] & BIT(0)) + button_status |= 0x01; /* wheel down */ + if (packet[3] & BIT(1)) + button_status |= 0x0f; /* wheel up */ + if (packet[3] & BIT(2)) + button_status |= BIT(5);/* horizontal left */ + if (packet[3] & BIT(3)) + button_status |= BIT(4);/* horizontal right */ + /* push back to packet queue */ + if (button_status != 0) + packet[3] = button_status; + rscroll = (packet[3] >> 4) & 1; + lscroll = (packet[3] >> 5) & 1; + } + /* + * Processing wheel up/down and extra button events + */ + input_report_rel(dev, REL_WHEEL, + (int)(packet[3] & 8) - (int)(packet[3] & 7)); + input_report_rel(dev, REL_HWHEEL, lscroll - rscroll); + input_report_key(dev, BTN_BACK, lscroll); + input_report_key(dev, BTN_FORWARD, rscroll); + + /* + * Standard PS/2 Mouse + */ + input_report_key(dev, BTN_LEFT, packet[0] & 1); + input_report_key(dev, BTN_MIDDLE, (packet[0] >> 2) & 1); + input_report_key(dev, BTN_RIGHT, 
(packet[0] >> 1) & 1); + + rel_x = packet[1] ? (int)packet[1] - (int)((packet[0] << 4) & 0x100) : 0; + rel_y = packet[2] ? (int)((packet[0] << 3) & 0x100) - (int)packet[2] : 0; + + input_report_rel(dev, REL_X, rel_x); + input_report_rel(dev, REL_Y, rel_y); + break; + } + + input_sync(dev); + + fsp_packet_debug(packet); + + return PSMOUSE_FULL_PACKET; +} + +static int fsp_activate_protocol(struct psmouse *psmouse) +{ + struct fsp_data *pad = psmouse->private; + struct ps2dev *ps2dev = &psmouse->ps2dev; + unsigned char param[2]; + int val; + + /* + * Standard procedure to enter FSP Intellimouse mode + * (scrolling wheel, 4th and 5th buttons) + */ + param[0] = 200; + ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); + param[0] = 200; + ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); + param[0] = 80; + ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); + + ps2_command(ps2dev, param, PSMOUSE_CMD_GETID); + if (param[0] != 0x04) { + dev_err(&psmouse->ps2dev.serio->dev, + "Unable to enable 4 bytes packet format.\n"); + return -EIO; + } + + if (fsp_reg_read(psmouse, FSP_REG_SYSCTL5, &val)) { + dev_err(&psmouse->ps2dev.serio->dev, + "Unable to read SYSCTL5 register.\n"); + return -EIO; + } + + val &= ~(FSP_BIT_EN_MSID7 | FSP_BIT_EN_MSID8 | FSP_BIT_EN_AUTO_MSID8); + /* Ensure we are not in absolute mode */ + val &= ~FSP_BIT_EN_PKT_G0; + if (pad->buttons == 0x06) { + /* Left/Middle/Right & Scroll Up/Down/Right/Left */ + val |= FSP_BIT_EN_MSID6; + } + + if (fsp_reg_write(psmouse, FSP_REG_SYSCTL5, val)) { + dev_err(&psmouse->ps2dev.serio->dev, + "Unable to set up required mode bits.\n"); + return -EIO; + } + + /* + * Enable OPC tags such that driver can tell the difference between + * on-pad and real button click + */ + if (fsp_opc_tag_enable(psmouse, true)) + dev_warn(&psmouse->ps2dev.serio->dev, + "Failed to enable OPC tag mode.\n"); + + /* Enable on-pad vertical and horizontal scrolling */ + fsp_onpad_vscr(psmouse, true); + fsp_onpad_hscr(psmouse, true); + + return 0; +} + +int fsp_detect(struct psmouse *psmouse, int set_properties) +{ + int id; + + if (fsp_reg_read(psmouse, FSP_REG_DEVICE_ID, &id)) + return -EIO; + + if (id != 0x01) + return -ENODEV; + + if (set_properties) { + psmouse->vendor = "Sentelic"; + psmouse->name = "FingerSensingPad"; + } + + return 0; +} + +static void fsp_reset(struct psmouse *psmouse) +{ + fsp_opc_tag_enable(psmouse, false); + fsp_onpad_vscr(psmouse, false); + fsp_onpad_hscr(psmouse, false); +} + +static void fsp_disconnect(struct psmouse *psmouse) +{ + sysfs_remove_group(&psmouse->ps2dev.serio->dev.kobj, + &fsp_attribute_group); + + fsp_reset(psmouse); + kfree(psmouse->private); +} + +static int fsp_reconnect(struct psmouse *psmouse) +{ + int version; + + if (fsp_detect(psmouse, 0)) + return -ENODEV; + + if (fsp_get_version(psmouse, &version)) + return -ENODEV; + + if (fsp_activate_protocol(psmouse)) + return -EIO; + + return 0; +} + +int fsp_init(struct psmouse *psmouse) +{ + struct fsp_data *priv; + int ver, rev, buttons; + int error; + + if (fsp_get_version(psmouse, &ver) || + fsp_get_revision(psmouse, &rev) || + fsp_get_buttons(psmouse, &buttons)) { + return -ENODEV; + } + + printk(KERN_INFO + "Finger Sensing Pad, hw: %d.%d.%d, sw: %s, buttons: %d\n", + ver >> 4, ver & 0x0F, rev, fsp_drv_ver, buttons & 7); + + psmouse->private = priv = kzalloc(sizeof(struct fsp_data), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->ver = ver; + priv->rev = rev; + priv->buttons = buttons; + + /* enable on-pad click by default */ + priv->flags |= FSPDRV_FLAG_EN_OPC; + + /* Set up 
various supported input event bits */ + __set_bit(BTN_BACK, psmouse->dev->keybit); + __set_bit(BTN_FORWARD, psmouse->dev->keybit); + __set_bit(REL_WHEEL, psmouse->dev->relbit); + __set_bit(REL_HWHEEL, psmouse->dev->relbit); + + psmouse->protocol_handler = fsp_process_byte; + psmouse->disconnect = fsp_disconnect; + psmouse->reconnect = fsp_reconnect; + psmouse->cleanup = fsp_reset; + psmouse->pktsize = 4; + + /* set default packet output based on number of buttons we found */ + error = fsp_activate_protocol(psmouse); + if (error) + goto err_out; + + error = sysfs_create_group(&psmouse->ps2dev.serio->dev.kobj, + &fsp_attribute_group); + if (error) { + dev_err(&psmouse->ps2dev.serio->dev, + "Failed to create sysfs attributes (%d)", error); + goto err_out; + } + + return 0; + + err_out: + kfree(psmouse->private); + psmouse->private = NULL; + return error; +} diff --git a/drivers/input/mouse/sentelic.h b/drivers/input/mouse/sentelic.h new file mode 100644 index 000000000000..083559c7282b --- /dev/null +++ b/drivers/input/mouse/sentelic.h @@ -0,0 +1,98 @@ +/*- + * Finger Sensing Pad PS/2 mouse driver. + * + * Copyright (C) 2005-2007 Asia Vital Components Co., Ltd. + * Copyright (C) 2005-2009 Tai-hwa Liang, Sentelic Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __SENTELIC_H +#define __SENTELIC_H + +/* Finger-sensing Pad information registers */ +#define FSP_REG_DEVICE_ID 0x00 +#define FSP_REG_VERSION 0x01 +#define FSP_REG_REVISION 0x04 +#define FSP_REG_TMOD_STATUS1 0x0B +#define FSP_BIT_NO_ROTATION BIT(3) +#define FSP_REG_PAGE_CTRL 0x0F + +/* Finger-sensing Pad control registers */ +#define FSP_REG_SYSCTL1 0x10 +#define FSP_BIT_EN_REG_CLK BIT(5) +#define FSP_REG_OPC_QDOWN 0x31 +#define FSP_BIT_EN_OPC_TAG BIT(7) +#define FSP_REG_OPTZ_XLO 0x34 +#define FSP_REG_OPTZ_XHI 0x35 +#define FSP_REG_OPTZ_YLO 0x36 +#define FSP_REG_OPTZ_YHI 0x37 +#define FSP_REG_SYSCTL5 0x40 +#define FSP_BIT_90_DEGREE BIT(0) +#define FSP_BIT_EN_MSID6 BIT(1) +#define FSP_BIT_EN_MSID7 BIT(2) +#define FSP_BIT_EN_MSID8 BIT(3) +#define FSP_BIT_EN_AUTO_MSID8 BIT(5) +#define FSP_BIT_EN_PKT_G0 BIT(6) + +#define FSP_REG_ONPAD_CTL 0x43 +#define FSP_BIT_ONPAD_ENABLE BIT(0) +#define FSP_BIT_ONPAD_FBBB BIT(1) +#define FSP_BIT_FIX_VSCR BIT(3) +#define FSP_BIT_FIX_HSCR BIT(5) +#define FSP_BIT_DRAG_LOCK BIT(6) + +/* Finger-sensing Pad packet formating related definitions */ + +/* absolute packet type */ +#define FSP_PKT_TYPE_NORMAL (0x00) +#define FSP_PKT_TYPE_ABS (0x01) +#define FSP_PKT_TYPE_NOTIFY (0x02) +#define FSP_PKT_TYPE_NORMAL_OPC (0x03) +#define FSP_PKT_TYPE_SHIFT (6) + +#ifdef __KERNEL__ + +struct fsp_data { + unsigned char ver; /* hardware version */ + unsigned char rev; /* hardware revison */ + unsigned char buttons; /* Number of buttons */ + unsigned int flags; +#define FSPDRV_FLAG_EN_OPC (0x001) /* enable on-pad clicking */ + + bool vscroll; /* Vertical scroll zone enabled */ + bool hscroll; /* Horizontal scroll zone enabled */ + + unsigned char last_reg; /* Last register we requested read from */ + unsigned char last_val; +}; + +#ifdef CONFIG_MOUSE_PS2_SENTELIC +extern int fsp_detect(struct psmouse *psmouse, int set_properties); +extern int fsp_init(struct psmouse *psmouse); +#else +inline int fsp_detect(struct psmouse *psmouse, int set_properties) +{ + return -ENOSYS; +} +inline int fsp_init(struct psmouse *psmouse) +{ + return -ENOSYS; +} +#endif + +#endif /* __KERNEL__ */ + +#endif /* !__SENTELIC_H */ diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index be5bbbb8ae4e..3a95b508bf27 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -161,7 +161,7 @@ static int ps2_adjust_timeout(struct ps2dev *ps2dev, int command, int timeout) * ps2_command() can only be called from a process context */ -int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) +int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) { int timeout; int send = (command >> 12) & 0xf; @@ -179,8 +179,6 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) return -1; } - mutex_lock(&ps2dev->cmd_mutex); - serio_pause_rx(ps2dev->serio); ps2dev->flags = command == PS2_CMD_GETID ? 
PS2_FLAG_WAITID : 0; ps2dev->cmdcnt = receive; @@ -231,7 +229,18 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) ps2dev->flags = 0; serio_continue_rx(ps2dev->serio); + return rc; +} +EXPORT_SYMBOL(__ps2_command); + +int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) +{ + int rc; + + mutex_lock(&ps2dev->cmd_mutex); + rc = __ps2_command(ps2dev, param, command); mutex_unlock(&ps2dev->cmd_mutex); + return rc; } EXPORT_SYMBOL(ps2_command); diff --git a/include/linux/libps2.h b/include/linux/libps2.h index b94534b7e266..fcf5fbe6a50c 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -44,6 +44,7 @@ struct ps2dev { void ps2_init(struct ps2dev *ps2dev, struct serio *serio); int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout); void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout); +int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data); int ps2_handle_response(struct ps2dev *ps2dev, unsigned char data); -- cgit v1.2.3 From 4dceef96756b667360741712a8e37490f8458516 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Aug 2009 17:08:39 -0400 Subject: nfs: fix compile error in rpc_pipefs.h This include is needed for the definition of delayed_work. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index a92571a34556..cf14db975da0 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -3,6 +3,8 @@ #ifdef __KERNEL__ +#include <linux/workqueue.h> + struct rpc_pipe_msg { struct list_head list; void *data; -- cgit v1.2.3 From 4516fc0454e7ffe2f369e80045b23c2b32155004 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:54 -0400 Subject: sunrpc: add routine for comparing addresses lockd needs these sorts of routines, as does the NFSv4 callback code. Move lockd's routines into common code and rename them so that they can be used by others. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/lockd/clntlock.c | 2 +- fs/lockd/host.c | 4 ++-- fs/lockd/mon.c | 2 +- fs/lockd/svcsubs.c | 2 +- include/linux/lockd/lockd.h | 43 ---------------------------------------- include/linux/sunrpc/clnt.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 53 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 1f3b0fc0d351..fc9032dc8862 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) */ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) continue; - if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) + if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) continue; if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) continue; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 7cb076ac6b45..4600c2037b8b 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) */ chain = &nlm_hosts[nlm_hash_address(ni->sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { - if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) + if (!rpc_cmp_addr(nlm_addr(host), ni->sap)) continue; /* See if we have an NSM handle for this client */ @@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) if (host->h_server != ni->server) continue; if (ni->server && - !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) + !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) continue; /* Move to head of hash chain. */ diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 30c933188dd7..f956651d0f65 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap) struct nsm_handle *nsm; list_for_each_entry(nsm, &nsm_handles, sm_link) - if (nlm_cmp_addr(nsm_addr(nsm), sap)) + if (rpc_cmp_addr(nsm_addr(nsm), sap)) return nsm; return NULL; } diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 9e4d6aab611b..ad478da7ca63 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); static int nlmsvc_match_ip(void *datap, struct nlm_host *host) { - return nlm_cmp_addr(nlm_srcaddr(host), datap); + return rpc_cmp_addr(nlm_srcaddr(host), datap); } /** diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c325b187966b..e7a251a988c0 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -338,49 +338,6 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) } } -static inline int __nlm_cmp_addr4(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; - const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; - const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; - return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); -} -#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ -static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - return 0; -} -#endif /* !(CONFIG_IPV6 || 
CONFIG_IPV6_MODULE) */ - -/* - * Compare two host addresses - * - * Return TRUE if the addresses are the same; otherwise FALSE. - */ -static inline int nlm_cmp_addr(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - if (sap1->sa_family == sap2->sa_family) { - switch (sap1->sa_family) { - case AF_INET: - return __nlm_cmp_addr4(sap1, sap2); - case AF_INET6: - return __nlm_cmp_addr6(sap1, sap2); - } - } - return 0; -} - /* * Compare two NLM locks. * When the second lock is of type F_UNLCK, this acts like a wildcard. diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab3f6e90caa5..b17df361be82 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -22,6 +22,7 @@ #include #include #include +#include struct rpc_inode; @@ -188,5 +189,52 @@ static inline void rpc_set_port(struct sockaddr *sap, #define IPV6_SCOPE_DELIMITER '%' #define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") +static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); +} +#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return false; +} +#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ + +/** + * rpc_cmp_addr - compare the address portion of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + * + * Just compares the family and address portion. Ignores port, scope, etc. + * Returns true if the addrs are equal, false if they aren't. + */ +static inline bool rpc_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __rpc_cmp_addr4(sap1, sap2); + case AF_INET6: + return __rpc_cmp_addr6(sap1, sap2); + } + } + return false; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From be3ad6b0b675fd1d6b48362ca30bdee75fbef6b4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:55 -0400 Subject: sunrpc: add common routine for copying address portion of a sockaddr Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/clnt.h | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b17df361be82..044f531aee70 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -198,6 +198,17 @@ static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; } +static inline bool __rpc_copy_addr4(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in *ssin = (struct sockaddr_in *) src; + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; + + dsin->sin_family = ssin->sin_family; + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; + return true; +} + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) @@ -206,12 +217,29 @@ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); } + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; + + dsin6->sin6_family = ssin6->sin6_family; + ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr); + return true; +} #else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) { return false; } + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + return false; +} #endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ /** @@ -236,5 +264,27 @@ static inline bool rpc_cmp_addr(const struct sockaddr *sap1, return false; } +/** + * rpc_copy_addr - copy the address portion of one sockaddr to another + * @dst: destination sockaddr + * @src: source sockaddr + * + * Just copies the address portion and family. Ignores port, scope, etc. + * Caller is responsible for making certain that dst is large enough to hold + * the address in src. Returns true if address family is supported. Returns + * false otherwise. + */ +static inline bool rpc_copy_addr(struct sockaddr *dst, + const struct sockaddr *src) +{ + switch (src->sa_family) { + case AF_INET: + return __rpc_copy_addr4(dst, src); + case AF_INET6: + return __rpc_copy_addr6(dst, src); + } + return false; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From 363168b4ea8ec26aeb982ac6024a09f907ecd27e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:56 -0400 Subject: nfsd: make nfs4_client->cl_addr a struct sockaddr_storage It's currently a __be32, which isn't big enough to hold an IPv6 address. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 32 +++++++++++++++++++------------- include/linux/nfsd/state.h | 2 +- 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9295c4b56bce..bfc14d879ea1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -55,6 +55,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -1220,13 +1221,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, int status; unsigned int strhashval; char dname[HEXDIR_LEN]; + char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct sockaddr *sa = svc_addr(rqstp); + rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " - " ip_addr=%u flags %x, spa_how %d\n", + "ip_addr=%s flags %x, spa_how %d\n", __func__, rqstp, exid, exid->clname.len, exid->clname.data, - ip_addr, exid->flags, exid->spa_how); + addr_str, exid->flags, exid->spa_how); if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) return nfserr_inval; @@ -1315,7 +1318,7 @@ out_new: copy_verf(new, &verf); copy_cred(&new->cl_cred, &rqstp->rq_cred); - new->cl_addr = ip_addr; + rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa); gen_clid(new); gen_confirm(new); add_to_unconfirmed(new, strhashval); @@ -1389,7 +1392,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; @@ -1417,7 +1420,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || - (ip_addr != unconf->cl_addr)) { + !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { status = nfserr_clid_inuse; goto out; } @@ -1564,7 +1567,7 @@ __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid *setclid) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct xdr_netobj clname = { .len = setclid->se_namelen, .data = setclid->se_name, @@ -1596,8 +1599,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* RFC 3530 14.2.33 CASE 0: */ status = nfserr_clid_inuse; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - dprintk("NFSD: setclientid: string in use by client" - " at %pI4\n", &conf->cl_addr); + char addr_str[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, + sizeof(addr_str)); + dprintk("NFSD: setclientid: string in use by client " + "at %s\n", addr_str); goto out; } } @@ -1659,7 +1665,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, gen_clid(new); } copy_verf(new, &clverifier); - new->cl_addr = sin->sin_addr.s_addr; + rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa); new->cl_flavor = rqstp->rq_flavor; princ = svc_gss_principal(rqstp); if (princ) { @@ -1693,7 +1699,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid_confirm *setclientid_confirm) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = 
&setclientid_confirm->sc_clientid; @@ -1712,9 +1718,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, unconf = find_unconfirmed_client(clid); status = nfserr_clid_inuse; - if (conf && conf->cl_addr != sin->sin_addr.s_addr) + if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) goto out; - if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) + if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) goto out; /* diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 58bb19784e12..3510ddd4be49 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -200,7 +200,7 @@ struct nfs4_client { char cl_recdir[HEXDIR_LEN]; /* recovery dir */ nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ - __be32 cl_addr; /* client ipaddress */ + struct sockaddr_storage cl_addr; /* client ipaddress */ u32 cl_flavor; /* setclientid pseudoflavor */ char *cl_principal; /* setclientid principal name */ struct svc_cred cl_cred; /* setclientid principal */ -- cgit v1.2.3 From aa9a4ec7707a5391cde556f3fa1b0eb4bca3bcf6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:57 -0400 Subject: nfsd: convert nfs4_cb_conn struct to hold address in sockaddr_storage ...rather than as a separate address and port fields. This will be necessary for implementing callbacks over IPv6. Also, convert gen_callback to use the standard rpcuaddr2sockaddr routine rather than its own private one. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 11 ++----- fs/nfsd/nfs4state.c | 81 ++++++---------------------------------------- include/linux/nfsd/state.h | 4 +-- 3 files changed, 13 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 3fd23f7aceca..81d1c5285dcc 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -377,7 +377,6 @@ static int max_cb_time(void) int setup_callback_client(struct nfs4_client *clp) { - struct sockaddr_in addr; struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_timeout timeparms = { .to_initval = max_cb_time(), @@ -385,8 +384,8 @@ int setup_callback_client(struct nfs4_client *clp) }; struct rpc_create_args args = { .protocol = IPPROTO_TCP, - .address = (struct sockaddr *)&addr, - .addrsize = sizeof(addr), + .address = (struct sockaddr *) &cb->cb_addr, + .addrsize = cb->cb_addrlen, .timeout = &timeparms, .program = &cb_program, .prognumber = cb->cb_prog, @@ -400,12 +399,6 @@ int setup_callback_client(struct nfs4_client *clp) if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; - /* Initialize address */ - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_port = htons(cb->cb_port); - addr.sin_addr.s_addr = htonl(cb->cb_addr); - /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bfc14d879ea1..96a742308cee 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -897,76 +897,6 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, return NULL; } -/* a helper function for parse_callback */ -static int -parse_octet(unsigned int *lenp, char **addrp) -{ - unsigned int len = *lenp; - char *p = *addrp; - int n = -1; - char c; - - for (;;) { - if (!len) - break; - len--; - c = *p++; - if (c == '.') - break; - if ((c < '0') || (c > '9')) { - n = -1; - break; - } - if (n < 0) - n = 0; - 
n = (n * 10) + (c - '0'); - if (n > 255) { - n = -1; - break; - } - } - *lenp = len; - *addrp = p; - return n; -} - -/* parse and set the setclientid ipv4 callback address */ -static int -parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) -{ - int temp = 0; - u32 cbaddr = 0; - u16 cbport = 0; - u32 addrlen = addr_len; - char *addr = addr_val; - int i, shift; - - /* ipaddress */ - shift = 24; - for(i = 4; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbaddr |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbaddrp = cbaddr; - - /* port */ - shift = 8; - for(i = 2; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbport |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbportp = cbport; - return 1; -} - static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) { @@ -976,14 +906,21 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) goto out_err; - if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, - &cb->cb_addr, &cb->cb_port))) + cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, + se->se_callback_addr_len, + (struct sockaddr *) &cb->cb_addr, + sizeof(cb->cb_addr)); + + if (!cb->cb_addrlen || cb->cb_addr.ss_family != AF_INET) goto out_err; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; return; out_err: + cb->cb_addr.ss_family = AF_UNSPEC; + cb->cb_addrlen = 0; dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " "will not receive delegations\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 3510ddd4be49..fb0c404c7c5c 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -81,8 +81,8 @@ struct nfs4_delegation { /* client delegation callback info */ struct nfs4_cb_conn { /* SETCLIENTID info */ - u32 cb_addr; - unsigned short cb_port; + struct sockaddr_storage cb_addr; + size_t cb_addrlen; u32 cb_prog; u32 cb_minorversion; u32 cb_ident; /* minorversion 0 only */ -- cgit v1.2.3 From fbf4665f41b02e757ab9d9198df65e319388e728 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:59 -0400 Subject: nfsd: populate sin6_scope_id on callback address with scopeid from rq_addr on SETCLIENTID call When a SETCLIENTID call comes in, one of the args given is the svc_rqst. This struct contains an rq_addr field which holds the address that sent the call. If this is an IPv6 address, then we can use the sin6_scope_id field in this address to populate the sin6_scope_id field in the callback address. AFAICT, the rq_addr.sin6_scope_id is non-zero if and only if the client mounted the server's link-local address. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 7 +++++-- include/linux/sunrpc/clnt.h | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9ec0ca1ef4ea..d2a052480908 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -898,7 +898,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, } static void -gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) +gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) { struct nfs4_cb_conn *cb = &clp->cl_cb_conn; unsigned short expected_family; @@ -921,6 +921,9 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) goto out_err; + if (cb->cb_addr.ss_family == AF_INET6) + ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; @@ -1621,7 +1624,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } copy_cred(&new->cl_cred, &rqstp->rq_cred); gen_confirm(new); - gen_callback(new, setclid); + gen_callback(new, setclid, rpc_get_scope_id(sa)); add_to_unconfirmed(new, strhashval); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 044f531aee70..3d025588e56e 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -286,5 +286,20 @@ static inline bool rpc_copy_addr(struct sockaddr *dst, return false; } +/** + * rpc_get_scope_id - return scopeid for a given sockaddr + * @sa: sockaddr to get scopeid from + * + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if + * not an AF_INET6 address. + */ +static inline u32 rpc_get_scope_id(const struct sockaddr *sa) +{ + if (sa->sa_family != AF_INET6) + return 0; + + return ((struct sockaddr_in6 *) sa)->sin6_scope_id; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From da15cfdae03351c689736f8d142618592e3cebc3 Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 19 Aug 2009 19:13:34 -0700 Subject: time: Introduce CLOCK_REALTIME_COARSE After talking with some application writers who want very fast, but not fine-grained timestamps, I decided to try to implement new clock_ids to clock_gettime(): CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE which return the time at the last tick. This is very fast as we don't have to access any hardware (which can be very painful if you're using something like the acpi_pm clocksource), and we can even use the vdso clock_gettime() method to avoid the syscall. The only trade-off is you only get low-res, tick-grained time resolution. This isn't a new idea; I know Ingo has a patch in the -rt tree that made the vsyscall gettimeofday() return coarse-grained time when the vsyscall64 sysctl was set to 2. However this affects all applications on a system. With this method, applications can choose the proper speed/granularity trade-off for themselves. 
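A hedged userspace illustration of the intended use (not part of the patch): the clockid values come from the time.h hunk below, and older glibc needs -lrt to link clock_gettime().

	#include <stdio.h>
	#include <time.h>

	#ifndef CLOCK_REALTIME_COARSE
	#define CLOCK_REALTIME_COARSE	5	/* value from the time.h hunk in this patch */
	#endif

	int main(void)
	{
		struct timespec ts, res;

		/* Never touches the clocksource hardware; the vdso can serve it. */
		clock_gettime(CLOCK_REALTIME_COARSE, &ts);
		/* Resolution is the tick length (KTIME_LOW_RES), e.g. 4000000ns at HZ=250. */
		clock_getres(CLOCK_REALTIME_COARSE, &res);
		printf("%ld.%09ld (res %ldns)\n",
		       (long)ts.tv_sec, (long)ts.tv_nsec, (long)res.tv_nsec);
		return 0;
	}
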
Signed-off-by: John Stultz Cc: Andi Kleen Cc: nikolag@ca.ibm.com Cc: Darren Hart Cc: arjan@infradead.org Cc: jonathan@jonmasters.org LKML-Reference: <1250734414.6897.5.camel@localhost.localdomain> Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/vgtod.h | 1 + arch/x86/kernel/vsyscall_64.c | 1 + arch/x86/vdso/vclock_gettime.c | 39 ++++++++++++++++++++++++++++++++++++--- include/linux/time.h | 4 ++++ kernel/posix-timers.c | 35 +++++++++++++++++++++++++++++++++++ kernel/time/timekeeping.c | 21 +++++++++++++++++++++ 6 files changed, 98 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index dc27a69e5d2a..3d61e204826f 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -21,6 +21,7 @@ struct vsyscall_gtod_data { u32 shift; } clock; struct timespec wall_to_monotonic; + struct timespec wall_time_coarse; }; extern struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 25ee06a80aad..cf53a78e2dcf 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; + vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6a40b78b46aa..ee55754cc3c5 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -86,14 +86,47 @@ notrace static noinline int do_monotonic(struct timespec *ts) return 0; } +notrace static noinline int do_realtime_coarse(struct timespec *ts) +{ + unsigned long seq; + do { + seq = read_seqbegin(&gtod->lock); + ts->tv_sec = gtod->wall_time_coarse.tv_sec; + ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; + } while (unlikely(read_seqretry(&gtod->lock, seq))); + return 0; +} + +notrace static noinline int do_monotonic_coarse(struct timespec *ts) +{ + unsigned long seq, ns, secs; + do { + seq = read_seqbegin(&gtod->lock); + secs = gtod->wall_time_coarse.tv_sec; + ns = gtod->wall_time_coarse.tv_nsec; + secs += gtod->wall_to_monotonic.tv_sec; + ns += gtod->wall_to_monotonic.tv_nsec; + } while (unlikely(read_seqretry(&gtod->lock, seq))); + vset_normalized_timespec(ts, secs, ns); + return 0; +} + notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { - if (likely(gtod->sysctl_enabled && gtod->clock.vread)) + if (likely(gtod->sysctl_enabled)) switch (clock) { case CLOCK_REALTIME: - return do_realtime(ts); + if (likely(gtod->clock.vread)) + return do_realtime(ts); + break; case CLOCK_MONOTONIC: - return do_monotonic(ts); + if (likely(gtod->clock.vread)) + return do_monotonic(ts); + break; + case CLOCK_REALTIME_COARSE: + return do_realtime_coarse(ts); + case CLOCK_MONOTONIC_COARSE: + return do_monotonic_coarse(ts); } return vdso_fallback_gettime(clock, ts); } diff --git a/include/linux/time.h b/include/linux/time.h index f505988398e6..256232f7e5e6 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -110,6 +110,8 @@ extern int timekeeping_suspended; unsigned long get_seconds(void); struct timespec current_kernel_time(void); +struct timespec __current_kernel_time(void); /* does not hold xtime_lock */ +struct timespec 
get_monotonic_coarse(void); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) @@ -243,6 +245,8 @@ struct itimerval { #define CLOCK_PROCESS_CPUTIME_ID 2 #define CLOCK_THREAD_CPUTIME_ID 3 #define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 /* * The IDs of various hardware clocks: diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index d089d052c4a9..495440779ce3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -242,6 +242,25 @@ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) return 0; } + +static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp) +{ + *tp = current_kernel_time(); + return 0; +} + +static int posix_get_monotonic_coarse(clockid_t which_clock, + struct timespec *tp) +{ + *tp = get_monotonic_coarse(); + return 0; +} + +int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp) +{ + *tp = ktime_to_timespec(KTIME_LOW_RES); + return 0; +} /* * Initialize everything, well, just everything in Posix clocks/timers ;) */ @@ -262,10 +281,26 @@ static __init int init_posix_timers(void) .timer_create = no_timer_create, .nsleep = no_nsleep, }; + struct k_clock clock_realtime_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_realtime_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, + }; + struct k_clock clock_monotonic_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_monotonic_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, + }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); + register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); + register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof (struct k_itimer), 0, SLAB_PANIC, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 15e06defca55..03cbeb34d141 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -847,6 +847,10 @@ unsigned long get_seconds(void) } EXPORT_SYMBOL(get_seconds); +struct timespec __current_kernel_time(void) +{ + return xtime_cache; +} struct timespec current_kernel_time(void) { @@ -862,3 +866,20 @@ struct timespec current_kernel_time(void) return now; } EXPORT_SYMBOL(current_kernel_time); + +struct timespec get_monotonic_coarse(void) +{ + struct timespec now, mono; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + + now = xtime_cache; + mono = wall_to_monotonic; + } while (read_seqretry(&xtime_lock, seq)); + + set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, + now.tv_nsec + mono.tv_nsec); + return now; +} -- cgit v1.2.3 From 8b5a10fc6fd02289ea03480f93382b1a99006142 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 19 Aug 2009 08:40:48 +0100 Subject: x86: properly annotate alternatives.c Some of the NOPs tables aren't used on 64-bits, quite some code and data is needed post-init for module loading only, and a couple of functions aren't used outside that file (i.e. can be static, and don't need to be exported). The change to __INITDATA/__INITRODATA is needed to avoid an assembler warning. 
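The __init_or_module/__initdata_or_module/__initconst_or_module and __INITRODATA_OR_MODULE markers used in the hunks below come from the include/linux/init.h change listed in the diffstat; that hunk is not reproduced here, but the pattern is, in sketch form (the exact definitions are an assumption inferred from how the annotations are used):

	/* Sketch of the include/linux/init.h pattern; illustrative, not the exact
	 * hunk. With CONFIG_MODULES the NOP tables and patching code must stay
	 * resident so modules can be patched at load time; in module-less kernels
	 * they can be discarded after boot-time patching. */
	#ifdef CONFIG_MODULES
	#define __init_or_module
	#define __initdata_or_module
	#define __initconst_or_module
	#else
	#define __init_or_module	__init
	#define __initdata_or_module	__initdata
	#define __initconst_or_module	__initconst
	#endif

The assembly-visible __INITRODATA_OR_MODULE used by the NOP-table asm blocks below follows the same rule for section placement.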
Signed-off-by: Jan Beulich LKML-Reference: <4A8BC8A00200007800010823@vpn.id2.novell.com> Acked-by: Sam Ravnborg Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative.h | 7 ----- arch/x86/kernel/alternative.c | 56 ++++++++++++++++++++++---------------- include/linux/init.h | 12 ++++++-- 3 files changed, 42 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 1a37bcdc8606..c240efc74e00 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -73,8 +73,6 @@ static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} #endif /* CONFIG_SMP */ -const unsigned char *const *find_nop_table(void); - /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, feature) \ \ @@ -144,8 +142,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif -extern void add_nops(void *insns, unsigned int len); - /* * Clear and restore the kernel write-protection flag on the local CPU. * Allows the kernel to edit read-only pages. @@ -161,10 +157,7 @@ extern void add_nops(void *insns, unsigned int len); * Intel's errata. * On the local CPU you need to be protected against NMI or MCE handlers seeing an * inconsistent instruction while you patch. - * The _early version expects the memory to already be RW. */ - extern void *text_poke(void *addr, const void *opcode, size_t len); -extern void *text_poke_early(void *addr, const void *opcode, size_t len); #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f57658702571..486935143e02 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,7 @@ __setup("smp-alt-boot", bootonly); #define smp_alt_once 1 #endif -static int debug_alternative; +static int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) { @@ -51,7 +52,7 @@ static int __init setup_noreplace_smp(char *str) __setup("noreplace-smp", setup_noreplace_smp); #ifdef CONFIG_PARAVIRT -static int noreplace_paravirt = 0; +static int __initdata_or_module noreplace_paravirt = 0; static int __init setup_noreplace_paravirt(char *str) { @@ -64,16 +65,17 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); #define DPRINTK(fmt, args...) if (debug_alternative) \ printk(KERN_DEBUG fmt, args) -#ifdef GENERIC_NOP1 +#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) /* Use inline assembly to define this because the nops are defined as inline assembly strings in the include files and we cannot get them easily into strings.
*/ -asm("\t.section .rodata, \"a\"\nintelnops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: " GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 GENERIC_NOP7 GENERIC_NOP8 "\t.previous"); extern const unsigned char intelnops[]; -static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +intel_nops[ASM_NOP_MAX+1] = { NULL, intelnops, intelnops + 1, @@ -87,12 +89,13 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { #endif #ifdef K8_NOP1 -asm("\t.section .rodata, \"a\"\nk8nops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: " K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 K8_NOP7 K8_NOP8 "\t.previous"); extern const unsigned char k8nops[]; -static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +k8_nops[ASM_NOP_MAX+1] = { NULL, k8nops, k8nops + 1, @@ -105,13 +108,14 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { }; #endif -#ifdef K7_NOP1 -asm("\t.section .rodata, \"a\"\nk7nops: " +#if defined(K7_NOP1) && !defined(CONFIG_X86_64) +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: " K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 K7_NOP7 K7_NOP8 "\t.previous"); extern const unsigned char k7nops[]; -static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +k7_nops[ASM_NOP_MAX+1] = { NULL, k7nops, k7nops + 1, @@ -125,12 +129,13 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { #endif #ifdef P6_NOP1 -asm("\t.section .rodata, \"a\"\np6nops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: " P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 P6_NOP7 P6_NOP8 "\t.previous"); extern const unsigned char p6nops[]; -static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +p6_nops[ASM_NOP_MAX+1] = { NULL, p6nops, p6nops + 1, @@ -146,7 +151,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { #ifdef CONFIG_X86_64 extern char __vsyscall_0; -const unsigned char *const *find_nop_table(void) +static const unsigned char *const *__init_or_module find_nop_table(void) { if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_has(X86_FEATURE_NOPL)) @@ -157,7 +162,7 @@ const unsigned char *const *find_nop_table(void) #else /* CONFIG_X86_64 */ -const unsigned char *const *find_nop_table(void) +static const unsigned char *const *__init_or_module find_nop_table(void) { if (boot_cpu_has(X86_FEATURE_K8)) return k8_nops; @@ -172,7 +177,7 @@ const unsigned char *const *find_nop_table(void) #endif /* CONFIG_X86_64 */ /* Use this to add nops to a buffer, then text_poke the whole buffer. */ -void add_nops(void *insns, unsigned int len) +static void __init_or_module add_nops(void *insns, unsigned int len) { const unsigned char *const *noptable = find_nop_table(); @@ -185,10 +190,10 @@ void add_nops(void *insns, unsigned int len) len -= noplen; } } -EXPORT_SYMBOL_GPL(add_nops); extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern u8 *__smp_locks[], *__smp_locks_end[]; +static void *text_poke_early(void *addr, const void *opcode, size_t len); /* Replace instructions with better alternatives for this CPU type. This runs before SMP is initialized to avoid SMP problems with @@ -196,7 +201,8 @@ extern u8 *__smp_locks[], *__smp_locks_end[]; APs have fewer capabilities than the boot processor are not handled. Tough.
Make sure you disable such features by hand. */ -void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +void __init_or_module apply_alternatives(struct alt_instr *start, + struct alt_instr *end) { struct alt_instr *a; char insnbuf[MAX_PATCH_LEN]; @@ -279,9 +285,10 @@ static LIST_HEAD(smp_alt_modules); static DEFINE_MUTEX(smp_alt); static int smp_mode = 1; /* protected by smp_alt */ -void alternatives_smp_module_add(struct module *mod, char *name, - void *locks, void *locks_end, - void *text, void *text_end) +void __init_or_module alternatives_smp_module_add(struct module *mod, + char *name, + void *locks, void *locks_end, + void *text, void *text_end) { struct smp_alt_module *smp; @@ -317,7 +324,7 @@ void alternatives_smp_module_add(struct module *mod, char *name, mutex_unlock(&smp_alt); } -void alternatives_smp_module_del(struct module *mod) +void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; @@ -386,8 +393,8 @@ void alternatives_smp_switch(int smp) #endif #ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) +void __init_or_module apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) { struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; @@ -485,7 +492,8 @@ void __init alternative_instructions(void) * instructions. And on the local CPU you need to be protected against NMI or MCE * handlers seeing an inconsistent instruction while you patch. */ -void *text_poke_early(void *addr, const void *opcode, size_t len) +static void *__init_or_module text_poke_early(void *addr, const void *opcode, + size_t len) { unsigned long flags; local_irq_save(flags); diff --git a/include/linux/init.h b/include/linux/init.h index 13b633ed695e..400adbb45414 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -103,8 +103,8 @@ #define __INIT .section ".init.text","ax" #define __FINIT .previous -#define __INITDATA .section ".init.data","aw" -#define __INITRODATA .section ".init.rodata","a" +#define __INITDATA .section ".init.data","aw",%progbits +#define __INITRODATA .section ".init.rodata","a",%progbits #define __FINITDATA .previous #define __DEVINIT .section ".devinit.text", "ax" @@ -305,9 +305,17 @@ void __init parse_early_options(char *cmdline); #ifdef CONFIG_MODULES #define __init_or_module #define __initdata_or_module +#define __initconst_or_module +#define __INIT_OR_MODULE .text +#define __INITDATA_OR_MODULE .data +#define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits #else #define __init_or_module __init #define __initdata_or_module __initdata +#define __initconst_or_module __initconst +#define __INIT_OR_MODULE __INIT +#define __INITDATA_OR_MODULE __INITDATA +#define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ /* Functions marked as __devexit may be discarded at kernel link time, depending -- cgit v1.2.3 From b560d8ad8583803978aaaeba50ef29dc8e97a610 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Aug 2009 22:08:51 -0700 Subject: rcu: Expunge lingering references to CONFIG_CLASSIC_RCU, optimize on !SMP A couple of references to CONFIG_CLASSIC_RCU have survived. Although these are harmless, it is past time for them to go. The one in hardirq.h is strictly a readability problem. The two in pagemap.h appear to disable a !SMP performance optimization (which this patch re-enables). This does raise the issue as to whether pagemap.h should really be referring to the RCU implementation.
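For reference, a rough sketch of the !SMP fast path that the corrected guard selects in page_cache_get_speculative() (paraphrased and abbreviated from pagemap.h, not a verbatim quote):

	#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU)
	# ifdef CONFIG_PREEMPT
		VM_BUG_ON(!in_atomic());
	# endif
		/*
		 * On UP the caller's rcu_read_lock() disables preemption, which
		 * already pins the page, so a plain reference bump is enough.
		 */
		VM_BUG_ON(page_count(page) == 0);
		atomic_inc(&page->_count);
	#else
		/* SMP must take the reference speculatively. */
		if (unlikely(!get_page_unless_zero(page)))
			return 0;
	#endif

Since CONFIG_CLASSIC_RCU no longer exists anywhere in the tree, the old guard could never select the first branch, so even !SMP builds paid for the speculative path.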
Long term, I intend to make the RCU implementation driven by CONFIG_PREEMPT, at which point these should change from defined(CONFIG_TREE_RCU) to !defined(CONFIG_PREEMPT). In the meantime, is there something else that could be done in pagemap.h? Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <20090822050851.GA8414@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 4 ++-- include/linux/pagemap.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 8246c697863d..330cb31bb496 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -132,7 +132,7 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif -#if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) +#if defined(CONFIG_NO_HZ) extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); extern void rcu_nmi_enter(void); @@ -142,7 +142,7 @@ extern void rcu_nmi_exit(void); # define rcu_irq_exit() do { } while (0) # define rcu_nmi_enter() do { } while (0) # define rcu_nmi_exit() do { } while (0) -#endif /* #if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) */ +#endif /* #if defined(CONFIG_NO_HZ) */ /* * It is safe to do non-atomic ops on ->hardirq_context, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index aec3252afcf5..ed5d7501e181 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -132,7 +132,7 @@ static inline int page_cache_get_speculative(struct page *page) { VM_BUG_ON(in_interrupt()); -#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU) +#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU) # ifdef CONFIG_PREEMPT VM_BUG_ON(!in_atomic()); # endif @@ -170,7 +170,7 @@ static inline int page_cache_add_speculative(struct page *page, int count) { VM_BUG_ON(in_interrupt()); -#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU) +#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU) # ifdef CONFIG_PREEMPT VM_BUG_ON(!in_atomic()); # endif -- cgit v1.2.3 From 5e928f77a09a07f9dd595bb8a489965d69a83458 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 18 Aug 2009 23:38:32 +0200 Subject: PM: Introduce core framework for run-time PM of I/O devices (rev. 17) Introduce a core framework for run-time power management of I/O devices. Add device run-time PM fields to 'struct dev_pm_info' and device run-time PM callbacks to 'struct dev_pm_ops'. Introduce a run-time PM workqueue and define some device run-time PM helper functions at the core level. Document all these things. Special thanks to Alan Stern for his help with the design and multiple detailed reviews of the preceding versions of this patch and to Magnus Damm for testing feedback. Signed-off-by: Rafael J.
Wysocki Acked-by: Magnus Damm --- Documentation/power/runtime_pm.txt | 378 ++++++++++++++ drivers/base/dd.c | 11 + drivers/base/power/Makefile | 1 + drivers/base/power/main.c | 22 +- drivers/base/power/power.h | 31 +- drivers/base/power/runtime.c | 1011 ++++++++++++++++++++++++++++++++++++ include/linux/pm.h | 101 +++- include/linux/pm_runtime.h | 114 ++++ kernel/power/Kconfig | 14 + kernel/power/main.c | 17 + 10 files changed, 1689 insertions(+), 11 deletions(-) create mode 100644 Documentation/power/runtime_pm.txt create mode 100644 drivers/base/power/runtime.c create mode 100644 include/linux/pm_runtime.h (limited to 'include/linux') diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt new file mode 100644 index 000000000000..f49a33b704d2 --- /dev/null +++ b/Documentation/power/runtime_pm.txt @@ -0,0 +1,378 @@ +Run-time Power Management Framework for I/O Devices + +(C) 2009 Rafael J. Wysocki , Novell Inc. + +1. Introduction + +Support for run-time power management (run-time PM) of I/O devices is provided +at the power management core (PM core) level by means of: + +* The power management workqueue pm_wq in which bus types and device drivers can + put their PM-related work items. It is strongly recommended that pm_wq be + used for queuing all work items related to run-time PM, because this allows + them to be synchronized with system-wide power transitions (suspend to RAM, + hibernation and resume from system sleep states). pm_wq is declared in + include/linux/pm_runtime.h and defined in kernel/power/main.c. + +* A number of run-time PM fields in the 'power' member of 'struct device' (which + is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can + be used for synchronizing run-time PM operations with one another. + +* Three device run-time PM callbacks in 'struct dev_pm_ops' (defined in + include/linux/pm.h). + +* A set of helper functions defined in drivers/base/power/runtime.c that can be + used for carrying out run-time PM operations in such a way that the + synchronization between them is taken care of by the PM core. Bus types and + device drivers are encouraged to use these functions. + +The run-time PM callbacks present in 'struct dev_pm_ops', the device run-time PM +fields of 'struct dev_pm_info' and the core helper functions provided for +run-time PM are described below. + +2. Device Run-time PM Callbacks + +There are three device run-time PM callbacks defined in 'struct dev_pm_ops': + +struct dev_pm_ops { + ... + int (*runtime_suspend)(struct device *dev); + int (*runtime_resume)(struct device *dev); + void (*runtime_idle)(struct device *dev); + ... +}; + +The ->runtime_suspend() callback is executed by the PM core for the bus type of +the device being suspended. The bus type's callback is then _entirely_ +_responsible_ for handling the device as appropriate, which may, but need not +include executing the device driver's own ->runtime_suspend() callback (from the +PM core's point of view it is not necessary to implement a ->runtime_suspend() +callback in a device driver as long as the bus type's ->runtime_suspend() knows +what to do to handle the device). + + * Once the bus type's ->runtime_suspend() callback has completed successfully + for given device, the PM core regards the device as suspended, which need + not mean that the device has been put into a low power state. 
It is + supposed to mean, however, that the device will not process data and will + not communicate with the CPU(s) and RAM until its bus type's + ->runtime_resume() callback is executed for it. The run-time PM status of + a device after successful execution of its bus type's ->runtime_suspend() + callback is 'suspended'. + + * If the bus type's ->runtime_suspend() callback returns -EBUSY or -EAGAIN, + the device's run-time PM status is supposed to be 'active', which means that + the device _must_ be fully operational afterwards. + + * If the bus type's ->runtime_suspend() callback returns an error code + different from -EBUSY or -EAGAIN, the PM core regards this as a fatal + error and will refuse to run the helper functions described in Section 4 + for the device, until the status of it is directly set either to 'active' + or to 'suspended' (the PM core provides special helper functions for this + purpose). + +In particular, if the driver requires remote wakeup capability for proper +functioning and device_may_wakeup() returns 'false' for the device, then +->runtime_suspend() should return -EBUSY. On the other hand, if +device_may_wakeup() returns 'true' for the device and the device is put +into a low power state during the execution of its bus type's +->runtime_suspend(), it is expected that remote wake-up (i.e. hardware mechanism +allowing the device to request a change of its power state, such as PCI PME) +will be enabled for the device. Generally, remote wake-up should be enabled +for all input devices put into a low power state at run time. + +The ->runtime_resume() callback is executed by the PM core for the bus type of +the device being woken up. The bus type's callback is then _entirely_ +_responsible_ for handling the device as appropriate, which may, but need not +include executing the device driver's own ->runtime_resume() callback (from the +PM core's point of view it is not necessary to implement a ->runtime_resume() +callback in a device driver as long as the bus type's ->runtime_resume() knows +what to do to handle the device). + + * Once the bus type's ->runtime_resume() callback has completed successfully, + the PM core regards the device as fully operational, which means that the + device _must_ be able to complete I/O operations as needed. The run-time + PM status of the device is then 'active'. + + * If the bus type's ->runtime_resume() callback returns an error code, the PM + core regards this as a fatal error and will refuse to run the helper + functions described in Section 4 for the device, until its status is + directly set either to 'active' or to 'suspended' (the PM core provides + special helper functions for this purpose). + +The ->runtime_idle() callback is executed by the PM core for the bus type of +given device whenever the device appears to be idle, which is indicated to the +PM core by two counters, the device's usage counter and the counter of 'active' +children of the device. + + * If any of these counters is decreased using a helper function provided by + the PM core and it turns out to be equal to zero, the other counter is + checked. If that counter also is equal to zero, the PM core executes the + device bus type's ->runtime_idle() callback (with the device as an + argument). + +The action performed by a bus type's ->runtime_idle() callback is totally +dependent on the bus type in question, but the expected and recommended action +is to check if the device can be suspended (i.e. 
if all of the conditions +necessary for suspending the device are satisfied) and to queue up a suspend +request for the device in that case. + +The helper functions provided by the PM core, described in Section 4, guarantee +that the following constraints are met with respect to the bus type's run-time +PM callbacks: + +(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute + ->runtime_suspend() in parallel with ->runtime_resume() or with another + instance of ->runtime_suspend() for the same device) with the exception that + ->runtime_suspend() or ->runtime_resume() can be executed in parallel with + ->runtime_idle() (although ->runtime_idle() will not be started while any + of the other callbacks is being executed for the same device). + +(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active' + devices (i.e. the PM core will only execute ->runtime_idle() or + ->runtime_suspend() for the devices the run-time PM status of which is + 'active'). + +(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device + the usage counter of which is equal to zero _and_ either the counter of + 'active' children of which is equal to zero, or the 'power.ignore_children' + flag of which is set. + +(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the + PM core will only execute ->runtime_resume() for the devices the run-time + PM status of which is 'suspended'). + +Additionally, the helper functions provided by the PM core obey the following +rules: + + * If ->runtime_suspend() is about to be executed or there's a pending request + to execute it, ->runtime_idle() will not be executed for the same device. + + * A request to execute or to schedule the execution of ->runtime_suspend() + will cancel any pending requests to execute ->runtime_idle() for the same + device. + + * If ->runtime_resume() is about to be executed or there's a pending request + to execute it, the other callbacks will not be executed for the same device. + + * A request to execute ->runtime_resume() will cancel any pending or + scheduled requests to execute the other callbacks for the same device. + +3. Run-time PM Device Fields + +The following device run-time PM fields are present in 'struct dev_pm_info', as +defined in include/linux/pm.h: + + struct timer_list suspend_timer; + - timer used for scheduling (delayed) suspend request + + unsigned long timer_expires; + - timer expiration time, in jiffies (if this is different from zero, the + timer is running and will expire at that time, otherwise the timer is not + running) + + struct work_struct work; + - work structure used for queuing up requests (i.e. work items in pm_wq) + + wait_queue_head_t wait_queue; + - wait queue used if any of the helper functions needs to wait for another + one to complete + + spinlock_t lock; + - lock used for synchronisation + + atomic_t usage_count; + - the usage counter of the device + + atomic_t child_count; + - the count of 'active' children of the device + + unsigned int ignore_children; + - if set, the value of child_count is ignored (but still updated) + + unsigned int disable_depth; + - used for disabling the helper functions (they work normally if this is + equal to zero); its initial value is 1 (i.e.
run-time PM is + initially disabled for all devices) + + unsigned int runtime_error; + - if set, there was a fatal error (one of the callbacks returned error code + as described in Section 2), so the helper functions will not work until + this flag is cleared; this is the error code returned by the failing + callback + + unsigned int idle_notification; + - if set, ->runtime_idle() is being executed + + unsigned int request_pending; + - if set, there's a pending request (i.e. a work item queued up into pm_wq) + + enum rpm_request request; + - type of request that's pending (valid if request_pending is set) + + unsigned int deferred_resume; + - set if ->runtime_resume() is about to be run while ->runtime_suspend() is + being executed for that device and it is not practical to wait for the + suspend to complete; means "start a resume as soon as you've suspended" + + enum rpm_status runtime_status; + - the run-time PM status of the device; this field's initial value is + RPM_SUSPENDED, which means that each device is initially regarded by the + PM core as 'suspended', regardless of its real hardware status + +All of the above fields are members of the 'power' member of 'struct device'. + +4. Run-time PM Device Helper Functions + +The following run-time PM helper functions are defined in +drivers/base/power/runtime.c and include/linux/pm_runtime.h: + + void pm_runtime_init(struct device *dev); + - initialize the device run-time PM fields in 'struct dev_pm_info' + + void pm_runtime_remove(struct device *dev); + - make sure that the run-time PM of the device will be disabled after + removing the device from device hierarchy + + int pm_runtime_idle(struct device *dev); + - execute ->runtime_idle() for the device's bus type; returns 0 on success + or error code on failure, where -EINPROGRESS means that ->runtime_idle() + is already being executed + + int pm_runtime_suspend(struct device *dev); + - execute ->runtime_suspend() for the device's bus type; returns 0 on + success, 1 if the device's run-time PM status was already 'suspended', or + error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt + to suspend the device again in future + + int pm_runtime_resume(struct device *dev); + - execute ->runtime_resume() for the device's bus type; returns 0 on + success, 1 if the device's run-time PM status was already 'active' or + error code on failure, where -EAGAIN means it may be safe to attempt to + resume the device again in future, but 'power.runtime_error' should be + checked additionally + + int pm_request_idle(struct device *dev); + - submit a request to execute ->runtime_idle() for the device's bus type + (the request is represented by a work item in pm_wq); returns 0 on success + or error code if the request has not been queued up + + int pm_schedule_suspend(struct device *dev, unsigned int delay); + - schedule the execution of ->runtime_suspend() for the device's bus type + in future, where 'delay' is the time to wait before queuing up a suspend + work item in pm_wq, in milliseconds (if 'delay' is zero, the work item is + queued up immediately); returns 0 on success, 1 if the device's PM + run-time status was already 'suspended', or error code if the request + hasn't been scheduled (or queued up if 'delay' is 0); if the execution of + ->runtime_suspend() is already scheduled and not yet expired, the new + value of 'delay' will be used as the time to wait + + int pm_request_resume(struct device *dev); + - submit a request to execute ->runtime_resume() for the device's bus type
+ (the request is represented by a work item in pm_wq); returns 0 on + success, 1 if the device's run-time PM status was already 'active', or + error code if the request hasn't been queued up + + void pm_runtime_get_noresume(struct device *dev); + - increment the device's usage counter + + int pm_runtime_get(struct device *dev); + - increment the device's usage counter, run pm_request_resume(dev) and + return its result + + int pm_runtime_get_sync(struct device *dev); + - increment the device's usage counter, run pm_runtime_resume(dev) and + return its result + + void pm_runtime_put_noidle(struct device *dev); + - decrement the device's usage counter + + int pm_runtime_put(struct device *dev); + - decrement the device's usage counter, run pm_request_idle(dev) and return + its result + + int pm_runtime_put_sync(struct device *dev); + - decrement the device's usage counter, run pm_runtime_idle(dev) and return + its result + + void pm_runtime_enable(struct device *dev); + - enable the run-time PM helper functions to run the device bus type's + run-time PM callbacks described in Section 2 + + int pm_runtime_disable(struct device *dev); + - prevent the run-time PM helper functions from running the device bus + type's run-time PM callbacks, make sure that all of the pending run-time + PM operations on the device are either completed or canceled; returns + 1 if there was a resume request pending and it was necessary to execute + ->runtime_resume() for the device's bus type to satisfy that request, + otherwise 0 is returned + + void pm_suspend_ignore_children(struct device *dev, bool enable); + - set/unset the power.ignore_children flag of the device + + int pm_runtime_set_active(struct device *dev); + - clear the device's 'power.runtime_error' flag, set the device's run-time + PM status to 'active' and update its parent's counter of 'active' + children as appropriate (it is only valid to use this function if + 'power.runtime_error' is set or 'power.disable_depth' is greater than + zero); it will fail and return error code if the device has a parent + which is not active and the 'power.ignore_children' flag of which is unset + + void pm_runtime_set_suspended(struct device *dev); + - clear the device's 'power.runtime_error' flag, set the device's run-time + PM status to 'suspended' and update its parent's counter of 'active' + children as appropriate (it is only valid to use this function if + 'power.runtime_error' is set or 'power.disable_depth' is greater than + zero) + +It is safe to execute the following helper functions from interrupt context: + +pm_request_idle() +pm_schedule_suspend() +pm_request_resume() +pm_runtime_get_noresume() +pm_runtime_get() +pm_runtime_put_noidle() +pm_runtime_put() +pm_suspend_ignore_children() +pm_runtime_set_active() +pm_runtime_set_suspended() +pm_runtime_enable() + +5. Run-time PM Initialization, Device Probing and Removal + +Initially, the run-time PM is disabled for all devices, which means that the +majority of the run-time PM helper functions described in Section 4 will return +-EAGAIN until pm_runtime_enable() is called for the device. + +In addition to that, the initial run-time PM status of all devices is +'suspended', but it need not reflect the actual physical state of the device. +Thus, if the device is initially active (i.e. it is able to process I/O), its +run-time PM status must be changed to 'active', with the help of +pm_runtime_set_active(), before pm_runtime_enable() is called for the device.
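+
+As a rough illustration (foo_probe() and foo_hw_init() are made-up names
+used only in this sketch), a driver whose device is powered up when its
+->probe() callback runs might do:
+
+	static int foo_probe(struct device *dev)
+	{
+		int error = foo_hw_init(dev);	/* hardware is at full power */
+
+		if (error)
+			return error;
+		/* Tell the PM core the device really is active ... */
+		error = pm_runtime_set_active(dev);
+		if (error)
+			return error;
+		/* ... and only then let the helper functions operate on it. */
+		pm_runtime_enable(dev);
+		return 0;
+	}
+
+The corresponding ->remove() callback would call pm_runtime_disable() before
+powering the hardware down.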
+ +However, if the device has a parent and the parent's run-time PM is enabled, +calling pm_runtime_set_active() for the device will affect the parent, unless +the parent's 'power.ignore_children' flag is set. Namely, in that case the +parent won't be able to suspend at run time, using the PM core's helper +functions, as long as the child's status is 'active', even if the child's +run-time PM is still disabled (i.e. pm_runtime_enable() hasn't been called for +the child yet or pm_runtime_disable() has been called for it). For this reason, +once pm_runtime_set_active() has been called for the device, pm_runtime_enable() +should be called for it too as soon as reasonably possible or its run-time PM +status should be changed back to 'suspended' with the help of +pm_runtime_set_suspended(). + +If the default initial run-time PM status of the device (i.e. 'suspended') +reflects the actual state of the device, its bus type's or its driver's +->probe() callback will likely need to wake it up using one of the PM core's +helper functions described in Section 4. In that case, pm_runtime_resume() +should be used. Of course, for this purpose the device's run-time PM has to be +enabled earlier by calling pm_runtime_enable(). + +If the device bus type's or driver's ->probe() or ->remove() callback runs +pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts, +they will fail returning -EAGAIN, because the device's usage counter is +incremented by the core before executing ->probe() and ->remove(). Still, it +may be desirable to suspend the device as soon as ->probe() or ->remove() has +finished, so the PM core uses pm_runtime_put_sync() to invoke the device bus +type's ->runtime_idle() callback at that time. diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f0106875f01d..7b34b3a48f67 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "base.h" #include "power/power.h" @@ -202,7 +203,10 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) pr_debug("bus: '%s': %s: matched device %s with driver %s\n", drv->bus->name, __func__, dev_name(dev), drv->name); + pm_runtime_get_noresume(dev); + pm_runtime_barrier(dev); ret = really_probe(dev, drv); + pm_runtime_put_sync(dev); return ret; } @@ -245,7 +249,9 @@ int device_attach(struct device *dev) ret = 0; } } else { + pm_runtime_get_noresume(dev); ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); + pm_runtime_put_sync(dev); } up(&dev->sem); return ret; } @@ -306,6 +312,9 @@ static void __device_release_driver(struct device *dev) drv = dev->driver; if (drv) { + pm_runtime_get_noresume(dev); + pm_runtime_barrier(dev); + driver_sysfs_remove(dev); if (dev->bus) @@ -324,6 +333,8 @@ static void __device_release_driver(struct device *dev) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_UNBOUND_DRIVER, dev); + + pm_runtime_put_sync(dev); } } diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 911208b89259..3ce3519e8f30 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_PM) += sysfs.o obj-$(CONFIG_PM_SLEEP) += main.o +obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 1b1a786b7dec..86990011277b 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -21,6 +21,7 @@ #include #include #include
+#include #include #include #include @@ -48,6 +49,16 @@ static DEFINE_MUTEX(dpm_list_mtx); */ static bool transition_started; +/** + * device_pm_init - Initialize the PM-related part of a device object + * @dev: Device object being initialized. + */ +void device_pm_init(struct device *dev) +{ + dev->power.status = DPM_ON; + pm_runtime_init(dev); +} + /** * device_pm_lock - lock the list of active devices used by the PM core */ @@ -105,6 +116,7 @@ void device_pm_remove(struct device *dev) mutex_lock(&dpm_list_mtx); list_del_init(&dev->power.entry); mutex_unlock(&dpm_list_mtx); + pm_runtime_remove(dev); } /** @@ -512,6 +524,7 @@ static void dpm_complete(pm_message_t state) mutex_unlock(&dpm_list_mtx); device_complete(dev, state); + pm_runtime_put_noidle(dev); mutex_lock(&dpm_list_mtx); } @@ -757,7 +770,14 @@ static int dpm_prepare(pm_message_t state) dev->power.status = DPM_PREPARING; mutex_unlock(&dpm_list_mtx); - error = device_prepare(dev, state); + pm_runtime_get_noresume(dev); + if (pm_runtime_barrier(dev) && device_may_wakeup(dev)) { + /* Wake-up requested during system sleep transition. */ + pm_runtime_put_noidle(dev); + error = -EBUSY; + } else { + error = device_prepare(dev, state); + } mutex_lock(&dpm_list_mtx); if (error) { diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index c7cb4fc3735c..b8fa1aa5225a 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -1,7 +1,14 @@ -static inline void device_pm_init(struct device *dev) -{ - dev->power.status = DPM_ON; -} +#ifdef CONFIG_PM_RUNTIME + +extern void pm_runtime_init(struct device *dev); +extern void pm_runtime_remove(struct device *dev); + +#else /* !CONFIG_PM_RUNTIME */ + +static inline void pm_runtime_init(struct device *dev) {} +static inline void pm_runtime_remove(struct device *dev) {} + +#endif /* !CONFIG_PM_RUNTIME */ #ifdef CONFIG_PM_SLEEP @@ -16,23 +23,33 @@ static inline struct device *to_device(struct list_head *entry) return container_of(entry, struct device, power.entry); } +extern void device_pm_init(struct device *dev); extern void device_pm_add(struct device *); extern void device_pm_remove(struct device *); extern void device_pm_move_before(struct device *, struct device *); extern void device_pm_move_after(struct device *, struct device *); extern void device_pm_move_last(struct device *); -#else /* CONFIG_PM_SLEEP */ +#else /* !CONFIG_PM_SLEEP */ + +static inline void device_pm_init(struct device *dev) +{ + pm_runtime_init(dev); +} + +static inline void device_pm_remove(struct device *dev) +{ + pm_runtime_remove(dev); +} static inline void device_pm_add(struct device *dev) {} -static inline void device_pm_remove(struct device *dev) {} static inline void device_pm_move_before(struct device *deva, struct device *devb) {} static inline void device_pm_move_after(struct device *deva, struct device *devb) {} static inline void device_pm_move_last(struct device *dev) {} -#endif +#endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_PM diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c new file mode 100644 index 000000000000..38556f6cc22d --- /dev/null +++ b/drivers/base/power/runtime.c @@ -0,0 +1,1011 @@ +/* + * drivers/base/power/runtime.c - Helper functions for device run-time PM + * + * Copyright (c) 2009 Rafael J. Wysocki , Novell Inc. + * + * This file is released under the GPLv2. 
+ */ + +#include +#include +#include + +static int __pm_runtime_resume(struct device *dev, bool from_wq); +static int __pm_request_idle(struct device *dev); +static int __pm_request_resume(struct device *dev); + +/** + * pm_runtime_deactivate_timer - Deactivate given device's suspend timer. + * @dev: Device to handle. + */ +static void pm_runtime_deactivate_timer(struct device *dev) +{ + if (dev->power.timer_expires > 0) { + del_timer(&dev->power.suspend_timer); + dev->power.timer_expires = 0; + } +} + +/** + * pm_runtime_cancel_pending - Deactivate suspend timer and cancel requests. + * @dev: Device to handle. + */ +static void pm_runtime_cancel_pending(struct device *dev) +{ + pm_runtime_deactivate_timer(dev); + /* + * In case there's a request pending, make sure its work function will + * return without doing anything. + */ + dev->power.request = RPM_REQ_NONE; +} + +/** + * __pm_runtime_idle - Notify device bus type if the device can be suspended. + * @dev: Device to notify the bus type about. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_runtime_idle(struct device *dev) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + int retval = 0; + + dev_dbg(dev, "__pm_runtime_idle()!\n"); + + if (dev->power.runtime_error) + retval = -EINVAL; + else if (dev->power.idle_notification) + retval = -EINPROGRESS; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0 + || dev->power.runtime_status != RPM_ACTIVE) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + goto out; + + if (dev->power.request_pending) { + /* + * If an idle notification request is pending, cancel it. Any + * other pending request takes precedence over us. + */ + if (dev->power.request == RPM_REQ_IDLE) { + dev->power.request = RPM_REQ_NONE; + } else if (dev->power.request != RPM_REQ_NONE) { + retval = -EAGAIN; + goto out; + } + } + + dev->power.idle_notification = true; + + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle) { + spin_unlock_irq(&dev->power.lock); + + dev->bus->pm->runtime_idle(dev); + + spin_lock_irq(&dev->power.lock); + } + + dev->power.idle_notification = false; + wake_up_all(&dev->power.wait_queue); + + out: + dev_dbg(dev, "__pm_runtime_idle() returns %d!\n", retval); + + return retval; +} + +/** + * pm_runtime_idle - Notify device bus type if the device can be suspended. + * @dev: Device to notify the bus type about. + */ +int pm_runtime_idle(struct device *dev) +{ + int retval; + + spin_lock_irq(&dev->power.lock); + retval = __pm_runtime_idle(dev); + spin_unlock_irq(&dev->power.lock); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_idle); + +/** + * __pm_runtime_suspend - Carry out run-time suspend of given device. + * @dev: Device to suspend. + * @from_wq: If set, the function has been called via pm_wq. + * + * Check if the device can be suspended and run the ->runtime_suspend() callback + * provided by its bus type. If another suspend has been started earlier, wait + * for it to finish. If an idle notification or suspend request is pending or + * scheduled, cancel it. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +int __pm_runtime_suspend(struct device *dev, bool from_wq) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + struct device *parent = NULL; + bool notify = false; + int retval = 0; + + dev_dbg(dev, "__pm_runtime_suspend()%s!\n", + from_wq ? 
" from workqueue" : ""); + + repeat: + if (dev->power.runtime_error) { + retval = -EINVAL; + goto out; + } + + /* Pending resume requests take precedence over us. */ + if (dev->power.request_pending + && dev->power.request == RPM_REQ_RESUME) { + retval = -EAGAIN; + goto out; + } + + /* Other scheduled or pending requests need to be canceled. */ + pm_runtime_cancel_pending(dev); + + if (dev->power.runtime_status == RPM_SUSPENDED) + retval = 1; + else if (dev->power.runtime_status == RPM_RESUMING + || dev->power.disable_depth > 0 + || atomic_read(&dev->power.usage_count) > 0) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + goto out; + + if (dev->power.runtime_status == RPM_SUSPENDING) { + DEFINE_WAIT(wait); + + if (from_wq) { + retval = -EINPROGRESS; + goto out; + } + + /* Wait for the other suspend running in parallel with us. */ + for (;;) { + prepare_to_wait(&dev->power.wait_queue, &wait, + TASK_UNINTERRUPTIBLE); + if (dev->power.runtime_status != RPM_SUSPENDING) + break; + + spin_unlock_irq(&dev->power.lock); + + schedule(); + + spin_lock_irq(&dev->power.lock); + } + finish_wait(&dev->power.wait_queue, &wait); + goto repeat; + } + + dev->power.runtime_status = RPM_SUSPENDING; + + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) { + spin_unlock_irq(&dev->power.lock); + + retval = dev->bus->pm->runtime_suspend(dev); + + spin_lock_irq(&dev->power.lock); + dev->power.runtime_error = retval; + } else { + retval = -ENOSYS; + } + + if (retval) { + dev->power.runtime_status = RPM_ACTIVE; + pm_runtime_cancel_pending(dev); + dev->power.deferred_resume = false; + + if (retval == -EAGAIN || retval == -EBUSY) { + notify = true; + dev->power.runtime_error = 0; + } + } else { + dev->power.runtime_status = RPM_SUSPENDED; + + if (dev->parent) { + parent = dev->parent; + atomic_add_unless(&parent->power.child_count, -1, 0); + } + } + wake_up_all(&dev->power.wait_queue); + + if (dev->power.deferred_resume) { + dev->power.deferred_resume = false; + __pm_runtime_resume(dev, false); + retval = -EAGAIN; + goto out; + } + + if (notify) + __pm_runtime_idle(dev); + + if (parent && !parent->power.ignore_children) { + spin_unlock_irq(&dev->power.lock); + + pm_request_idle(parent); + + spin_lock_irq(&dev->power.lock); + } + + out: + dev_dbg(dev, "__pm_runtime_suspend() returns %d!\n", retval); + + return retval; +} + +/** + * pm_runtime_suspend - Carry out run-time suspend of given device. + * @dev: Device to suspend. + */ +int pm_runtime_suspend(struct device *dev) +{ + int retval; + + spin_lock_irq(&dev->power.lock); + retval = __pm_runtime_suspend(dev, false); + spin_unlock_irq(&dev->power.lock); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_suspend); + +/** + * __pm_runtime_resume - Carry out run-time resume of given device. + * @dev: Device to resume. + * @from_wq: If set, the function has been called via pm_wq. + * + * Check if the device can be woken up and run the ->runtime_resume() callback + * provided by its bus type. If another resume has been started earlier, wait + * for it to finish. If there's a suspend running in parallel with this + * function, wait for it to finish and resume the device. Cancel any scheduled + * or pending requests. + * + * This function must be called under dev->power.lock with interrupts disabled. 
+ */ +int __pm_runtime_resume(struct device *dev, bool from_wq) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + struct device *parent = NULL; + int retval = 0; + + dev_dbg(dev, "__pm_runtime_resume()%s!\n", + from_wq ? " from workqueue" : ""); + + repeat: + if (dev->power.runtime_error) { + retval = -EINVAL; + goto out; + } + + pm_runtime_cancel_pending(dev); + + if (dev->power.runtime_status == RPM_ACTIVE) + retval = 1; + else if (dev->power.disable_depth > 0) + retval = -EAGAIN; + if (retval) + goto out; + + if (dev->power.runtime_status == RPM_RESUMING + || dev->power.runtime_status == RPM_SUSPENDING) { + DEFINE_WAIT(wait); + + if (from_wq) { + if (dev->power.runtime_status == RPM_SUSPENDING) + dev->power.deferred_resume = true; + retval = -EINPROGRESS; + goto out; + } + + /* Wait for the operation carried out in parallel with us. */ + for (;;) { + prepare_to_wait(&dev->power.wait_queue, &wait, + TASK_UNINTERRUPTIBLE); + if (dev->power.runtime_status != RPM_RESUMING + && dev->power.runtime_status != RPM_SUSPENDING) + break; + + spin_unlock_irq(&dev->power.lock); + + schedule(); + + spin_lock_irq(&dev->power.lock); + } + finish_wait(&dev->power.wait_queue, &wait); + goto repeat; + } + + if (!parent && dev->parent) { + /* + * Increment the parent's resume counter and resume it if + * necessary. + */ + parent = dev->parent; + spin_unlock_irq(&dev->power.lock); + + pm_runtime_get_noresume(parent); + + spin_lock_irq(&parent->power.lock); + /* + * We can resume if the parent's run-time PM is disabled or it + * is set to ignore children. + */ + if (!parent->power.disable_depth + && !parent->power.ignore_children) { + __pm_runtime_resume(parent, false); + if (parent->power.runtime_status != RPM_ACTIVE) + retval = -EBUSY; + } + spin_unlock_irq(&parent->power.lock); + + spin_lock_irq(&dev->power.lock); + if (retval) + goto out; + goto repeat; + } + + dev->power.runtime_status = RPM_RESUMING; + + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) { + spin_unlock_irq(&dev->power.lock); + + retval = dev->bus->pm->runtime_resume(dev); + + spin_lock_irq(&dev->power.lock); + dev->power.runtime_error = retval; + } else { + retval = -ENOSYS; + } + + if (retval) { + dev->power.runtime_status = RPM_SUSPENDED; + pm_runtime_cancel_pending(dev); + } else { + dev->power.runtime_status = RPM_ACTIVE; + if (parent) + atomic_inc(&parent->power.child_count); + } + wake_up_all(&dev->power.wait_queue); + + if (!retval) + __pm_request_idle(dev); + + out: + if (parent) { + spin_unlock_irq(&dev->power.lock); + + pm_runtime_put(parent); + + spin_lock_irq(&dev->power.lock); + } + + dev_dbg(dev, "__pm_runtime_resume() returns %d!\n", retval); + + return retval; +} + +/** + * pm_runtime_resume - Carry out run-time resume of given device. + * @dev: Device to resume. + */ +int pm_runtime_resume(struct device *dev) +{ + int retval; + + spin_lock_irq(&dev->power.lock); + retval = __pm_runtime_resume(dev, false); + spin_unlock_irq(&dev->power.lock); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_resume); + +/** + * pm_runtime_work - Universal run-time PM work function. + * @work: Work structure used for scheduling the execution of this function. + * + * Use @work to get the device object the work is to be done for, determine what + * is to be done and execute the appropriate run-time PM function.
+ */ +static void pm_runtime_work(struct work_struct *work) +{ + struct device *dev = container_of(work, struct device, power.work); + enum rpm_request req; + + spin_lock_irq(&dev->power.lock); + + if (!dev->power.request_pending) + goto out; + + req = dev->power.request; + dev->power.request = RPM_REQ_NONE; + dev->power.request_pending = false; + + switch (req) { + case RPM_REQ_NONE: + break; + case RPM_REQ_IDLE: + __pm_runtime_idle(dev); + break; + case RPM_REQ_SUSPEND: + __pm_runtime_suspend(dev, true); + break; + case RPM_REQ_RESUME: + __pm_runtime_resume(dev, true); + break; + } + + out: + spin_unlock_irq(&dev->power.lock); +} + +/** + * __pm_request_idle - Submit an idle notification request for given device. + * @dev: Device to handle. + * + * Check if the device's run-time PM status is correct for suspending the device + * and queue up a request to run __pm_runtime_idle() for it. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_request_idle(struct device *dev) +{ + int retval = 0; + + if (dev->power.runtime_error) + retval = -EINVAL; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0 + || dev->power.runtime_status == RPM_SUSPENDED + || dev->power.runtime_status == RPM_SUSPENDING) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + return retval; + + if (dev->power.request_pending) { + /* Any requests other than RPM_REQ_IDLE take precedence. */ + if (dev->power.request == RPM_REQ_NONE) + dev->power.request = RPM_REQ_IDLE; + else if (dev->power.request != RPM_REQ_IDLE) + retval = -EAGAIN; + return retval; + } + + dev->power.request = RPM_REQ_IDLE; + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); + + return retval; +} + +/** + * pm_request_idle - Submit an idle notification request for given device. + * @dev: Device to handle. + */ +int pm_request_idle(struct device *dev) +{ + unsigned long flags; + int retval; + + spin_lock_irqsave(&dev->power.lock, flags); + retval = __pm_request_idle(dev); + spin_unlock_irqrestore(&dev->power.lock, flags); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_request_idle); + +/** + * __pm_request_suspend - Submit a suspend request for given device. + * @dev: Device to suspend. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_request_suspend(struct device *dev) +{ + int retval = 0; + + if (dev->power.runtime_error) + return -EINVAL; + + if (dev->power.runtime_status == RPM_SUSPENDED) + retval = 1; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0) + retval = -EAGAIN; + else if (dev->power.runtime_status == RPM_SUSPENDING) + retval = -EINPROGRESS; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval < 0) + return retval; + + pm_runtime_deactivate_timer(dev); + + if (dev->power.request_pending) { + /* + * Pending resume requests take precedence over us, but we can + * overtake any other pending request. + */ + if (dev->power.request == RPM_REQ_RESUME) + retval = -EAGAIN; + else if (dev->power.request != RPM_REQ_SUSPEND) + dev->power.request = retval ? + RPM_REQ_NONE : RPM_REQ_SUSPEND; + return retval; + } else if (retval) { + return retval; + } + + dev->power.request = RPM_REQ_SUSPEND; + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); + + return 0; +} + +/** + * pm_suspend_timer_fn - Timer function for pm_schedule_suspend().
+ @data: Device pointer passed by pm_schedule_suspend(). + * + * Check if the time is right and execute __pm_request_suspend() in that case. + */ +static void pm_suspend_timer_fn(unsigned long data) +{ + struct device *dev = (struct device *)data; + unsigned long flags; + unsigned long expires; + + spin_lock_irqsave(&dev->power.lock, flags); + + expires = dev->power.timer_expires; + /* If 'expires' is after 'jiffies' we've been called too early. */ + if (expires > 0 && !time_after(expires, jiffies)) { + dev->power.timer_expires = 0; + __pm_request_suspend(dev); + } + + spin_unlock_irqrestore(&dev->power.lock, flags); +} + +/** + * pm_schedule_suspend - Set up a timer to submit a suspend request in future. + * @dev: Device to suspend. + * @delay: Time to wait before submitting a suspend request, in milliseconds. + */ +int pm_schedule_suspend(struct device *dev, unsigned int delay) +{ + unsigned long flags; + int retval = 0; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (dev->power.runtime_error) { + retval = -EINVAL; + goto out; + } + + if (!delay) { + retval = __pm_request_suspend(dev); + goto out; + } + + pm_runtime_deactivate_timer(dev); + + if (dev->power.request_pending) { + /* + * Pending resume requests take precedence over us, but any + * other pending requests have to be canceled. + */ + if (dev->power.request == RPM_REQ_RESUME) { + retval = -EAGAIN; + goto out; + } + dev->power.request = RPM_REQ_NONE; + } + + if (dev->power.runtime_status == RPM_SUSPENDED) + retval = 1; + else if (dev->power.runtime_status == RPM_SUSPENDING) + retval = -EINPROGRESS; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + goto out; + + dev->power.timer_expires = jiffies + msecs_to_jiffies(delay); + mod_timer(&dev->power.suspend_timer, dev->power.timer_expires); + + out: + spin_unlock_irqrestore(&dev->power.lock, flags); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_schedule_suspend); + +/** + * __pm_request_resume - Submit a resume request for given device. + * @dev: Device to resume. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_request_resume(struct device *dev) +{ + int retval = 0; + + if (dev->power.runtime_error) + return -EINVAL; + + if (dev->power.runtime_status == RPM_ACTIVE) + retval = 1; + else if (dev->power.runtime_status == RPM_RESUMING) + retval = -EINPROGRESS; + else if (dev->power.disable_depth > 0) + retval = -EAGAIN; + if (retval < 0) + return retval; + + pm_runtime_deactivate_timer(dev); + + if (dev->power.request_pending) { + /* If non-resume request is pending, we can overtake it. */ + dev->power.request = retval ? RPM_REQ_NONE : RPM_REQ_RESUME; + return retval; + } else if (retval) { + return retval; + } + + dev->power.request = RPM_REQ_RESUME; + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); + + return retval; +} + +/** + * pm_request_resume - Submit a resume request for given device. + * @dev: Device to resume. + */ +int pm_request_resume(struct device *dev) +{ + unsigned long flags; + int retval; + + spin_lock_irqsave(&dev->power.lock, flags); + retval = __pm_request_resume(dev); + spin_unlock_irqrestore(&dev->power.lock, flags); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_request_resume); + +/** + * __pm_runtime_get - Reference count a device and wake it up, if necessary. + * @dev: Device to handle.
+ * @sync: If set and the device is suspended, resume it synchronously. + * + * Increment the usage count of the device and if it was zero previously, + * resume it or submit a resume request for it, depending on the value of @sync. + */ +int __pm_runtime_get(struct device *dev, bool sync) +{ + int retval = 1; + + if (atomic_add_return(1, &dev->power.usage_count) == 1) + retval = sync ? pm_runtime_resume(dev) : pm_request_resume(dev); + + return retval; +} +EXPORT_SYMBOL_GPL(__pm_runtime_get); + +/** + * __pm_runtime_put - Decrement the device's usage counter and notify its bus. + * @dev: Device to handle. + * @sync: If the device's bus type is to be notified, do that synchronously. + * + * Decrement the usage count of the device and if it reaches zero, carry out a + * synchronous idle notification or submit an idle notification request for it, + * depending on the value of @sync. + */ +int __pm_runtime_put(struct device *dev, bool sync) +{ + int retval = 0; + + if (atomic_dec_and_test(&dev->power.usage_count)) + retval = sync ? pm_runtime_idle(dev) : pm_request_idle(dev); + + return retval; +} +EXPORT_SYMBOL_GPL(__pm_runtime_put); + +/** + * __pm_runtime_set_status - Set run-time PM status of a device. + * @dev: Device to handle. + * @status: New run-time PM status of the device. + * + * If run-time PM of the device is disabled or its power.runtime_error field is + * different from zero, the status may be changed either to RPM_ACTIVE, or to + * RPM_SUSPENDED, as long as that reflects the actual state of the device. + * However, if the device has a parent and the parent is not active, and the + * parent's power.ignore_children flag is unset, the device's status cannot be + * set to RPM_ACTIVE, so -EBUSY is returned in that case. + * + * If successful, __pm_runtime_set_status() clears the power.runtime_error field + * and the device parent's counter of unsuspended children is modified to + * reflect the new status. If the new status is RPM_SUSPENDED, an idle + * notification request for the parent is submitted. + */ +int __pm_runtime_set_status(struct device *dev, unsigned int status) +{ + struct device *parent = dev->parent; + unsigned long flags; + bool notify_parent = false; + int error = 0; + + if (status != RPM_ACTIVE && status != RPM_SUSPENDED) + return -EINVAL; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (!dev->power.runtime_error && !dev->power.disable_depth) { + error = -EAGAIN; + goto out; + } + + if (dev->power.runtime_status == status) + goto out_set; + + if (status == RPM_SUSPENDED) { + /* It always is possible to set the status to 'suspended'. */ + if (parent) { + atomic_add_unless(&parent->power.child_count, -1, 0); + notify_parent = !parent->power.ignore_children; + } + goto out_set; + } + + if (parent) { + spin_lock_irq(&parent->power.lock); + + /* + * It is invalid to put an active child under a parent that is + * not active, has run-time PM enabled and the + * 'power.ignore_children' flag unset. 
+/**
+ * __pm_runtime_set_status - Set run-time PM status of a device.
+ * @dev: Device to handle.
+ * @status: New run-time PM status of the device.
+ *
+ * If run-time PM of the device is disabled or its power.runtime_error field is
+ * different from zero, the status may be changed either to RPM_ACTIVE, or to
+ * RPM_SUSPENDED, as long as that reflects the actual state of the device.
+ * However, if the device has a parent and the parent is not active, and the
+ * parent's power.ignore_children flag is unset, the device's status cannot be
+ * set to RPM_ACTIVE, so -EBUSY is returned in that case.
+ *
+ * If successful, __pm_runtime_set_status() clears the power.runtime_error
+ * field and the device parent's counter of unsuspended children is modified
+ * to reflect the new status.  If the new status is RPM_SUSPENDED, an idle
+ * notification request for the parent is submitted.
+ */
+int __pm_runtime_set_status(struct device *dev, unsigned int status)
+{
+	struct device *parent = dev->parent;
+	unsigned long flags;
+	bool notify_parent = false;
+	int error = 0;
+
+	if (status != RPM_ACTIVE && status != RPM_SUSPENDED)
+		return -EINVAL;
+
+	spin_lock_irqsave(&dev->power.lock, flags);
+
+	if (!dev->power.runtime_error && !dev->power.disable_depth) {
+		error = -EAGAIN;
+		goto out;
+	}
+
+	if (dev->power.runtime_status == status)
+		goto out_set;
+
+	if (status == RPM_SUSPENDED) {
+		/* It is always possible to set the status to 'suspended'. */
+		if (parent) {
+			atomic_add_unless(&parent->power.child_count, -1, 0);
+			notify_parent = !parent->power.ignore_children;
+		}
+		goto out_set;
+	}
+
+	if (parent) {
+		spin_lock_irq(&parent->power.lock);
+
+		/*
+		 * It is invalid to put an active child under a parent that is
+		 * not active, has run-time PM enabled and the
+		 * 'power.ignore_children' flag unset.
+		 */
+		if (!parent->power.disable_depth
+		    && !parent->power.ignore_children
+		    && parent->power.runtime_status != RPM_ACTIVE) {
+			error = -EBUSY;
+		} else {
+			if (dev->power.runtime_status == RPM_SUSPENDED)
+				atomic_inc(&parent->power.child_count);
+		}
+
+		spin_unlock_irq(&parent->power.lock);
+
+		if (error)
+			goto out;
+	}
+
+ out_set:
+	dev->power.runtime_status = status;
+	dev->power.runtime_error = 0;
+ out:
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+
+	if (notify_parent)
+		pm_request_idle(parent);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(__pm_runtime_set_status);
+
+/**
+ * __pm_runtime_barrier - Cancel pending requests and wait for completions.
+ * @dev: Device to handle.
+ *
+ * Flush all pending requests for the device from pm_wq and wait for all
+ * run-time PM operations involving the device in progress to complete.
+ *
+ * Should be called under dev->power.lock with interrupts disabled.
+ */
+static void __pm_runtime_barrier(struct device *dev)
+{
+	pm_runtime_deactivate_timer(dev);
+
+	if (dev->power.request_pending) {
+		dev->power.request = RPM_REQ_NONE;
+		spin_unlock_irq(&dev->power.lock);
+
+		cancel_work_sync(&dev->power.work);
+
+		spin_lock_irq(&dev->power.lock);
+		dev->power.request_pending = false;
+	}
+
+	if (dev->power.runtime_status == RPM_SUSPENDING
+	    || dev->power.runtime_status == RPM_RESUMING
+	    || dev->power.idle_notification) {
+		DEFINE_WAIT(wait);
+
+		/* Suspend, wake-up or idle notification in progress. */
+		for (;;) {
+			prepare_to_wait(&dev->power.wait_queue, &wait,
+					TASK_UNINTERRUPTIBLE);
+			if (dev->power.runtime_status != RPM_SUSPENDING
+			    && dev->power.runtime_status != RPM_RESUMING
+			    && !dev->power.idle_notification)
+				break;
+			spin_unlock_irq(&dev->power.lock);
+
+			schedule();
+
+			spin_lock_irq(&dev->power.lock);
+		}
+		finish_wait(&dev->power.wait_queue, &wait);
+	}
+}
+
+/**
+ * pm_runtime_barrier - Flush pending requests and wait for completions.
+ * @dev: Device to handle.
+ *
+ * Prevent the device from being suspended by incrementing its usage counter
+ * and, if there's a pending resume request for the device, wake the device up.
+ * Next, make sure that all pending requests for the device have been flushed
+ * from pm_wq and wait for all run-time PM operations involving the device in
+ * progress to complete.
+ *
+ * Return value:
+ * 1, if there was a resume request pending and the device had to be woken up,
+ * 0, otherwise
+ */
+int pm_runtime_barrier(struct device *dev)
+{
+	int retval = 0;
+
+	pm_runtime_get_noresume(dev);
+	spin_lock_irq(&dev->power.lock);
+
+	if (dev->power.request_pending
+	    && dev->power.request == RPM_REQ_RESUME) {
+		__pm_runtime_resume(dev, false);
+		retval = 1;
+	}
+
+	__pm_runtime_barrier(dev);
+
+	spin_unlock_irq(&dev->power.lock);
+	pm_runtime_put_noidle(dev);
+
+	return retval;
+}
+EXPORT_SYMBOL_GPL(pm_runtime_barrier);
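/*
 * A probe/remove sketch, not part of the patch: how the status and
 * enable/disable primitives are intended to be combined.  foo_probe() and
 * foo_remove() are hypothetical; the pm_runtime_* calls are the helpers
 * provided by this patch.  pm_runtime_init() leaves disable_depth at 1, so
 * pm_runtime_set_active() is legal before pm_runtime_enable().
 */
static int foo_probe(struct device *dev)
{
	/* The hardware comes up powered; tell the PM core so. */
	pm_runtime_set_active(dev);
	/* Drop the initial disable_depth and let runtime PM operate. */
	pm_runtime_enable(dev);
	return 0;
}

static int foo_remove(struct device *dev)
{
	/* Cancels pending requests and waits for callbacks in progress. */
	pm_runtime_disable(dev);
	return 0;
}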
+/**
+ * __pm_runtime_disable - Disable run-time PM of a device.
+ * @dev: Device to handle.
+ * @check_resume: If set, check if there's a resume request for the device.
+ *
+ * Increment power.disable_depth for the device and if it was zero previously,
+ * cancel all pending run-time PM requests for the device and wait for all
+ * operations in progress to complete.  The device can be either active or
+ * suspended after its run-time PM has been disabled.
+ *
+ * If @check_resume is set and there's a resume request pending when
+ * __pm_runtime_disable() is called and power.disable_depth is zero, the
+ * function will wake up the device before disabling its run-time PM.
+ */
+void __pm_runtime_disable(struct device *dev, bool check_resume)
+{
+	spin_lock_irq(&dev->power.lock);
+
+	if (dev->power.disable_depth > 0) {
+		dev->power.disable_depth++;
+		goto out;
+	}
+
+	/*
+	 * Wake up the device if there's a resume request pending, because that
+	 * means there probably is some I/O to process and disabling run-time PM
+	 * shouldn't prevent the device from processing the I/O.
+	 */
+	if (check_resume && dev->power.request_pending
+	    && dev->power.request == RPM_REQ_RESUME) {
+		/*
+		 * Prevent suspends and idle notifications from being carried
+		 * out after we have woken up the device.
+		 */
+		pm_runtime_get_noresume(dev);
+
+		__pm_runtime_resume(dev, false);
+
+		pm_runtime_put_noidle(dev);
+	}
+
+	if (!dev->power.disable_depth++)
+		__pm_runtime_barrier(dev);
+
+ out:
+	spin_unlock_irq(&dev->power.lock);
+}
+EXPORT_SYMBOL_GPL(__pm_runtime_disable);
+
+/**
+ * pm_runtime_enable - Enable run-time PM of a device.
+ * @dev: Device to handle.
+ */
+void pm_runtime_enable(struct device *dev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->power.lock, flags);
+
+	if (dev->power.disable_depth > 0)
+		dev->power.disable_depth--;
+	else
+		dev_warn(dev, "Unbalanced %s!\n", __func__);
+
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_enable);
+
+/**
+ * pm_runtime_init - Initialize run-time PM fields in given device object.
+ * @dev: Device object to initialize.
+ */
+void pm_runtime_init(struct device *dev)
+{
+	spin_lock_init(&dev->power.lock);
+
+	dev->power.runtime_status = RPM_SUSPENDED;
+	dev->power.idle_notification = false;
+
+	dev->power.disable_depth = 1;
+	atomic_set(&dev->power.usage_count, 0);
+
+	dev->power.runtime_error = 0;
+
+	atomic_set(&dev->power.child_count, 0);
+	pm_suspend_ignore_children(dev, false);
+
+	dev->power.request_pending = false;
+	dev->power.request = RPM_REQ_NONE;
+	dev->power.deferred_resume = false;
+	INIT_WORK(&dev->power.work, pm_runtime_work);
+
+	dev->power.timer_expires = 0;
+	setup_timer(&dev->power.suspend_timer, pm_suspend_timer_fn,
+			(unsigned long)dev);
+
+	init_waitqueue_head(&dev->power.wait_queue);
+}
+
+/**
+ * pm_runtime_remove - Prepare for removing a device from device hierarchy.
+ * @dev: Device object being removed from device hierarchy.
+ */
+void pm_runtime_remove(struct device *dev)
+{
+	__pm_runtime_disable(dev, false);
+
+	/* Change the status back to 'suspended' to match the initial status. */
+	if (dev->power.runtime_status == RPM_ACTIVE)
+		pm_runtime_set_suspended(dev);
+}
diff --git a/include/linux/pm.h b/include/linux/pm.h
index b3f74764a586..2b6e20df0e52 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -22,6 +22,10 @@
 #define _LINUX_PM_H
 
 #include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/timer.h>
 
 /*
  * Callbacks for platform drivers to implement.
@@ -165,6 +169,28 @@ typedef struct pm_message {
  * It is allowed to unregister devices while the above callbacks are being
  * executed. However, it is not allowed to unregister a device from within any
  * of its own callbacks.
+ *
+ * There also are the following callbacks related to run-time power management
+ * of devices:
+ *
+ * @runtime_suspend: Prepare the device for a condition in which it won't be
+ *	able to communicate with the CPU(s) and RAM due to power management.
+ *	This need not mean that the device should be put into a low power state.
+ *	For example, if the device is behind a link which is about to be turned
+ *	off, the device may remain at full power.
If the device does go to low + * power and if device_may_wakeup(dev) is true, remote wake-up (i.e., a + * hardware mechanism allowing the device to request a change of its power + * state, such as PCI PME) should be enabled for it. + * + * @runtime_resume: Put the device into the fully active state in response to a + * wake-up event generated by hardware or at the request of software. If + * necessary, put the device into the full power state and restore its + * registers, so that it is fully operational. + * + * @runtime_idle: Device appears to be inactive and it might be put into a low + * power state if all of the necessary conditions are satisfied. Check + * these conditions and handle the device as appropriate, possibly queueing + * a suspend request for it. The return value is ignored by the PM core. */ struct dev_pm_ops { @@ -182,6 +208,9 @@ struct dev_pm_ops { int (*thaw_noirq)(struct device *dev); int (*poweroff_noirq)(struct device *dev); int (*restore_noirq)(struct device *dev); + int (*runtime_suspend)(struct device *dev); + int (*runtime_resume)(struct device *dev); + int (*runtime_idle)(struct device *dev); }; /** @@ -315,14 +344,80 @@ enum dpm_state { DPM_OFF_IRQ, }; +/** + * Device run-time power management status. + * + * These status labels are used internally by the PM core to indicate the + * current status of a device with respect to the PM core operations. They do + * not reflect the actual power state of the device or its status as seen by the + * driver. + * + * RPM_ACTIVE Device is fully operational. Indicates that the device + * bus type's ->runtime_resume() callback has completed + * successfully. + * + * RPM_SUSPENDED Device bus type's ->runtime_suspend() callback has + * completed successfully. The device is regarded as + * suspended. + * + * RPM_RESUMING Device bus type's ->runtime_resume() callback is being + * executed. + * + * RPM_SUSPENDING Device bus type's ->runtime_suspend() callback is being + * executed. + */ + +enum rpm_status { + RPM_ACTIVE = 0, + RPM_RESUMING, + RPM_SUSPENDED, + RPM_SUSPENDING, +}; + +/** + * Device run-time power management request types. + * + * RPM_REQ_NONE Do nothing. + * + * RPM_REQ_IDLE Run the device bus type's ->runtime_idle() callback + * + * RPM_REQ_SUSPEND Run the device bus type's ->runtime_suspend() callback + * + * RPM_REQ_RESUME Run the device bus type's ->runtime_resume() callback + */ + +enum rpm_request { + RPM_REQ_NONE = 0, + RPM_REQ_IDLE, + RPM_REQ_SUSPEND, + RPM_REQ_RESUME, +}; + struct dev_pm_info { pm_message_t power_state; - unsigned can_wakeup:1; - unsigned should_wakeup:1; + unsigned int can_wakeup:1; + unsigned int should_wakeup:1; enum dpm_state status; /* Owned by the PM core */ -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM_SLEEP struct list_head entry; #endif +#ifdef CONFIG_PM_RUNTIME + struct timer_list suspend_timer; + unsigned long timer_expires; + struct work_struct work; + wait_queue_head_t wait_queue; + spinlock_t lock; + atomic_t usage_count; + atomic_t child_count; + unsigned int disable_depth:3; + unsigned int ignore_children:1; + unsigned int idle_notification:1; + unsigned int request_pending:1; + unsigned int deferred_resume:1; + enum rpm_request request; + enum rpm_status runtime_status; + int runtime_error; +#endif }; /* diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h new file mode 100644 index 000000000000..44087044910f --- /dev/null +++ b/include/linux/pm_runtime.h @@ -0,0 +1,114 @@ +/* + * pm_runtime.h - Device run-time power management helper functions. 
+ *
+ * Copyright (C) 2009 Rafael J. Wysocki
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef _LINUX_PM_RUNTIME_H
+#define _LINUX_PM_RUNTIME_H
+
+#include <linux/device.h>
+#include <linux/pm.h>
+
+#ifdef CONFIG_PM_RUNTIME
+
+extern struct workqueue_struct *pm_wq;
+
+extern int pm_runtime_idle(struct device *dev);
+extern int pm_runtime_suspend(struct device *dev);
+extern int pm_runtime_resume(struct device *dev);
+extern int pm_request_idle(struct device *dev);
+extern int pm_schedule_suspend(struct device *dev, unsigned int delay);
+extern int pm_request_resume(struct device *dev);
+extern int __pm_runtime_get(struct device *dev, bool sync);
+extern int __pm_runtime_put(struct device *dev, bool sync);
+extern int __pm_runtime_set_status(struct device *dev, unsigned int status);
+extern int pm_runtime_barrier(struct device *dev);
+extern void pm_runtime_enable(struct device *dev);
+extern void __pm_runtime_disable(struct device *dev, bool check_resume);
+
+static inline bool pm_children_suspended(struct device *dev)
+{
+	return dev->power.ignore_children
+		|| !atomic_read(&dev->power.child_count);
+}
+
+static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
+{
+	dev->power.ignore_children = enable;
+}
+
+static inline void pm_runtime_get_noresume(struct device *dev)
+{
+	atomic_inc(&dev->power.usage_count);
+}
+
+static inline void pm_runtime_put_noidle(struct device *dev)
+{
+	atomic_add_unless(&dev->power.usage_count, -1, 0);
+}
+
+#else /* !CONFIG_PM_RUNTIME */
+
+static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; }
+static inline int pm_runtime_suspend(struct device *dev) { return -ENOSYS; }
+static inline int pm_runtime_resume(struct device *dev) { return 0; }
+static inline int pm_request_idle(struct device *dev) { return -ENOSYS; }
+static inline int pm_schedule_suspend(struct device *dev, unsigned int delay)
+{
+	return -ENOSYS;
+}
+static inline int pm_request_resume(struct device *dev) { return 0; }
+static inline int __pm_runtime_get(struct device *dev, bool sync) { return 1; }
+static inline int __pm_runtime_put(struct device *dev, bool sync) { return 0; }
+static inline int __pm_runtime_set_status(struct device *dev,
+					    unsigned int status) { return 0; }
+static inline int pm_runtime_barrier(struct device *dev) { return 0; }
+static inline void pm_runtime_enable(struct device *dev) {}
+static inline void __pm_runtime_disable(struct device *dev, bool c) {}
+
+static inline bool pm_children_suspended(struct device *dev) { return false; }
+static inline void pm_suspend_ignore_children(struct device *dev, bool en) {}
+static inline void pm_runtime_get_noresume(struct device *dev) {}
+static inline void pm_runtime_put_noidle(struct device *dev) {}
+
+#endif /* !CONFIG_PM_RUNTIME */
+
+static inline int pm_runtime_get(struct device *dev)
+{
+	return __pm_runtime_get(dev, false);
+}
+
+static inline int pm_runtime_get_sync(struct device *dev)
+{
+	return __pm_runtime_get(dev, true);
+}
+
+static inline int pm_runtime_put(struct device *dev)
+{
+	return __pm_runtime_put(dev, false);
+}
+
+static inline int pm_runtime_put_sync(struct device *dev)
+{
+	return __pm_runtime_put(dev, true);
+}
+
+static inline int pm_runtime_set_active(struct device *dev)
+{
+	return __pm_runtime_set_status(dev, RPM_ACTIVE);
+}
+
+static inline void pm_runtime_set_suspended(struct device *dev)
+{
+	__pm_runtime_set_status(dev, RPM_SUSPENDED);
+}
+
+static inline void pm_runtime_disable(struct device *dev)
+{
+	__pm_runtime_disable(dev, true);
+}
+
+#endif
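/*
 * A bus-type sketch, not part of the patch: filling in the three
 * ->runtime_* slots that this patch adds to struct dev_pm_ops.  The
 * foo_bus_* functions and foo_bus_power_up()/foo_bus_power_down() are
 * hypothetical; pm_schedule_suspend() is the helper exported above.
 */
extern int foo_bus_power_up(struct device *dev);	/* hypothetical */
extern int foo_bus_power_down(struct device *dev);	/* hypothetical */

static int foo_bus_runtime_suspend(struct device *dev)
{
	return foo_bus_power_down(dev);
}

static int foo_bus_runtime_resume(struct device *dev)
{
	return foo_bus_power_up(dev);
}

static int foo_bus_runtime_idle(struct device *dev)
{
	/* The device looks idle: submit a suspend request right away. */
	return pm_schedule_suspend(dev, 0);
}

static const struct dev_pm_ops foo_bus_pm_ops = {
	.runtime_suspend = foo_bus_runtime_suspend,
	.runtime_resume = foo_bus_runtime_resume,
	.runtime_idle = foo_bus_runtime_idle,
};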
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 72067cbdb37f..91e09d3b2eb2 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -208,3 +208,17 @@ config APM_EMULATION
 	  random kernel OOPSes or reboots that don't seem to be related to
 	  anything, try disabling/enabling this option (or disabling/enabling
 	  APM in your BIOS).
+
+config PM_RUNTIME
+	bool "Run-time PM core functionality"
+	depends on PM
+	---help---
+	  Enable functionality allowing I/O devices to be put into energy-saving
+	  (low power) states at run time (or autosuspended) after a specified
+	  period of inactivity and woken up in response to a hardware-generated
+	  wake-up event or a driver's request.
+
+	  Hardware support is generally required for this functionality to work
+	  and the bus type drivers of the buses the devices are on are
+	  responsible for the actual handling of the autosuspend requests and
+	  wake-up events.
diff --git a/kernel/power/main.c b/kernel/power/main.c
index f710e36930cc..347d2cc88cd0 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -11,6 +11,7 @@
 #include <linux/kobject.h>
 #include <linux/string.h>
 #include <linux/resume-trace.h>
+#include <linux/workqueue.h>
 
 #include "power.h"
 
@@ -217,8 +218,24 @@ static struct attribute_group attr_group = {
 	.attrs = g,
 };
 
+#ifdef CONFIG_PM_RUNTIME
+struct workqueue_struct *pm_wq;
+
+static int __init pm_start_workqueue(void)
+{
+	pm_wq = create_freezeable_workqueue("pm");
+
+	return pm_wq ? 0 : -ENOMEM;
+}
+#else
+static inline int pm_start_workqueue(void) { return 0; }
+#endif
+
 static int __init pm_init(void)
 {
+	int error = pm_start_workqueue();
+	if (error)
+		return error;
 	power_kobj = kobject_create_and_add("power", NULL);
 	if (!power_kobj)
 		return -ENOMEM;
--
cgit v1.2.3


From 163f52b6cf3a639df6a72c7937e0eb88b20f1ef3 Mon Sep 17 00:00:00 2001
From: James Bottomley
Date: Sat, 1 Aug 2009 00:39:36 +0000
Subject: [SCSI] ses: fix hotplug with multiple devices and expanders

In a situation either with expanders or with multiple enclosure
devices, hot add doesn't always work.  This is because we try to find
a single enclosure device attached to the host.  Fix this by looping
over all enclosure devices attached to the host and also by making the
find loop recognise that the enclosure devices may be expander remote
(i.e. not parented by the host).

Signed-off-by: James Bottomley
Signed-off-by: James Bottomley
---
 drivers/misc/enclosure.c  | 44 ++++++++++++++++++++++++++++++++------------
 drivers/scsi/ses.c        | 10 ++++++----
 include/linux/enclosure.h |  3 ++-
 3 files changed, 40 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c
index 348443bdb23b..789d12128c24 100644
--- a/drivers/misc/enclosure.c
+++ b/drivers/misc/enclosure.c
@@ -33,24 +33,44 @@ static DEFINE_MUTEX(container_list_lock);
 static struct class enclosure_class;
 
 /**
- * enclosure_find - find an enclosure given a device
- * @dev: the device to find for
+ * enclosure_find - find an enclosure given a parent device
+ * @dev: the parent to match against
+ * @start: Optional enclosure device to start from (NULL if none)
  *
- * Looks through the list of registered enclosures to see
- * if it can find a match for a device.  Returns NULL if no
- * enclosure is found. Obtains a reference to the enclosure class
- * device which must be released with device_put().
+ * Looks through the list of registered enclosures to find all those
+ * with @dev as a parent.  Returns NULL if no enclosure is
+ * found.
@start can be used as a starting point to obtain multiple + * enclosures per parent (should begin with NULL and then be set to + * each returned enclosure device). Obtains a reference to the + * enclosure class device which must be released with device_put(). + * If @start is not NULL, a reference must be taken on it which is + * released before returning (this allows a loop through all + * enclosures to exit with only the reference on the enclosure of + * interest held). Note that the @dev may correspond to the actual + * device housing the enclosure, in which case no iteration via @start + * is required. */ -struct enclosure_device *enclosure_find(struct device *dev) +struct enclosure_device *enclosure_find(struct device *dev, + struct enclosure_device *start) { struct enclosure_device *edev; mutex_lock(&container_list_lock); - list_for_each_entry(edev, &container_list, node) { - if (edev->edev.parent == dev) { - get_device(&edev->edev); - mutex_unlock(&container_list_lock); - return edev; + edev = list_prepare_entry(start, &container_list, node); + if (start) + put_device(&start->edev); + + list_for_each_entry_continue(edev, &container_list, node) { + struct device *parent = edev->edev.parent; + /* parent might not be immediate, so iterate up to + * the root of the tree if necessary */ + while (parent) { + if (parent == dev) { + get_device(&edev->edev); + mutex_unlock(&container_list_lock); + return edev; + } + parent = parent->parent; } } mutex_unlock(&container_list_lock); diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 4f618f487356..e1b8c828f03a 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -413,10 +413,11 @@ static int ses_intf_add(struct device *cdev, if (!scsi_device_enclosure(sdev)) { /* not an enclosure, but might be in one */ - edev = enclosure_find(&sdev->host->shost_gendev); - if (edev) { + struct enclosure_device *prev = NULL; + + while ((edev = enclosure_find(&sdev->host->shost_gendev, prev)) != NULL) { ses_match_to_enclosure(edev, sdev); - put_device(&edev->edev); + prev = edev; } return -ENODEV; } @@ -625,7 +626,8 @@ static void ses_intf_remove(struct device *cdev, if (!scsi_device_enclosure(sdev)) return; - edev = enclosure_find(cdev->parent); + /* exact match to this enclosure */ + edev = enclosure_find(cdev->parent, NULL); if (!edev) return; diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h index 4332442b1b57..d77811e9ed84 100644 --- a/include/linux/enclosure.h +++ b/include/linux/enclosure.h @@ -123,7 +123,8 @@ enclosure_component_register(struct enclosure_device *, unsigned int, int enclosure_add_device(struct enclosure_device *enclosure, int component, struct device *dev); int enclosure_remove_device(struct enclosure_device *enclosure, int component); -struct enclosure_device *enclosure_find(struct device *dev); +struct enclosure_device *enclosure_find(struct device *dev, + struct enclosure_device *start); int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *), void *data); -- cgit v1.2.3 From 43d8eb9cfd0aea93be32181c64e18191b69c211c Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sat, 1 Aug 2009 00:41:22 +0000 Subject: [SCSI] ses: add support for enclosure component hot removal Right at the moment, hot removal of a device within an enclosure does nothing (because the intf_remove only copes with enclosure removal not with component removal). Fix this by adding a function to remove the component. 
Also needed to fix the prototype of enclosure_remove_device, since we know the device we've removed but not the internal component number Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- drivers/misc/enclosure.c | 22 ++++++++++++++-------- drivers/scsi/ses.c | 33 ++++++++++++++++++++++++++------- include/linux/enclosure.h | 2 +- 3 files changed, 41 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index 789d12128c24..850706a5e553 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -332,19 +332,25 @@ EXPORT_SYMBOL_GPL(enclosure_add_device); * Returns zero on success or an error. * */ -int enclosure_remove_device(struct enclosure_device *edev, int component) +int enclosure_remove_device(struct enclosure_device *edev, struct device *dev) { struct enclosure_component *cdev; + int i; - if (!edev || component >= edev->components) + if (!edev || !dev) return -EINVAL; - cdev = &edev->component[component]; - - device_del(&cdev->cdev); - put_device(cdev->dev); - cdev->dev = NULL; - return device_add(&cdev->cdev); + for (i = 0; i < edev->components; i++) { + cdev = &edev->component[i]; + if (cdev->dev == dev) { + enclosure_remove_links(cdev); + device_del(&cdev->cdev); + put_device(dev); + cdev->dev = NULL; + return device_add(&cdev->cdev); + } + } + return -ENODEV; } EXPORT_SYMBOL_GPL(enclosure_remove_device); diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index e1b8c828f03a..be593c8525b5 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -616,18 +616,26 @@ static int ses_remove(struct device *dev) return 0; } -static void ses_intf_remove(struct device *cdev, - struct class_interface *intf) +static void ses_intf_remove_component(struct scsi_device *sdev) +{ + struct enclosure_device *edev, *prev = NULL; + + while ((edev = enclosure_find(&sdev->host->shost_gendev, prev)) != NULL) { + prev = edev; + if (!enclosure_remove_device(edev, &sdev->sdev_gendev)) + break; + } + if (edev) + put_device(&edev->edev); +} + +static void ses_intf_remove_enclosure(struct scsi_device *sdev) { - struct scsi_device *sdev = to_scsi_device(cdev->parent); struct enclosure_device *edev; struct ses_device *ses_dev; - if (!scsi_device_enclosure(sdev)) - return; - /* exact match to this enclosure */ - edev = enclosure_find(cdev->parent, NULL); + edev = enclosure_find(&sdev->sdev_gendev, NULL); if (!edev) return; @@ -645,6 +653,17 @@ static void ses_intf_remove(struct device *cdev, enclosure_unregister(edev); } +static void ses_intf_remove(struct device *cdev, + struct class_interface *intf) +{ + struct scsi_device *sdev = to_scsi_device(cdev->parent); + + if (!scsi_device_enclosure(sdev)) + ses_intf_remove_component(sdev); + else + ses_intf_remove_enclosure(sdev); +} + static struct class_interface ses_interface = { .add_dev = ses_intf_add, .remove_dev = ses_intf_remove, diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h index d77811e9ed84..90d1c2184112 100644 --- a/include/linux/enclosure.h +++ b/include/linux/enclosure.h @@ -122,7 +122,7 @@ enclosure_component_register(struct enclosure_device *, unsigned int, enum enclosure_component_type, const char *); int enclosure_add_device(struct enclosure_device *enclosure, int component, struct device *dev); -int enclosure_remove_device(struct enclosure_device *enclosure, int component); +int enclosure_remove_device(struct enclosure_device *, struct device *); struct enclosure_device *enclosure_find(struct device *dev, struct enclosure_device 
*start); int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *), -- cgit v1.2.3 From 9f77da9f40045253e91f55c12d4481254b513d2d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:45 -0700 Subject: rcu: Move private definitions from include/linux/rcutree.h to kernel/rcutree.h Some information hiding that makes it easier to merge preemptability into rcutree without descending into #include hell. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <1250974613373-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutree.h | 211 ------------------------------------------ kernel/rcutree.c | 2 + kernel/rcutree.h | 238 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/rcutree_trace.c | 1 + 4 files changed, 241 insertions(+), 211 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d4dfd2489633..e37d5e2a8353 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,217 +30,6 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -#include -#include -#include -#include -#include - -/* - * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. - * In theory, it should be possible to add more levels straightforwardly. - * In practice, this has not been tested, so there is probably some - * bug somewhere. - */ -#define MAX_RCU_LVLS 3 -#define RCU_FANOUT (CONFIG_RCU_FANOUT) -#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) -#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) - -#if NR_CPUS <= RCU_FANOUT -# define NUM_RCU_LVLS 1 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 (NR_CPUS) -# define NUM_RCU_LVL_2 0 -# define NUM_RCU_LVL_3 0 -#elif NR_CPUS <= RCU_FANOUT_SQ -# define NUM_RCU_LVLS 2 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT) -# define NUM_RCU_LVL_2 (NR_CPUS) -# define NUM_RCU_LVL_3 0 -#elif NR_CPUS <= RCU_FANOUT_CUBE -# define NUM_RCU_LVLS 3 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ) -# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT)) -# define NUM_RCU_LVL_3 NR_CPUS -#else -# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" -#endif /* #if (NR_CPUS) <= RCU_FANOUT */ - -#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) -#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) - -/* - * Dynticks per-CPU state. - */ -struct rcu_dynticks { - int dynticks_nesting; /* Track nesting level, sort of. */ - int dynticks; /* Even value for dynticks-idle, else odd. */ - int dynticks_nmi; /* Even value for either dynticks-idle or */ - /* not in nmi handler, else odd. So this */ - /* remains even for nmi from irq handler. */ -}; - -/* - * Definition for node within the RCU grace-period-detection hierarchy. - */ -struct rcu_node { - spinlock_t lock; - unsigned long qsmask; /* CPUs or groups that need to switch in */ - /* order for current grace period to proceed.*/ - unsigned long qsmaskinit; - /* Per-GP initialization for qsmask. */ - unsigned long grpmask; /* Mask to apply to parent qsmask. */ - int grplo; /* lowest-numbered CPU or group here. */ - int grphi; /* highest-numbered CPU or group here. */ - u8 grpnum; /* CPU/group number for next level up. */ - u8 level; /* root is at level 0. 
*/ - struct rcu_node *parent; -} ____cacheline_internodealigned_in_smp; - -/* Index values for nxttail array in struct rcu_data. */ -#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ -#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ -#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ -#define RCU_NEXT_TAIL 3 -#define RCU_NEXT_SIZE 4 - -/* Per-CPU data for read-copy update. */ -struct rcu_data { - /* 1) quiescent-state and grace-period handling : */ - long completed; /* Track rsp->completed gp number */ - /* in order to detect GP end. */ - long gpnum; /* Highest gp number that this CPU */ - /* is aware of having started. */ - long passed_quiesc_completed; - /* Value of completed at time of qs. */ - bool passed_quiesc; /* User-mode/idle loop etc. */ - bool qs_pending; /* Core waits for quiesc state. */ - bool beenonline; /* CPU online at least once. */ - struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ - unsigned long grpmask; /* Mask to apply to leaf qsmask. */ - - /* 2) batch handling */ - /* - * If nxtlist is not NULL, it is partitioned as follows. - * Any of the partitions might be empty, in which case the - * pointer to that partition will be equal to the pointer for - * the following partition. When the list is empty, all of - * the nxttail elements point to nxtlist, which is NULL. - * - * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]): - * Entries that might have arrived after current GP ended - * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): - * Entries known to have arrived before current GP ended - * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): - * Entries that batch # <= ->completed - 1: waiting for current GP - * [nxtlist, *nxttail[RCU_DONE_TAIL]): - * Entries that batch # <= ->completed - * The grace period for these entries has completed, and - * the other grace-period-completed entries may be moved - * here temporarily in rcu_process_callbacks(). - */ - struct rcu_head *nxtlist; - struct rcu_head **nxttail[RCU_NEXT_SIZE]; - long qlen; /* # of queued callbacks */ - long blimit; /* Upper limit on a processed batch */ - -#ifdef CONFIG_NO_HZ - /* 3) dynticks interface. */ - struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ - int dynticks_snap; /* Per-GP tracking for dynticks. */ - int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ -#endif /* #ifdef CONFIG_NO_HZ */ - - /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ -#ifdef CONFIG_NO_HZ - unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ -#endif /* #ifdef CONFIG_NO_HZ */ - unsigned long offline_fqs; /* Kicked due to being offline. */ - unsigned long resched_ipi; /* Sent a resched IPI. */ - - /* 5) __rcu_pending() statistics. */ - long n_rcu_pending; /* rcu_pending() calls since boot. */ - long n_rp_qs_pending; - long n_rp_cb_ready; - long n_rp_cpu_needs_gp; - long n_rp_gp_completed; - long n_rp_gp_started; - long n_rp_need_fqs; - long n_rp_need_nothing; - - int cpu; -}; - -/* Values for signaled field in struct rcu_state. */ -#define RCU_GP_INIT 0 /* Grace period being initialized. */ -#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ -#define RCU_FORCE_QS 2 /* Need to force quiescent state. 
*/ -#ifdef CONFIG_NO_HZ -#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK -#else /* #ifdef CONFIG_NO_HZ */ -#define RCU_SIGNAL_INIT RCU_FORCE_QS -#endif /* #else #ifdef CONFIG_NO_HZ */ - -#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ -#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ - /* to take at least one */ - /* scheduling clock irq */ - /* before ratting on them. */ - -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -/* - * RCU global state, including node hierarchy. This hierarchy is - * represented in "heap" form in a dense array. The root (first level) - * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second - * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), - * and the third level in ->node[m+1] and following (->node[m+1] referenced - * by ->level[2]). The number of levels is determined by the number of - * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" - * consisting of a single rcu_node. - */ -struct rcu_state { - struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ - struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ - u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ - u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ - struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ - - /* The following fields are guarded by the root rcu_node's lock. */ - - u8 signaled ____cacheline_internodealigned_in_smp; - /* Force QS state. */ - long gpnum; /* Current gp number. */ - long completed; /* # of last completed gp. */ - spinlock_t onofflock; /* exclude on/offline and */ - /* starting new GP. */ - spinlock_t fqslock; /* Only one task forcing */ - /* quiescent states. */ - unsigned long jiffies_force_qs; /* Time at which to invoke */ - /* force_quiescent_state(). */ - unsigned long n_force_qs; /* Number of calls to */ - /* force_quiescent_state(). */ - unsigned long n_force_qs_lh; /* ~Number of calls leaving */ - /* due to lock unavailable. */ - unsigned long n_force_qs_ngp; /* Number of calls leaving */ - /* due to no GP active. */ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - unsigned long gp_start; /* Time at which GP started, */ - /* but in jiffies. */ - unsigned long jiffies_stall; /* Time at which to check */ - /* for CPU stalls. */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -#ifdef CONFIG_NO_HZ - long dynticks_completed; /* Value of completed @ snap. */ -#endif /* #ifdef CONFIG_NO_HZ */ -}; - extern void rcu_qsctr_inc(int cpu); extern void rcu_bh_qsctr_inc(int cpu); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 75762cddbe03..a162f859dd32 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -46,6 +46,8 @@ #include #include +#include "rcutree.h" + #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; struct lockdep_map rcu_lock_map = diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 5e872bbf07f5..7cc830a1c44a 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -1,3 +1,239 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * Internal non-public definitions. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Ingo Molnar + * Paul E. McKenney + */ + +#include +#include +#include +#include +#include + +/* + * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. + * In theory, it should be possible to add more levels straightforwardly. + * In practice, this has not been tested, so there is probably some + * bug somewhere. + */ +#define MAX_RCU_LVLS 3 +#define RCU_FANOUT (CONFIG_RCU_FANOUT) +#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) +#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) + +#if NR_CPUS <= RCU_FANOUT +# define NUM_RCU_LVLS 1 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (NR_CPUS) +# define NUM_RCU_LVL_2 0 +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_SQ +# define NUM_RCU_LVLS 2 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT) +# define NUM_RCU_LVL_2 (NR_CPUS) +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_CUBE +# define NUM_RCU_LVLS 3 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ) +# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT)) +# define NUM_RCU_LVL_3 NR_CPUS +#else +# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" +#endif /* #if (NR_CPUS) <= RCU_FANOUT */ + +#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) +#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) + +/* + * Dynticks per-CPU state. + */ +struct rcu_dynticks { + int dynticks_nesting; /* Track nesting level, sort of. */ + int dynticks; /* Even value for dynticks-idle, else odd. */ + int dynticks_nmi; /* Even value for either dynticks-idle or */ + /* not in nmi handler, else odd. So this */ + /* remains even for nmi from irq handler. */ +}; + +/* + * Definition for node within the RCU grace-period-detection hierarchy. + */ +struct rcu_node { + spinlock_t lock; + unsigned long qsmask; /* CPUs or groups that need to switch in */ + /* order for current grace period to proceed.*/ + unsigned long qsmaskinit; + /* Per-GP initialization for qsmask. */ + unsigned long grpmask; /* Mask to apply to parent qsmask. */ + int grplo; /* lowest-numbered CPU or group here. */ + int grphi; /* highest-numbered CPU or group here. */ + u8 grpnum; /* CPU/group number for next level up. */ + u8 level; /* root is at level 0. */ + struct rcu_node *parent; +} ____cacheline_internodealigned_in_smp; + +/* Index values for nxttail array in struct rcu_data. */ +#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ +#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ +#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ +#define RCU_NEXT_TAIL 3 +#define RCU_NEXT_SIZE 4 + +/* Per-CPU data for read-copy update. 
*/ +struct rcu_data { + /* 1) quiescent-state and grace-period handling : */ + long completed; /* Track rsp->completed gp number */ + /* in order to detect GP end. */ + long gpnum; /* Highest gp number that this CPU */ + /* is aware of having started. */ + long passed_quiesc_completed; + /* Value of completed at time of qs. */ + bool passed_quiesc; /* User-mode/idle loop etc. */ + bool qs_pending; /* Core waits for quiesc state. */ + bool beenonline; /* CPU online at least once. */ + struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ + unsigned long grpmask; /* Mask to apply to leaf qsmask. */ + + /* 2) batch handling */ + /* + * If nxtlist is not NULL, it is partitioned as follows. + * Any of the partitions might be empty, in which case the + * pointer to that partition will be equal to the pointer for + * the following partition. When the list is empty, all of + * the nxttail elements point to nxtlist, which is NULL. + * + * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]): + * Entries that might have arrived after current GP ended + * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): + * Entries known to have arrived before current GP ended + * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): + * Entries that batch # <= ->completed - 1: waiting for current GP + * [nxtlist, *nxttail[RCU_DONE_TAIL]): + * Entries that batch # <= ->completed + * The grace period for these entries has completed, and + * the other grace-period-completed entries may be moved + * here temporarily in rcu_process_callbacks(). + */ + struct rcu_head *nxtlist; + struct rcu_head **nxttail[RCU_NEXT_SIZE]; + long qlen; /* # of queued callbacks */ + long blimit; /* Upper limit on a processed batch */ + +#ifdef CONFIG_NO_HZ + /* 3) dynticks interface. */ + struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ + int dynticks_snap; /* Per-GP tracking for dynticks. */ + int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ +#endif /* #ifdef CONFIG_NO_HZ */ + + /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ +#ifdef CONFIG_NO_HZ + unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ +#endif /* #ifdef CONFIG_NO_HZ */ + unsigned long offline_fqs; /* Kicked due to being offline. */ + unsigned long resched_ipi; /* Sent a resched IPI. */ + + /* 5) __rcu_pending() statistics. */ + long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rp_qs_pending; + long n_rp_cb_ready; + long n_rp_cpu_needs_gp; + long n_rp_gp_completed; + long n_rp_gp_started; + long n_rp_need_fqs; + long n_rp_need_nothing; + + int cpu; +}; + +/* Values for signaled field in struct rcu_state. */ +#define RCU_GP_INIT 0 /* Grace period being initialized. */ +#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ +#define RCU_FORCE_QS 2 /* Need to force quiescent state. */ +#ifdef CONFIG_NO_HZ +#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK +#else /* #ifdef CONFIG_NO_HZ */ +#define RCU_SIGNAL_INIT RCU_FORCE_QS +#endif /* #else #ifdef CONFIG_NO_HZ */ + +#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ +#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ + /* to take at least one */ + /* scheduling clock irq */ + /* before ratting on them. */ + +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * RCU global state, including node hierarchy. 
This hierarchy is + * represented in "heap" form in a dense array. The root (first level) + * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second + * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), + * and the third level in ->node[m+1] and following (->node[m+1] referenced + * by ->level[2]). The number of levels is determined by the number of + * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" + * consisting of a single rcu_node. + */ +struct rcu_state { + struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ + struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ + u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ + u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ + struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ + + /* The following fields are guarded by the root rcu_node's lock. */ + + u8 signaled ____cacheline_internodealigned_in_smp; + /* Force QS state. */ + long gpnum; /* Current gp number. */ + long completed; /* # of last completed gp. */ + spinlock_t onofflock; /* exclude on/offline and */ + /* starting new GP. */ + spinlock_t fqslock; /* Only one task forcing */ + /* quiescent states. */ + unsigned long jiffies_force_qs; /* Time at which to invoke */ + /* force_quiescent_state(). */ + unsigned long n_force_qs; /* Number of calls to */ + /* force_quiescent_state(). */ + unsigned long n_force_qs_lh; /* ~Number of calls leaving */ + /* due to lock unavailable. */ + unsigned long n_force_qs_ngp; /* Number of calls leaving */ + /* due to no GP active. */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + unsigned long gp_start; /* Time at which GP started, */ + /* but in jiffies. */ + unsigned long jiffies_stall; /* Time at which to check */ + /* for CPU stalls. */ +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ +#ifdef CONFIG_NO_HZ + long dynticks_completed; /* Value of completed @ snap. */ +#endif /* #ifdef CONFIG_NO_HZ */ +}; + +#ifdef RCU_TREE_NONCORE /* * RCU implementation internal declarations: @@ -8,3 +244,5 @@ DECLARE_PER_CPU(struct rcu_data, rcu_data); extern struct rcu_state rcu_bh_state; DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); +#endif /* #ifdef RCU_TREE_NONCORE */ + diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index fe1dcdbf1ca3..0cb52b887758 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -43,6 +43,7 @@ #include #include +#define RCU_TREE_NONCORE #include "rcutree.h" static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) -- cgit v1.2.3 From d6714c22b43fbcbead7e7b706ff270e15f04a791 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:46 -0700 Subject: rcu: Renamings to increase RCU clarity Make RCU-sched, RCU-bh, and RCU-preempt be underlying implementations, with "RCU" defined in terms of one of the three. Update the outdated rcu_qsctr_inc() names, as these functions no longer increment anything. Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <12509746132696-git-send-email-> Signed-off-by: Ingo Molnar --- Documentation/RCU/trace.txt | 7 ++-- include/linux/rcupdate.h | 21 +++++++++--- include/linux/rcupreempt.h | 4 +-- include/linux/rcutree.h | 8 +++-- kernel/rcupreempt.c | 8 ++--- kernel/rcutree.c | 80 ++++++++++++++++++++++++++++----------------- kernel/rcutree.h | 4 +-- kernel/rcutree_trace.c | 20 ++++++------ kernel/sched.c | 2 +- kernel/softirq.c | 4 +-- 10 files changed, 95 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 02cced183b2d..187bbf10c923 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -191,8 +191,7 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy). The output of "cat rcu/rcudata" looks as follows: -rcu: -rcu: +rcu_sched: 0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10 1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10 2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10 @@ -306,7 +305,7 @@ comma-separated-variable spreadsheet format. The output of "cat rcu/rcugp" looks as follows: -rcu: completed=33062 gpnum=33063 +rcu_sched: completed=33062 gpnum=33063 rcu_bh: completed=464 gpnum=464 Again, this output is for both "rcu" and "rcu_bh". The fields are @@ -413,7 +412,7 @@ o Each element of the form "1/1 0:127 ^0" represents one struct The output of "cat rcu/rcu_pending" looks as follows: -rcu: +rcu_sched: 0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741 1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792 2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629 diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3c89d6a2591f..e920f0fd59d8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -157,17 +157,28 @@ extern int rcu_scheduler_active; * - call_rcu_sched() and rcu_barrier_sched() * on the write-side to insure proper synchronization. */ -#define rcu_read_lock_sched() preempt_disable() -#define rcu_read_lock_sched_notrace() preempt_disable_notrace() +static inline void rcu_read_lock_sched(void) +{ + preempt_disable(); +} +static inline void rcu_read_lock_sched_notrace(void) +{ + preempt_disable_notrace(); +} /* * rcu_read_unlock_sched - marks the end of a RCU-classic critical section * * See rcu_read_lock_sched for more information. */ -#define rcu_read_unlock_sched() preempt_enable() -#define rcu_read_unlock_sched_notrace() preempt_enable_notrace() - +static inline void rcu_read_unlock_sched(void) +{ + preempt_enable(); +} +static inline void rcu_read_unlock_sched_notrace(void) +{ + preempt_enable_notrace(); +} /** diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index f164ac9b7807..2963f080e48d 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -40,8 +40,8 @@ #include #include -extern void rcu_qsctr_inc(int cpu); -static inline void rcu_bh_qsctr_inc(int cpu) { } +extern void rcu_sched_qs(int cpu); +static inline void rcu_bh_qs(int cpu) { } /* * Someone might want to pass call_rcu_bh as a function pointer. 
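/*
 * A read/update sketch, not part of the patch: an RCU-sched read-side
 * critical section paired with call_rcu_sched(), whose declaration is added
 * to rcutree.h just below.  struct foo, global_foo and the foo_* functions
 * are hypothetical; rcu_read_lock_sched()/rcu_read_unlock_sched() are the
 * static inlines introduced above.
 */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int value;
	struct rcu_head rcu;
};

static struct foo *global_foo;

static int foo_read_value(void)
{
	struct foo *p;
	int val;

	rcu_read_lock_sched();		/* disables preemption */
	p = rcu_dereference(global_foo);
	val = p ? p->value : -1;
	rcu_read_unlock_sched();
	return val;
}

static void foo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

static void foo_replace(struct foo *new)
{
	struct foo *old = global_foo;

	rcu_assign_pointer(global_foo, new);
	if (old)
		call_rcu_sched(&old->rcu, foo_free_rcu);
}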
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index e37d5e2a8353..a0852d0d915b 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,8 +30,8 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -extern void rcu_qsctr_inc(int cpu); -extern void rcu_bh_qsctr_inc(int cpu); +extern void rcu_sched_qs(int cpu); +extern void rcu_bh_qs(int cpu); extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); @@ -73,7 +73,8 @@ static inline void __rcu_read_unlock_bh(void) #define __synchronize_sched() synchronize_rcu() -#define call_rcu_sched(head, func) call_rcu(head, func) +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)); static inline void synchronize_rcu_expedited(void) { @@ -91,6 +92,7 @@ extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); +extern long rcu_batches_completed_sched(void); static inline void rcu_init_sched(void) { diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 510898a7bd69..7d777c9f394c 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -159,7 +159,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched .dynticks = 1, }; -void rcu_qsctr_inc(int cpu) +void rcu_sched_qs(int cpu) { struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); @@ -967,12 +967,12 @@ void rcu_check_callbacks(int cpu, int user) * If this CPU took its interrupt from user mode or from the * idle loop, and this is not a nested interrupt, then * this CPU has to have exited all prior preept-disable - * sections of code. So increment the counter to note this. + * sections of code. So invoke rcu_sched_qs() to note this. * * The memory barrier is needed to handle the case where * writes from a preempt-disable section of code get reordered * into schedule() by this CPU's write buffer. So the memory - * barrier makes sure that the rcu_qsctr_inc() is seen by other + * barrier makes sure that the rcu_sched_qs() is seen by other * CPUs to happen after any such write. */ @@ -980,7 +980,7 @@ void rcu_check_callbacks(int cpu, int user) (idle_cpu(cpu) && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { smp_mb(); /* Guard against aggressive schedule(). */ - rcu_qsctr_inc(cpu); + rcu_sched_qs(cpu); } rcu_check_mb(cpu); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a162f859dd32..4d71d4e8b5a8 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -74,26 +74,25 @@ EXPORT_SYMBOL_GPL(rcu_lock_map); .n_force_qs_ngp = 0, \ } -struct rcu_state rcu_state = RCU_STATE_INITIALIZER(rcu_state); -DEFINE_PER_CPU(struct rcu_data, rcu_data); +struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); +DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); /* - * Increment the quiescent state counter. - * The counter is a bit degenerated: We do not need to know + * Note a quiescent state. Because we do not need to know * how many quiescent states passed, just if there was at least - * one since the start of the grace period. Thus just a flag. + * one since the start of the grace period, this just sets a flag. 
*/ -void rcu_qsctr_inc(int cpu) +void rcu_sched_qs(int cpu) { - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); rdp->passed_quiesc = 1; rdp->passed_quiesc_completed = rdp->completed; } -void rcu_bh_qsctr_inc(int cpu) +void rcu_bh_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); rdp->passed_quiesc = 1; @@ -113,12 +112,22 @@ static int qlowmark = 100; /* Once only this many pending, use blimit. */ static void force_quiescent_state(struct rcu_state *rsp, int relaxed); +/* + * Return the number of RCU-sched batches processed thus far for debug & stats. + */ +long rcu_batches_completed_sched(void) +{ + return rcu_sched_state.completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); + /* * Return the number of RCU batches processed thus far for debug & stats. + * @@@ placeholder, maps to rcu_batches_completed_sched(). */ long rcu_batches_completed(void) { - return rcu_state.completed; + return rcu_batches_completed_sched(); } EXPORT_SYMBOL_GPL(rcu_batches_completed); @@ -310,7 +319,7 @@ void rcu_irq_exit(void) WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); /* If the interrupt queued a callback, get out of dyntick mode. */ - if (__get_cpu_var(rcu_data).nxtlist || + if (__get_cpu_var(rcu_sched_data).nxtlist || __get_cpu_var(rcu_bh_data).nxtlist) set_need_resched(); } @@ -847,7 +856,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) /* * Move callbacks from the outgoing CPU to the running CPU. * Note that the outgoing CPU is now quiscent, so it is now - * (uncharacteristically) safe to access it rcu_data structure. + * (uncharacteristically) safe to access its rcu_data structure. * Note also that we must carefully retain the order of the * outgoing CPU's callbacks in order for rcu_barrier() to work * correctly. Finally, note that we start all the callbacks @@ -878,7 +887,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) */ static void rcu_offline_cpu(int cpu) { - __rcu_offline_cpu(cpu, &rcu_state); + __rcu_offline_cpu(cpu, &rcu_sched_state); __rcu_offline_cpu(cpu, &rcu_bh_state); } @@ -973,17 +982,16 @@ void rcu_check_callbacks(int cpu, int user) * Get here if this CPU took its interrupt from user * mode or from the idle loop, and if this is not a * nested interrupt. In this case, the CPU is in - * a quiescent state, so count it. + * a quiescent state, so note it. * * No memory barrier is required here because both - * rcu_qsctr_inc() and rcu_bh_qsctr_inc() reference - * only CPU-local variables that other CPUs neither - * access nor modify, at least not while the corresponding - * CPU is online. + * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local + * variables that other CPUs neither access nor modify, + * at least not while the corresponding CPU is online. */ - rcu_qsctr_inc(cpu); - rcu_bh_qsctr_inc(cpu); + rcu_sched_qs(cpu); + rcu_bh_qs(cpu); } else if (!in_softirq()) { @@ -991,10 +999,10 @@ void rcu_check_callbacks(int cpu, int user) * Get here if this CPU did not take its interrupt from * softirq, in other words, if it is not interrupting * a rcu_bh read-side critical section. This is an _bh - * critical section, so count it. + * critical section, so note it. */ - rcu_bh_qsctr_inc(cpu); + rcu_bh_qs(cpu); } raise_softirq(RCU_SOFTIRQ); } @@ -1174,7 +1182,8 @@ static void rcu_process_callbacks(struct softirq_action *unused) */ smp_mb(); /* See above block comment. 
*/ - __rcu_process_callbacks(&rcu_state, &__get_cpu_var(rcu_data)); + __rcu_process_callbacks(&rcu_sched_state, + &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); /* @@ -1231,14 +1240,25 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), } /* - * Queue an RCU callback for invocation after a grace period. + * Queue an RCU-sched callback for invocation after a grace period. + */ +void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_sched_state); +} +EXPORT_SYMBOL_GPL(call_rcu_sched); + +/* + * @@@ Queue an RCU callback for invocation after a grace period. + * @@@ Placeholder pending rcutree_plugin.h. */ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { - __call_rcu(head, func, &rcu_state); + call_rcu_sched(head, func); } EXPORT_SYMBOL_GPL(call_rcu); + /* * Queue an RCU for invocation after a quicker grace period. */ @@ -1311,7 +1331,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) */ int rcu_pending(int cpu) { - return __rcu_pending(&rcu_state, &per_cpu(rcu_data, cpu)) || + return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)); } @@ -1324,7 +1344,7 @@ int rcu_pending(int cpu) int rcu_needs_cpu(int cpu) { /* RCU callbacks either ready or pending? */ - return per_cpu(rcu_data, cpu).nxtlist || + return per_cpu(rcu_sched_data, cpu).nxtlist || per_cpu(rcu_bh_data, cpu).nxtlist; } @@ -1418,7 +1438,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) static void __cpuinit rcu_online_cpu(int cpu) { - rcu_init_percpu_data(cpu, &rcu_state); + rcu_init_percpu_data(cpu, &rcu_sched_state); rcu_init_percpu_data(cpu, &rcu_bh_state); } @@ -1545,10 +1565,10 @@ void __init __rcu_init(void) #ifdef CONFIG_RCU_CPU_STALL_DETECTOR printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - rcu_init_one(&rcu_state); - RCU_DATA_PTR_INIT(&rcu_state, rcu_data); + rcu_init_one(&rcu_sched_state); + RCU_DATA_PTR_INIT(&rcu_sched_state, rcu_sched_data); for_each_possible_cpu(i) - rcu_boot_init_percpu_data(i, &rcu_state); + rcu_boot_init_percpu_data(i, &rcu_sched_state); rcu_init_one(&rcu_bh_state); RCU_DATA_PTR_INIT(&rcu_bh_state, rcu_bh_data); for_each_possible_cpu(i) diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7cc830a1c44a..0024e5ddcc68 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -238,8 +238,8 @@ struct rcu_state { /* * RCU implementation internal declarations: */ -extern struct rcu_state rcu_state; -DECLARE_PER_CPU(struct rcu_data, rcu_data); +extern struct rcu_state rcu_sched_state; +DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); extern struct rcu_state rcu_bh_state; DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 0cb52b887758..236c0504fee2 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -77,8 +77,8 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) static int show_rcudata(struct seq_file *m, void *unused) { - seq_puts(m, "rcu:\n"); - PRINT_RCU_DATA(rcu_data, print_one_rcu_data, m); + seq_puts(m, "rcu_sched:\n"); + PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m); seq_puts(m, "rcu_bh:\n"); PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m); return 0; @@ -125,8 +125,8 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) seq_puts(m, "\"dt\",\"dt 
nesting\",\"dn\",\"df\","); #endif /* #ifdef CONFIG_NO_HZ */ seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n"); - seq_puts(m, "\"rcu:\"\n"); - PRINT_RCU_DATA(rcu_data, print_one_rcu_data_csv, m); + seq_puts(m, "\"rcu_sched:\"\n"); + PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m); seq_puts(m, "\"rcu_bh:\"\n"); PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m); return 0; @@ -172,8 +172,8 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) static int show_rcuhier(struct seq_file *m, void *unused) { - seq_puts(m, "rcu:\n"); - print_one_rcu_state(m, &rcu_state); + seq_puts(m, "rcu_sched:\n"); + print_one_rcu_state(m, &rcu_sched_state); seq_puts(m, "rcu_bh:\n"); print_one_rcu_state(m, &rcu_bh_state); return 0; @@ -194,8 +194,8 @@ static struct file_operations rcuhier_fops = { static int show_rcugp(struct seq_file *m, void *unused) { - seq_printf(m, "rcu: completed=%ld gpnum=%ld\n", - rcu_state.completed, rcu_state.gpnum); + seq_printf(m, "rcu_sched: completed=%ld gpnum=%ld\n", + rcu_sched_state.completed, rcu_sched_state.gpnum); seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n", rcu_bh_state.completed, rcu_bh_state.gpnum); return 0; @@ -244,8 +244,8 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) static int show_rcu_pending(struct seq_file *m, void *unused) { - seq_puts(m, "rcu:\n"); - print_rcu_pendings(m, &rcu_state); + seq_puts(m, "rcu_sched:\n"); + print_rcu_pendings(m, &rcu_sched_state); seq_puts(m, "rcu_bh:\n"); print_rcu_pendings(m, &rcu_bh_state); return 0; diff --git a/kernel/sched.c b/kernel/sched.c index cda8b81f8801..c9beca67a53e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5325,7 +5325,7 @@ need_resched: preempt_disable(); cpu = smp_processor_id(); rq = cpu_rq(cpu); - rcu_qsctr_inc(cpu); + rcu_sched_qs(cpu); prev = rq->curr; switch_count = &prev->nivcsw; diff --git a/kernel/softirq.c b/kernel/softirq.c index eb5e131a0485..7db25067cd2d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -227,7 +227,7 @@ restart: preempt_count() = prev_count; } - rcu_bh_qsctr_inc(cpu); + rcu_bh_qs(cpu); } h++; pending >>= 1; @@ -721,7 +721,7 @@ static int ksoftirqd(void * __bind_cpu) preempt_enable_no_resched(); cond_resched(); preempt_disable(); - rcu_qsctr_inc((long)__bind_cpu); + rcu_sched_qs((long)__bind_cpu); } preempt_enable(); set_current_state(TASK_INTERRUPTIBLE); -- cgit v1.2.3 From bc33f24bdca8b6e97376e3a182ab69e6cdefa989 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:47 -0700 Subject: rcu: Consolidate sparse and lockdep declarations in include/linux/rcupdate.h Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <12509746132349-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 46 ++++++++++++++++++++++++++++++++++++++++++---- include/linux/rcupreempt.h | 4 ++-- include/linux/rcutree.h | 18 ------------------ 3 files changed, 44 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index e920f0fd59d8..9d85ee19492a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -80,6 +80,16 @@ extern int rcu_scheduler_active; (ptr)->next = NULL; (ptr)->func = NULL; \ } while (0) +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern struct lockdep_map rcu_lock_map; +# define rcu_read_acquire() \ + lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) +#else +# define rcu_read_acquire() do { } while (0) +# define rcu_read_release() do { } while (0) +#endif + /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. * @@ -109,7 +119,12 @@ extern int rcu_scheduler_active; * * It is illegal to block while in an RCU read-side critical section. */ -#define rcu_read_lock() __rcu_read_lock() +static inline void rcu_read_lock(void) +{ + __rcu_read_lock(); + __acquire(RCU); + rcu_read_acquire(); +} /** * rcu_read_unlock - marks the end of an RCU read-side critical section. @@ -126,7 +141,12 @@ extern int rcu_scheduler_active; * used as well. RCU does not care how the writers keep out of each * others' way, as long as they do so. */ -#define rcu_read_unlock() __rcu_read_unlock() +static inline void rcu_read_unlock(void) +{ + rcu_read_release(); + __release(RCU); + __rcu_read_unlock(); +} /** * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section @@ -139,14 +159,24 @@ extern int rcu_scheduler_active; * can use just rcu_read_lock(). * */ -#define rcu_read_lock_bh() __rcu_read_lock_bh() +static inline void rcu_read_lock_bh(void) +{ + __rcu_read_lock_bh(); + __acquire(RCU_BH); + rcu_read_acquire(); +} /* * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. 
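
To make the new annotations concrete, a hedged reader-side sketch:
struct foo, gbl_foo and read_a() are invented, and gbl_foo is assumed
to be non-NULL and published elsewhere via rcu_assign_pointer(); only
the locking primitives are the ones defined above:

        struct foo {
                int a;
        };
        static struct foo *gbl_foo;

        int read_a(void)
        {
                int a;

                rcu_read_lock();        /* __acquire(RCU) for sparse,
                                         * rcu_read_acquire() for lockdep */
                a = rcu_dereference(gbl_foo)->a;
                rcu_read_unlock();      /* balanced release; with
                                         * CONFIG_DEBUG_LOCK_ALLOC lockdep
                                         * can flag a missing unlock */
                return a;
        }
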
*/ -#define rcu_read_unlock_bh() __rcu_read_unlock_bh() +static inline void rcu_read_unlock_bh(void) +{ + rcu_read_release(); + __release(RCU_BH); + __rcu_read_unlock_bh(); +} /** * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section @@ -160,10 +190,14 @@ extern int rcu_scheduler_active; static inline void rcu_read_lock_sched(void) { preempt_disable(); + __acquire(RCU_SCHED); + rcu_read_acquire(); } static inline void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); + __acquire(RCU_SCHED); + rcu_read_acquire(); } /* @@ -173,10 +207,14 @@ static inline void rcu_read_lock_sched_notrace(void) */ static inline void rcu_read_unlock_sched(void) { + rcu_read_release(); + __release(RCU_SCHED); preempt_enable(); } static inline void rcu_read_unlock_sched_notrace(void) { + rcu_read_release(); + __release(RCU_SCHED); preempt_enable_notrace(); } diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 2963f080e48d..6c9dd9cf8b8e 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -64,8 +64,8 @@ static inline void rcu_bh_qs(int cpu) { } extern void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *head)); -extern void __rcu_read_lock(void) __acquires(RCU); -extern void __rcu_read_unlock(void) __releases(RCU); +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index a0852d0d915b..8a0222ce3b13 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -36,38 +36,20 @@ extern void rcu_bh_qs(int cpu); extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); -#ifdef CONFIG_DEBUG_LOCK_ALLOC -extern struct lockdep_map rcu_lock_map; -# define rcu_read_acquire() \ - lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) -# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) -#else -# define rcu_read_acquire() do { } while (0) -# define rcu_read_release() do { } while (0) -#endif - static inline void __rcu_read_lock(void) { preempt_disable(); - __acquire(RCU); - rcu_read_acquire(); } static inline void __rcu_read_unlock(void) { - rcu_read_release(); - __release(RCU); preempt_enable(); } static inline void __rcu_read_lock_bh(void) { local_bh_disable(); - __acquire(RCU_BH); - rcu_read_acquire(); } static inline void __rcu_read_unlock_bh(void) { - rcu_read_release(); - __release(RCU_BH); local_bh_enable(); } -- cgit v1.2.3 From a157229cabd6dd8cfa82525fc9bf730c94cc9ac2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:51 -0700 Subject: rcu: Simplify rcu_pending()/rcu_check_callbacks() API All calls from outside RCU are of the form: if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user); This is silly, instead we put a call to rcu_pending() in rcu_check_callbacks(), and then make the outside calls be to rcu_check_callbacks(). This cuts down on the code a bit and also gives the compiler a better chance of optimizing. Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <125097461311-git-send-email-> Signed-off-by: Ingo Molnar --- arch/ia64/xen/time.c | 3 +-- include/linux/rcupreempt.h | 1 - include/linux/rcutree.h | 1 - kernel/rcupreempt.c | 10 ++++++++-- kernel/rcutree.c | 5 ++++- kernel/timer.c | 3 +-- 6 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index fb8332690179..dbeadb9c8e20 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -133,8 +133,7 @@ consider_steal_time(unsigned long new_itm) account_idle_ticks(blocked); run_local_timers(); - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, user_mode(get_irq_regs())); + rcu_check_callbacks(cpu, user_mode(get_irq_regs())); scheduler_tick(); run_posix_cpu_timers(p); diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 6c9dd9cf8b8e..aff4772fb49e 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -66,7 +66,6 @@ extern void call_rcu_sched(struct rcu_head *head, extern void __rcu_read_lock(void); extern void __rcu_read_unlock(void); -extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); #define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 8a0222ce3b13..c739d90f5e68 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -33,7 +33,6 @@ extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); -extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); static inline void __rcu_read_lock(void) diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 7d777c9f394c..0053ce56e326 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -159,6 +159,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched .dynticks = 1, }; +static int rcu_pending(int cpu); + void rcu_sched_qs(int cpu) { struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); @@ -961,7 +963,10 @@ static void rcu_check_mb(int cpu) void rcu_check_callbacks(int cpu, int user) { unsigned long flags; - struct rcu_data *rdp = RCU_DATA_CPU(cpu); + struct rcu_data *rdp; + + if (!rcu_pending(cpu)) + return; /* if nothing for RCU to do. */ /* * If this CPU took its interrupt from user mode or from the @@ -976,6 +981,7 @@ void rcu_check_callbacks(int cpu, int user) * CPUs to happen after any such write. */ + rdp = RCU_DATA_CPU(cpu); if (user || (idle_cpu(cpu) && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { @@ -1382,7 +1388,7 @@ int rcu_needs_cpu(int cpu) rdp->waitschedlist != NULL); } -int rcu_pending(int cpu) +static int rcu_pending(int cpu) { struct rcu_data *rdp = RCU_DATA_CPU(cpu); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 7c515082ae84..4ce3adcfa94d 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -111,6 +111,7 @@ static int qhimark = 10000; /* If this many pending, ignore blimit. */ static int qlowmark = 100; /* Once only this many pending, use blimit. */ static void force_quiescent_state(struct rcu_state *rsp, int relaxed); +static int rcu_pending(int cpu); /* * Return the number of RCU-sched batches processed thus far for debug & stats. 
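
The API change condensed to a representative call site (illustrative,
not literal, code):

        /* before: every outside caller had to pair the two */
        if (rcu_pending(cpu))
                rcu_check_callbacks(cpu, user);

        /* after: the check lives inside the callee */
        rcu_check_callbacks(cpu, user);  /* returns early if no RCU work */
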
@@ -974,6 +975,8 @@ static void rcu_do_batch(struct rcu_data *rdp) */ void rcu_check_callbacks(int cpu, int user) { + if (!rcu_pending(cpu)) + return; /* if nothing for RCU to do. */ if (user || (idle_cpu(cpu) && rcu_scheduler_active && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { @@ -1329,7 +1332,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) * by the current CPU, returning 1 if so. This function is part of the * RCU implementation; it is -not- an exported member of the RCU API. */ -int rcu_pending(int cpu) +static int rcu_pending(int cpu) { return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)); diff --git a/kernel/timer.c b/kernel/timer.c index a7f07d5a6241..a3d25f415019 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1156,8 +1156,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); run_local_timers(); - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, user_tick); + rcu_check_callbacks(cpu, user_tick); printk_tick(); scheduler_tick(); run_posix_cpu_timers(p); -- cgit v1.2.3 From f41d911f8c49a5d65c86504c19e8204bb605c4fd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:52 -0700 Subject: rcu: Merge preemptable-RCU functionality into hierarchical RCU Create a kernel/rcutree_plugin.h file that contains definitions for preemptable RCU (or, under the #else branch of the #ifdef, empty definitions for the classic non-preemptable semantics). These definitions fit into plugins defined in kernel/rcutree.c for this purpose. This variant of preemptable RCU uses a new algorithm whose read-side expense is roughly that of classic hierarchical RCU under CONFIG_PREEMPT. This new algorithm's update-side expense is similar to that of classic hierarchical RCU, and, in absence of read-side preemption or blocking, is exactly that of classic hierarchical RCU. Perhaps more important, this new algorithm has a much simpler implementation, saving well over 1,000 lines of code compared to mainline's implementation of preemptable RCU, which will hopefully be retired in favor of this new algorithm. The simplifications are obtained by maintaining per-task nesting state for running tasks, and using a simple lock-protected algorithm to handle accounting when tasks block within RCU read-side critical sections, making use of lessons learned while creating numerous user-level RCU implementations over the past 18 months. Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <12509746134003-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 15 ++ include/linux/rcupdate.h | 2 +- include/linux/rcupreempt.h | 4 + include/linux/rcutree.h | 16 ++ include/linux/sched.h | 37 ++++ init/Kconfig | 22 ++- kernel/Makefile | 1 + kernel/exit.c | 1 + kernel/fork.c | 5 +- kernel/rcutree.c | 135 +++++++++----- kernel/rcutree.h | 9 + kernel/rcutree_plugin.h | 447 +++++++++++++++++++++++++++++++++++++++++++++ kernel/rcutree_trace.c | 20 ++ lib/Kconfig.debug | 2 +- 14 files changed, 661 insertions(+), 55 deletions(-) create mode 100644 kernel/rcutree_plugin.h (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 7fc01b13be43..971a968831bf 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -94,6 +94,20 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif +#ifdef CONFIG_PREEMPT_RCU +#define INIT_TASK_RCU_PREEMPT(tsk) \ + .rcu_read_lock_nesting = 0, \ + .rcu_flipctr_idx = 0, +#elif defined(CONFIG_TREE_PREEMPT_RCU) +#define INIT_TASK_RCU_PREEMPT(tsk) \ + .rcu_read_lock_nesting = 0, \ + .rcu_read_unlock_special = 0, \ + .rcu_blocked_cpu = -1, \ + .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), +#else +#define INIT_TASK_RCU_PREEMPT(tsk) +#endif + extern struct cred init_cred; #ifdef CONFIG_PERF_COUNTERS @@ -173,6 +187,7 @@ extern struct cred init_cred; INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ + INIT_TASK_RCU_PREEMPT(tsk) \ } diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9d85ee19492a..26892f5e7bd8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -66,7 +66,7 @@ extern void rcu_scheduler_starting(void); extern int rcu_needs_cpu(int cpu); extern int rcu_scheduler_active; -#if defined(CONFIG_TREE_RCU) +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include #elif defined(CONFIG_PREEMPT_RCU) #include diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index aff4772fb49e..a42ab88e9210 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -98,6 +98,10 @@ static inline long rcu_batches_completed_bh(void) return rcu_batches_completed(); } +static inline void exit_rcu(void) +{ +} + #ifdef CONFIG_RCU_TRACE struct rcupreempt_trace; extern long *rcupreempt_flipctr(int cpu); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c739d90f5e68..a89307717825 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,14 +35,30 @@ extern void rcu_bh_qs(int cpu); extern int rcu_needs_cpu(int cpu); +#ifdef CONFIG_TREE_PREEMPT_RCU + +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); +extern void exit_rcu(void); + +#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + static inline void __rcu_read_lock(void) { preempt_disable(); } + static inline void __rcu_read_unlock(void) { preempt_enable(); } + +static inline void exit_rcu(void) +{ +} + +#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ + static inline void __rcu_read_lock_bh(void) { local_bh_disable(); diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ab08e4bb6b8..d7f98f637a2a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1210,6 +1210,13 @@ struct task_struct { int 
rcu_flipctr_idx; #endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TREE_PREEMPT_RCU + int rcu_read_lock_nesting; + char rcu_read_unlock_special; + int rcu_blocked_cpu; + struct list_head rcu_node_entry; +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; #endif @@ -1723,6 +1730,36 @@ extern cputime_t task_gtime(struct task_struct *p); #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) +#ifdef CONFIG_TREE_PREEMPT_RCU + +#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ +#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ +#define RCU_READ_UNLOCK_GOT_QS (1 << 2) /* CPU has responded to RCU core. */ + +static inline void rcu_copy_process(struct task_struct *p) +{ + p->rcu_read_lock_nesting = 0; + p->rcu_read_unlock_special = 0; + p->rcu_blocked_cpu = -1; + INIT_LIST_HEAD(&p->rcu_node_entry); +} + +#elif defined(CONFIG_PREEMPT_RCU) + +static inline void rcu_copy_process(struct task_struct *p) +{ + p->rcu_read_lock_nesting = 0; + p->rcu_flipctr_idx = 0; +} + +#else + +static inline void rcu_copy_process(struct task_struct *p) +{ +} + +#endif + #ifdef CONFIG_SMP extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); diff --git a/init/Kconfig b/init/Kconfig index 25373cf32672..f88da2d1c1fb 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -335,11 +335,20 @@ config PREEMPT_RCU now-naive assumptions about each RCU read-side critical section remaining on a given CPU through its execution. +config TREE_PREEMPT_RCU + bool "Preemptable tree-based hierarchical RCU" + depends on PREEMPT + help + This option selects the RCU implementation that is + designed for very large SMP systems with hundreds or + thousands of CPUs, but for which real-time response + is also required. + endchoice config RCU_TRACE bool "Enable tracing for RCU" - depends on TREE_RCU || PREEMPT_RCU + depends on TREE_RCU || PREEMPT_RCU || TREE_PREEMPT_RCU help This option provides tracing in RCU which presents stats in debugfs for debugging RCU implementation. @@ -351,7 +360,7 @@ config RCU_FANOUT int "Tree-based hierarchical RCU fanout value" range 2 64 if 64BIT range 2 32 if !64BIT - depends on TREE_RCU + depends on TREE_RCU || TREE_PREEMPT_RCU default 64 if 64BIT default 32 if !64BIT help @@ -366,7 +375,7 @@ config RCU_FANOUT config RCU_FANOUT_EXACT bool "Disable tree-based hierarchical RCU auto-balancing" - depends on TREE_RCU + depends on TREE_RCU || TREE_PREEMPT_RCU default n help This option forces use of the exact RCU_FANOUT value specified, @@ -379,11 +388,12 @@ config RCU_FANOUT_EXACT Say N if unsure. config TREE_RCU_TRACE - def_bool RCU_TRACE && TREE_RCU + def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU ) select DEBUG_FS help - This option provides tracing for the TREE_RCU implementation, - permitting Makefile to trivially select kernel/rcutree_trace.c. + This option provides tracing for the TREE_RCU and + TREE_PREEMPT_RCU implementations, permitting Makefile to + trivially select kernel/rcutree_trace.c. 
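
An illustrative helper showing how the new per-task fields fit
together; rcu_reader_is_blocked() is a made-up name, while the fields
and the RCU_READ_UNLOCK_BLOCKED bit are the ones added above:

        static inline int rcu_reader_is_blocked(struct task_struct *t)
        {
                /*
                 * In an RCU read-side critical section, and already
                 * queued on some rcu_node's blocked_tasks[] list?
                 */
                return t->rcu_read_lock_nesting &&
                       (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED);
        }
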
config PREEMPT_RCU_TRACE def_bool RCU_TRACE && PREEMPT_RCU diff --git a/kernel/Makefile b/kernel/Makefile index 2419c9d43918..1a38b4789dda 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -81,6 +81,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += rcutree.o +obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o diff --git a/kernel/exit.c b/kernel/exit.c index 869dc221733e..263f95ed7201 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1010,6 +1010,7 @@ NORET_TYPE void do_exit(long code) __free_pipe_info(tsk->splice_pipe); preempt_disable(); + exit_rcu(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; schedule(); diff --git a/kernel/fork.c b/kernel/fork.c index 021e1138556e..642e8b5edf00 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1022,10 +1022,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); -#ifdef CONFIG_PREEMPT_RCU - p->rcu_read_lock_nesting = 0; - p->rcu_flipctr_idx = 0; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ + rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4ce3adcfa94d..cc0255714075 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -80,6 +80,21 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); +extern long rcu_batches_completed_sched(void); +static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, + struct rcu_node *rnp, unsigned long flags); +static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags); +static void __rcu_process_callbacks(struct rcu_state *rsp, + struct rcu_data *rdp); +static void __call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu), + struct rcu_state *rsp); +static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp); +static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp, + int preemptable); + +#include "rcutree_plugin.h" + /* * Note a quiescent state. Because we do not need to know * how many quiescent states passed, just if there was at least @@ -87,16 +102,27 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); */ void rcu_sched_qs(int cpu) { - struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); + unsigned long flags; + struct rcu_data *rdp; + + local_irq_save(flags); + rdp = &per_cpu(rcu_sched_data, cpu); rdp->passed_quiesc = 1; rdp->passed_quiesc_completed = rdp->completed; + rcu_preempt_qs(cpu); + local_irq_restore(flags); } void rcu_bh_qs(int cpu) { - struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); + unsigned long flags; + struct rcu_data *rdp; + + local_irq_save(flags); + rdp = &per_cpu(rcu_bh_data, cpu); rdp->passed_quiesc = 1; rdp->passed_quiesc_completed = rdp->completed; + local_irq_restore(flags); } #ifdef CONFIG_NO_HZ @@ -122,16 +148,6 @@ long rcu_batches_completed_sched(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); -/* - * Return the number of RCU batches processed thus far for debug & stats. - * @@@ placeholder, maps to rcu_batches_completed_sched(). 
- */ -long rcu_batches_completed(void) -{ - return rcu_batches_completed_sched(); -} -EXPORT_SYMBOL_GPL(rcu_batches_completed); - /* * Return the number of RCU BH batches processed thus far for debug & stats. */ @@ -193,6 +209,10 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) return 1; } + /* If preemptable RCU, no point in sending reschedule IPI. */ + if (rdp->preemptable) + return 0; + /* The CPU is online, so send it a reschedule IPI. */ if (rdp->cpu != smp_processor_id()) smp_send_reschedule(rdp->cpu); @@ -473,6 +493,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) printk(KERN_ERR "INFO: RCU detected CPU stalls:"); for (; rnp_cur < rnp_end; rnp_cur++) { + rcu_print_task_stall(rnp); if (rnp_cur->qsmask == 0) continue; for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++) @@ -685,6 +706,19 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) local_irq_restore(flags); } +/* + * Clean up after the prior grace period and let rcu_start_gp() start up + * the next grace period if one is needed. Note that the caller must + * hold rnp->lock, as required by rcu_start_gp(), which will release it. + */ +static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) + __releases(rnp->lock) +{ + rsp->completed = rsp->gpnum; + rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); + rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ +} + /* * Similar to cpu_quiet(), for which it is a helper function. Allows * a group of CPUs to be quieted at one go, though all the CPUs in the @@ -706,7 +740,7 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, return; } rnp->qsmask &= ~mask; - if (rnp->qsmask != 0) { + if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { /* Other bits still set at this level, so done. */ spin_unlock_irqrestore(&rnp->lock, flags); @@ -726,14 +760,10 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, /* * Get here if we are the last CPU to pass through a quiescent - * state for this grace period. Clean up and let rcu_start_gp() - * start up the next grace period if one is needed. Note that - * we still hold rnp->lock, as required by rcu_start_gp(), which - * will release it. + * state for this grace period. Invoke cpu_quiet_msk_finish() + * to clean up and start the next grace period if one is needed. */ - rsp->completed = rsp->gpnum; - rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); - rcu_start_gp(rsp, flags); /* releases rnp->lock. */ + cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */ } /* @@ -840,11 +870,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->qsmaskinit &= ~mask; if (rnp->qsmaskinit != 0) { - spin_unlock(&rnp->lock); /* irqs already disabled. */ + spin_unlock(&rnp->lock); /* irqs remain disabled. */ break; } mask = rnp->grpmask; - spin_unlock(&rnp->lock); /* irqs already disabled. */ + spin_unlock(&rnp->lock); /* irqs remain disabled. 
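
Stated positively, the condition under which cpu_quiet_msk() may now
advance past an rcu_node; this is a condensed restatement of the test
above, not literal code:

        if (rnp->qsmask == 0 && !rcu_preempted_readers(rnp)) {
                /*
                 * Every CPU below this node has checked in, and no
                 * task blocked in a read-side critical section is
                 * holding up the current grace period: move up.
                 */
        }
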
*/ rnp = rnp->parent; } while (rnp != NULL); lastcomp = rsp->completed; @@ -1007,6 +1037,7 @@ void rcu_check_callbacks(int cpu, int user) rcu_bh_qs(cpu); } + rcu_preempt_check_callbacks(cpu); raise_softirq(RCU_SOFTIRQ); } @@ -1188,6 +1219,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_preempt_process_callbacks(); /* * Memory references from any later RCU read-side critical sections @@ -1251,17 +1283,6 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu_sched); -/* - * @@@ Queue an RCU callback for invocation after a grace period. - * @@@ Placeholder pending rcutree_plugin.h. - */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) -{ - call_rcu_sched(head, func); -} -EXPORT_SYMBOL_GPL(call_rcu); - - /* * Queue an RCU for invocation after a quicker grace period. */ @@ -1335,7 +1356,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) static int rcu_pending(int cpu) { return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || - __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)); + __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || + rcu_preempt_pending(cpu); } /* @@ -1348,7 +1370,8 @@ int rcu_needs_cpu(int cpu) { /* RCU callbacks either ready or pending? */ return per_cpu(rcu_sched_data, cpu).nxtlist || - per_cpu(rcu_bh_data, cpu).nxtlist; + per_cpu(rcu_bh_data, cpu).nxtlist || + rcu_preempt_needs_cpu(cpu); } /* @@ -1383,7 +1406,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) * that this CPU cannot possibly have any RCU callbacks in flight yet. */ static void __cpuinit -rcu_init_percpu_data(int cpu, struct rcu_state *rsp) +rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) { unsigned long flags; long lastcomp; @@ -1399,6 +1422,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->passed_quiesc = 0; /* We could be racing with new GP, */ rdp->qs_pending = 1; /* so set up to respond to current GP. */ rdp->beenonline = 1; /* We have now been online. */ + rdp->preemptable = preemptable; rdp->passed_quiesc_completed = lastcomp - 1; rdp->blimit = blimit; spin_unlock(&rnp->lock); /* irqs remain disabled. */ @@ -1441,12 +1465,13 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) static void __cpuinit rcu_online_cpu(int cpu) { - rcu_init_percpu_data(cpu, &rcu_sched_state); - rcu_init_percpu_data(cpu, &rcu_bh_state); + rcu_init_percpu_data(cpu, &rcu_sched_state, 0); + rcu_init_percpu_data(cpu, &rcu_bh_state, 0); + rcu_preempt_init_percpu_data(cpu); } /* - * Handle CPU online/offline notifcation events. + * Handle CPU online/offline notification events. */ int __cpuinit rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) @@ -1521,6 +1546,7 @@ static void __init rcu_init_one(struct rcu_state *rsp) rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { spin_lock_init(&rnp->lock); + rnp->gpnum = 0; rnp->qsmask = 0; rnp->qsmaskinit = 0; rnp->grplo = j * cpustride; @@ -1538,13 +1564,16 @@ static void __init rcu_init_one(struct rcu_state *rsp) j / rsp->levelspread[i - 1]; } rnp->level = i; + INIT_LIST_HEAD(&rnp->blocked_tasks[0]); + INIT_LIST_HEAD(&rnp->blocked_tasks[1]); } } } /* - * Helper macro for __rcu_init(). To be used nowhere else! - * Assigns leaf node pointers into each CPU's rcu_data structure. 
+ * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used + * nowhere else! Assigns leaf node pointers into each CPU's rcu_data + * structure. */ #define RCU_INIT_FLAVOR(rsp, rcu_data) \ do { \ @@ -1560,18 +1589,38 @@ do { \ } \ } while (0) +#ifdef CONFIG_TREE_PREEMPT_RCU + +void __init __rcu_init_preempt(void) +{ + int i; /* All used by RCU_INIT_FLAVOR(). */ + int j; + struct rcu_node *rnp; + + RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data); +} + +#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + +void __init __rcu_init_preempt(void) +{ +} + +#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ + void __init __rcu_init(void) { - int i; /* All used by RCU_DATA_PTR_INIT(). */ + int i; /* All used by RCU_INIT_FLAVOR(). */ int j; struct rcu_node *rnp; - printk(KERN_INFO "Hierarchical RCU implementation.\n"); + rcu_bootup_announce(); #ifdef CONFIG_RCU_CPU_STALL_DETECTOR printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); + __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 0024e5ddcc68..ca560364d8cd 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -80,6 +80,7 @@ struct rcu_dynticks { */ struct rcu_node { spinlock_t lock; + long gpnum; /* Current grace period for this node. */ unsigned long qsmask; /* CPUs or groups that need to switch in */ /* order for current grace period to proceed.*/ unsigned long qsmaskinit; @@ -90,6 +91,8 @@ struct rcu_node { u8 grpnum; /* CPU/group number for next level up. */ u8 level; /* root is at level 0. */ struct rcu_node *parent; + struct list_head blocked_tasks[2]; + /* Tasks blocked in RCU read-side critsect. */ } ____cacheline_internodealigned_in_smp; /* Index values for nxttail array in struct rcu_data. */ @@ -111,6 +114,7 @@ struct rcu_data { bool passed_quiesc; /* User-mode/idle loop etc. */ bool qs_pending; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ + bool preemptable; /* Preemptable RCU? */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ @@ -244,5 +248,10 @@ DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); extern struct rcu_state rcu_bh_state; DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); +#ifdef CONFIG_TREE_PREEMPT_RCU +extern struct rcu_state rcu_preempt_state; +DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + #endif /* #ifdef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h new file mode 100644 index 000000000000..cd2ab67400c6 --- /dev/null +++ b/kernel/rcutree_plugin.h @@ -0,0 +1,447 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * Internal non-public definitions that provide either classic + * or preemptable semantics. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright Red Hat, 2009 + * Copyright IBM Corporation, 2009 + * + * Author: Ingo Molnar + * Paul E. McKenney + */ + + +#ifdef CONFIG_TREE_PREEMPT_RCU + +struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); +DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); + +/* + * Tell them what RCU they are running. + */ +static inline void rcu_bootup_announce(void) +{ + printk(KERN_INFO + "Experimental preemptable hierarchical RCU implementation.\n"); +} + +/* + * Return the number of RCU-preempt batches processed thus far + * for debug and statistics. + */ +long rcu_batches_completed_preempt(void) +{ + return rcu_preempt_state.completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); + +/* + * Return the number of RCU batches processed thus far for debug & stats. + */ +long rcu_batches_completed(void) +{ + return rcu_batches_completed_preempt(); +} +EXPORT_SYMBOL_GPL(rcu_batches_completed); + +/* + * Record a preemptable-RCU quiescent state for the specified CPU. Note + * that this just means that the task currently running on the CPU is + * not in a quiescent state. There might be any number of tasks blocked + * while in an RCU read-side critical section. + */ +static void rcu_preempt_qs_record(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); + rdp->passed_quiesc = 1; + rdp->passed_quiesc_completed = rdp->completed; +} + +/* + * We have entered the scheduler or are between softirqs in ksoftirqd. + * If we are in an RCU read-side critical section, we need to reflect + * that in the state of the rcu_node structure corresponding to this CPU. + * Caller must disable hardirqs. + */ +static void rcu_preempt_qs(int cpu) +{ + struct task_struct *t = current; + int phase; + struct rcu_data *rdp; + struct rcu_node *rnp; + + if (t->rcu_read_lock_nesting && + (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { + + /* Possibly blocking in an RCU read-side critical section. */ + rdp = rcu_preempt_state.rda[cpu]; + rnp = rdp->mynode; + spin_lock(&rnp->lock); + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; + t->rcu_blocked_cpu = cpu; + + /* + * If this CPU has already checked in, then this task + * will hold up the next grace period rather than the + * current grace period. Queue the task accordingly. + * If the task is queued for the current grace period + * (i.e., this CPU has not yet passed through a quiescent + * state for the current grace period), then as long + * as that task remains queued, the current grace period + * cannot end. + */ + phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1); + list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); + smp_mb(); /* Ensure later ctxt swtch seen after above. */ + spin_unlock(&rnp->lock); + } + + /* + * Either we were not in an RCU read-side critical section to + * begin with, or we have now recorded that critical section + * globally. Either way, we can now note a quiescent state + * for this CPU. Again, if we were in an RCU read-side critical + * section, and if that critical section was blocking the current + * grace period, then the fact that the task has been enqueued + * means that we continue to block the current grace period. 
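
A worked instance of the phase computation above, with all values
invented for illustration:

        /*
         * Suppose rnp->gpnum == 5 (low bit 1) and this CPU's bit is
         * still set in rnp->qsmask, i.e. the CPU has not yet passed
         * through a quiescent state for grace period 5:
         *
         *      !(rnp->qsmask & rdp->grpmask)   == 0
         *      rnp->gpnum & 0x1                == 1
         *      phase = 0 ^ 1                   == 1 == gpnum & 0x1
         *
         * so the task is queued on blocked_tasks[1], the list that the
         * *current* grace period waits on (cf. rcu_preempted_readers()
         * below). Had the CPU already checked in, the first term would
         * be 1, phase would be 0, and the task would only hold up the
         * *next* grace period.
         */
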
+ */ + rcu_preempt_qs_record(cpu); + t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS | + RCU_READ_UNLOCK_GOT_QS); +} + +/* + * Tree-preemptable RCU implementation for rcu_read_lock(). + * Just increment ->rcu_read_lock_nesting, shared state will be updated + * if we block. + */ +void __rcu_read_lock(void) +{ + ACCESS_ONCE(current->rcu_read_lock_nesting)++; + barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ +} +EXPORT_SYMBOL_GPL(__rcu_read_lock); + +static void rcu_read_unlock_special(struct task_struct *t) +{ + int empty; + unsigned long flags; + unsigned long mask; + struct rcu_node *rnp; + int special; + + /* NMI handlers cannot block and cannot safely manipulate state. */ + if (in_nmi()) + return; + + local_irq_save(flags); + + /* + * If RCU core is waiting for this CPU to exit critical section, + * let it know that we have done so. + */ + special = t->rcu_read_unlock_special; + if (special & RCU_READ_UNLOCK_NEED_QS) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS; + } + + /* Hardware IRQ handlers cannot block. */ + if (in_irq()) { + local_irq_restore(flags); + return; + } + + /* Clean up if blocked during RCU read-side critical section. */ + if (special & RCU_READ_UNLOCK_BLOCKED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; + + /* Remove this task from the list it blocked on. */ + rnp = rcu_preempt_state.rda[t->rcu_blocked_cpu]->mynode; + spin_lock(&rnp->lock); + empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); + list_del_init(&t->rcu_node_entry); + t->rcu_blocked_cpu = -1; + + /* + * If this was the last task on the current list, and if + * we aren't waiting on any CPUs, report the quiescent state. + * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk() + * drop rnp->lock and restore irq. + */ + if (!empty && rnp->qsmask == 0 && + list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) { + t->rcu_read_unlock_special &= + ~(RCU_READ_UNLOCK_NEED_QS | + RCU_READ_UNLOCK_GOT_QS); + if (rnp->parent == NULL) { + /* Only one rcu_node in the tree. */ + cpu_quiet_msk_finish(&rcu_preempt_state, flags); + return; + } + /* Report up the rest of the hierarchy. */ + mask = rnp->grpmask; + spin_unlock_irqrestore(&rnp->lock, flags); + rnp = rnp->parent; + spin_lock_irqsave(&rnp->lock, flags); + cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags); + return; + } + spin_unlock(&rnp->lock); + } + local_irq_restore(flags); +} + +/* + * Tree-preemptable RCU implementation for rcu_read_unlock(). + * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then + * invoke rcu_read_unlock_special() to clean up after a context switch + * in an RCU read-side critical section and other special cases. + */ +void __rcu_read_unlock(void) +{ + struct task_struct *t = current; + + barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ + if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && + unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + rcu_read_unlock_special(t); +} +EXPORT_SYMBOL_GPL(__rcu_read_unlock); + +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + +/* + * Scan the current list of tasks blocked within RCU read-side critical + * sections, printing out the tid of each. 
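
A hedged sketch of the nesting counter at work; the reader function is
invented, but the counter transitions follow the two definitions above:

        static void example_nested_reader(void)
        {
                rcu_read_lock();        /* ->rcu_read_lock_nesting 0 -> 1 */
                rcu_read_lock();        /* 1 -> 2: inner sections cost
                                         * only the increment */
                rcu_read_unlock();      /* 2 -> 1: special cleanup skipped */
                rcu_read_unlock();      /* 1 -> 0: only here, and only if
                                         * ->rcu_read_unlock_special became
                                         * nonzero (e.g. after preemption
                                         * inside the section), does
                                         * rcu_read_unlock_special() run */
        }
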
+ */ +static void rcu_print_task_stall(struct rcu_node *rnp) +{ + unsigned long flags; + struct list_head *lp; + int phase = rnp->gpnum & 0x1; + struct task_struct *t; + + if (!list_empty(&rnp->blocked_tasks[phase])) { + spin_lock_irqsave(&rnp->lock, flags); + phase = rnp->gpnum & 0x1; /* re-read under lock. */ + lp = &rnp->blocked_tasks[phase]; + list_for_each_entry(t, lp, rcu_node_entry) + printk(" P%d", t->pid); + spin_unlock_irqrestore(&rnp->lock, flags); + } +} + +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * Check for preempted RCU readers for the specified rcu_node structure. + * If the caller needs a reliable answer, it must hold the rcu_node's + * >lock. + */ +static int rcu_preempted_readers(struct rcu_node *rnp) +{ + return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); +} + +/* + * Check for a quiescent state from the current CPU. When a task blocks, + * the task is recorded in the corresponding CPU's rcu_node structure, + * which is checked elsewhere. + * + * Caller must disable hard irqs. + */ +static void rcu_preempt_check_callbacks(int cpu) +{ + struct task_struct *t = current; + + if (t->rcu_read_lock_nesting == 0) { + t->rcu_read_unlock_special &= + ~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS); + rcu_preempt_qs_record(cpu); + return; + } + if (per_cpu(rcu_preempt_data, cpu).qs_pending) { + if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) { + rcu_preempt_qs_record(cpu); + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS; + } else if (!(t->rcu_read_unlock_special & + RCU_READ_UNLOCK_NEED_QS)) { + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; + } + } +} + +/* + * Process callbacks for preemptable RCU. + */ +static void rcu_preempt_process_callbacks(void) +{ + __rcu_process_callbacks(&rcu_preempt_state, + &__get_cpu_var(rcu_preempt_data)); +} + +/* + * Queue a preemptable-RCU callback for invocation after a grace period. + */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_preempt_state); +} +EXPORT_SYMBOL_GPL(call_rcu); + +/* + * Check to see if there is any immediate preemptable-RCU-related work + * to be done. + */ +static int rcu_preempt_pending(int cpu) +{ + return __rcu_pending(&rcu_preempt_state, + &per_cpu(rcu_preempt_data, cpu)); +} + +/* + * Does preemptable RCU need the CPU to stay out of dynticks mode? + */ +static int rcu_preempt_needs_cpu(int cpu) +{ + return !!per_cpu(rcu_preempt_data, cpu).nxtlist; +} + +/* + * Initialize preemptable RCU's per-CPU data. + */ +static void __cpuinit rcu_preempt_init_percpu_data(int cpu) +{ + rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); +} + +/* + * Check for a task exiting while in a preemptable-RCU read-side + * critical section, clean up if so. No need to issue warnings, + * as debug_check_no_locks_held() already does this if lockdep + * is enabled. + */ +void exit_rcu(void) +{ + struct task_struct *t = current; + + if (t->rcu_read_lock_nesting == 0) + return; + t->rcu_read_lock_nesting = 1; + rcu_read_unlock(); +} + +#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + +/* + * Tell them what RCU they are running. + */ +static inline void rcu_bootup_announce(void) +{ + printk(KERN_INFO "Hierarchical RCU implementation.\n"); +} + +/* + * Return the number of RCU batches processed thus far for debug & stats. 
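
The two-branch plugin pattern, reduced to one representative pair that
is condensed from the definitions above and below:

        #ifdef CONFIG_TREE_PREEMPT_RCU
        static int rcu_preempt_pending(int cpu)
        {
                return __rcu_pending(&rcu_preempt_state,
                                     &per_cpu(rcu_preempt_data, cpu));
        }
        #else
        static int rcu_preempt_pending(int cpu)
        {
                return 0;       /* no preemptable RCU, so never any work */
        }
        #endif
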
+ */ +long rcu_batches_completed(void) +{ + return rcu_batches_completed_sched(); +} +EXPORT_SYMBOL_GPL(rcu_batches_completed); + +/* + * Because preemptable RCU does not exist, we never have to check for + * CPUs being in quiescent states. + */ +static void rcu_preempt_qs(int cpu) +{ +} + +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + +/* + * Because preemptable RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections. + */ +static void rcu_print_task_stall(struct rcu_node *rnp) +{ +} + +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * Because preemptable RCU does not exist, there are never any preempted + * RCU readers. + */ +static int rcu_preempted_readers(struct rcu_node *rnp) +{ + return 0; +} + +/* + * Because preemptable RCU does not exist, it never has any callbacks + * to check. + */ +void rcu_preempt_check_callbacks(int cpu) +{ +} + +/* + * Because preemptable RCU does not exist, it never has any callbacks + * to process. + */ +void rcu_preempt_process_callbacks(void) +{ +} + +/* + * In classic RCU, call_rcu() is just call_rcu_sched(). + */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + call_rcu_sched(head, func); +} +EXPORT_SYMBOL_GPL(call_rcu); + +/* + * Because preemptable RCU does not exist, it never has any work to do. + */ +static int rcu_preempt_pending(int cpu) +{ + return 0; +} + +/* + * Because preemptable RCU does not exist, it never needs any CPU. + */ +static int rcu_preempt_needs_cpu(int cpu) +{ + return 0; +} + +/* + * Because preemptable RCU does not exist, there is no per-CPU + * data to initialize. + */ +static void __cpuinit rcu_preempt_init_percpu_data(int cpu) +{ +} + +#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 31af3a0fb6d5..0ea1bff69727 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -77,6 +77,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) static int show_rcudata(struct seq_file *m, void *unused) { +#ifdef CONFIG_TREE_PREEMPT_RCU + seq_puts(m, "rcu_preempt:\n"); + PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ seq_puts(m, "rcu_sched:\n"); PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m); seq_puts(m, "rcu_bh:\n"); @@ -125,6 +129,10 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); #endif /* #ifdef CONFIG_NO_HZ */ seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n"); +#ifdef CONFIG_TREE_PREEMPT_RCU + seq_puts(m, "\"rcu_preempt:\"\n"); + PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ seq_puts(m, "\"rcu_sched:\"\n"); PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m); seq_puts(m, "\"rcu_bh:\"\n"); @@ -172,6 +180,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) static int show_rcuhier(struct seq_file *m, void *unused) { +#ifdef CONFIG_TREE_PREEMPT_RCU + seq_puts(m, "rcu_preempt:\n"); + print_one_rcu_state(m, &rcu_preempt_state); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ seq_puts(m, "rcu_sched:\n"); print_one_rcu_state(m, &rcu_sched_state); seq_puts(m, "rcu_bh:\n"); @@ -194,6 +206,10 @@ static struct file_operations rcuhier_fops = { static int show_rcugp(struct seq_file *m, void *unused) { +#ifdef CONFIG_TREE_PREEMPT_RCU + seq_printf(m, "rcu_preempt: completed=%ld gpnum=%ld\n", + rcu_preempt_state.completed, rcu_preempt_state.gpnum); +#endif 
/* #ifdef CONFIG_TREE_PREEMPT_RCU */ seq_printf(m, "rcu_sched: completed=%ld gpnum=%ld\n", rcu_sched_state.completed, rcu_sched_state.gpnum); seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n", @@ -244,6 +260,10 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) static int show_rcu_pending(struct seq_file *m, void *unused) { +#ifdef CONFIG_TREE_PREEMPT_RCU + seq_puts(m, "rcu_preempt:\n"); + print_rcu_pendings(m, &rcu_preempt_state); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ seq_puts(m, "rcu_sched:\n"); print_rcu_pendings(m, &rcu_sched_state); seq_puts(m, "rcu_bh:\n"); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 12327b2bb785..f87fb0c8f924 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -725,7 +725,7 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_DETECTOR bool "Check for stalled CPUs delaying RCU grace periods" - depends on CLASSIC_RCU || TREE_RCU + depends on CLASSIC_RCU || TREE_RCU || TREE_PREEMPT_RCU default n help This option causes RCU to printk information on which -- cgit v1.2.3 From 6b3ef48adf847f7adf11c870e3ffacac150f1564 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:53 -0700 Subject: rcu: Remove CONFIG_PREEMPT_RCU Now that CONFIG_TREE_PREEMPT_RCU is in place, there is no further need for CONFIG_PREEMPT_RCU. Remove it, along with whatever subtle bugs it may (or may not) contain. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <125097461396-git-send-email-> Signed-off-by: Ingo Molnar --- Documentation/RCU/rcu.txt | 10 +- Documentation/RCU/whatisRCU.txt | 8 +- include/linux/init_task.h | 6 +- include/linux/rcupdate.h | 4 +- include/linux/rcupreempt.h | 140 ---- include/linux/rcupreempt_trace.h | 97 --- include/linux/sched.h | 13 - init/Kconfig | 20 +- kernel/Makefile | 2 - kernel/rcupreempt.c | 1518 -------------------------------------- kernel/rcupreempt_trace.c | 335 --------- lib/Kconfig.debug | 2 +- 12 files changed, 13 insertions(+), 2142 deletions(-) delete mode 100644 include/linux/rcupreempt.h delete mode 100644 include/linux/rcupreempt_trace.h delete mode 100644 kernel/rcupreempt.c delete mode 100644 kernel/rcupreempt_trace.c (limited to 'include/linux') diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt index 7aa2002ade77..2a23523ce471 100644 --- a/Documentation/RCU/rcu.txt +++ b/Documentation/RCU/rcu.txt @@ -36,7 +36,7 @@ o How can the updater tell when a grace period has completed executed in user mode, or executed in the idle loop, we can safely free up that item. - Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the + Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the same effect, but require that the readers manipulate CPU-local counters. These counters allow limited types of blocking within RCU read-side critical sections. SRCU also uses @@ -79,10 +79,10 @@ o I hear that RCU is patented? What is with that? o I hear that RCU needs work in order to support realtime kernels? This work is largely completed. Realtime-friendly RCU can be - enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter. - However, work is in progress for enabling priority boosting of - preempted RCU read-side critical sections. This is needed if you - have CPU-bound realtime threads. 
+ enabled via the CONFIG_TREE_PREEMPT_RCU kernel configuration + parameter. However, work is in progress for enabling priority + boosting of preempted RCU read-side critical sections. This is + needed if you have CPU-bound realtime threads. o Where can I find more information on RCU? diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 97ded2432c59..e41a7fecf0d3 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -136,10 +136,10 @@ rcu_read_lock() Used by a reader to inform the reclaimer that the reader is entering an RCU read-side critical section. It is illegal to block while in an RCU read-side critical section, though - kernels built with CONFIG_PREEMPT_RCU can preempt RCU read-side - critical sections. Any RCU-protected data structure accessed - during an RCU read-side critical section is guaranteed to remain - unreclaimed for the full duration of that critical section. + kernels built with CONFIG_TREE_PREEMPT_RCU can preempt RCU + read-side critical sections. Any RCU-protected data structure + accessed during an RCU read-side critical section is guaranteed to + remain unreclaimed for the full duration of that critical section. Reference counts may be used in conjunction with RCU to maintain longer-term references to data structures. diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 971a968831bf..79d4baee31b6 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -94,11 +94,7 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif -#ifdef CONFIG_PREEMPT_RCU -#define INIT_TASK_RCU_PREEMPT(tsk) \ - .rcu_read_lock_nesting = 0, \ - .rcu_flipctr_idx = 0, -#elif defined(CONFIG_TREE_PREEMPT_RCU) +#ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 26892f5e7bd8..ec90fc34fea9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -68,11 +68,9 @@ extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include -#elif defined(CONFIG_PREEMPT_RCU) -#include #else #error "Unknown RCU implementation specified to kernel configuration" -#endif /* #else #if defined(CONFIG_CLASSIC_RCU) */ +#endif #define RCU_HEAD_INIT { .next = NULL, .func = NULL } #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h deleted file mode 100644 index a42ab88e9210..000000000000 --- a/include/linux/rcupreempt.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion (RT implementation) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Paul McKenney - * - * Based on the original work by Paul McKenney - * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. - * Papers: - * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf - * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU - * - */ - -#ifndef __LINUX_RCUPREEMPT_H -#define __LINUX_RCUPREEMPT_H - -#include -#include -#include -#include -#include -#include - -extern void rcu_sched_qs(int cpu); -static inline void rcu_bh_qs(int cpu) { } - -/* - * Someone might want to pass call_rcu_bh as a function pointer. - * So this needs to just be a rename and not a macro function. - * (no parentheses) - */ -#define call_rcu_bh call_rcu - -/** - * call_rcu_sched - Queue RCU callback for invocation after sched grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period - * - * The update function will be invoked some time after a full - * synchronize_sched()-style grace period elapses, in other words after - * all currently executing preempt-disabled sections of code (including - * hardirq handlers, NMI handlers, and local_irq_save() blocks) have - * completed. - */ -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *head)); - -extern void __rcu_read_lock(void); -extern void __rcu_read_unlock(void); -extern int rcu_needs_cpu(int cpu); - -#define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } -#define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } - -extern void __synchronize_sched(void); - -static inline void synchronize_rcu_expedited(void) -{ - synchronize_rcu(); /* Placeholder for new rcupreempt implementation. */ -} - -static inline void synchronize_rcu_bh_expedited(void) -{ - synchronize_rcu_bh(); /* Placeholder for new rcupreempt impl. */ -} - -extern void __rcu_init(void); -extern void rcu_init_sched(void); -extern void rcu_check_callbacks(int cpu, int user); -extern void rcu_restart_cpu(int cpu); -extern long rcu_batches_completed(void); - -/* - * Return the number of RCU batches processed thus far. Useful for debug - * and statistic. The _bh variant is identifcal to straight RCU - */ -static inline long rcu_batches_completed_bh(void) -{ - return rcu_batches_completed(); -} - -static inline void exit_rcu(void) -{ -} - -#ifdef CONFIG_RCU_TRACE -struct rcupreempt_trace; -extern long *rcupreempt_flipctr(int cpu); -extern long rcupreempt_data_completed(void); -extern int rcupreempt_flip_flag(int cpu); -extern int rcupreempt_mb_flag(int cpu); -extern char *rcupreempt_try_flip_state_name(void); -extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); -#endif - -struct softirq_action; - -#ifdef CONFIG_NO_HZ -extern void rcu_enter_nohz(void); -extern void rcu_exit_nohz(void); -#else -# define rcu_enter_nohz() do { } while (0) -# define rcu_exit_nohz() do { } while (0) -#endif - -/* - * A context switch is a grace period for rcupreempt synchronize_rcu() - * only during early boot, before the scheduler has been initialized. - * So, how the heck do we get a context switch? Well, if the caller - * invokes synchronize_rcu(), they are willing to accept a context - * switch, so we simply pretend that one happened. 
- * - * After boot, there might be a blocked or preempted task in an RCU - * read-side critical section, so we cannot then take the fastpath. - */ -static inline int rcu_blocking_is_gp(void) -{ - return num_online_cpus() == 1 && !rcu_scheduler_active; -} - -#endif /* __LINUX_RCUPREEMPT_H */ diff --git a/include/linux/rcupreempt_trace.h b/include/linux/rcupreempt_trace.h deleted file mode 100644 index b99ae073192a..000000000000 --- a/include/linux/rcupreempt_trace.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion (RT implementation) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Paul McKenney - * - * Based on the original work by Paul McKenney - * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. - * Papers: - * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf - * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) - * - * For detailed explanation of the Preemptible Read-Copy Update mechanism see - - * http://lwn.net/Articles/253651/ - */ - -#ifndef __LINUX_RCUPREEMPT_TRACE_H -#define __LINUX_RCUPREEMPT_TRACE_H - -#include -#include - -#include - -/* - * PREEMPT_RCU data structures. 
- */ - -struct rcupreempt_trace { - long next_length; - long next_add; - long wait_length; - long wait_add; - long done_length; - long done_add; - long done_remove; - atomic_t done_invoked; - long rcu_check_callbacks; - atomic_t rcu_try_flip_1; - atomic_t rcu_try_flip_e1; - long rcu_try_flip_i1; - long rcu_try_flip_ie1; - long rcu_try_flip_g1; - long rcu_try_flip_a1; - long rcu_try_flip_ae1; - long rcu_try_flip_a2; - long rcu_try_flip_z1; - long rcu_try_flip_ze1; - long rcu_try_flip_z2; - long rcu_try_flip_m1; - long rcu_try_flip_me1; - long rcu_try_flip_m2; -}; - -#ifdef CONFIG_RCU_TRACE -#define RCU_TRACE(fn, arg) fn(arg); -#else -#define RCU_TRACE(fn, arg) -#endif - -extern void rcupreempt_trace_move2done(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace); - -#endif /* __LINUX_RCUPREEMPT_TRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d7f98f637a2a..bfca26d63b13 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1205,11 +1205,6 @@ struct task_struct { unsigned int policy; cpumask_t cpus_allowed; -#ifdef CONFIG_PREEMPT_RCU - int rcu_read_lock_nesting; - int rcu_flipctr_idx; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - #ifdef CONFIG_TREE_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; @@ -1744,14 +1739,6 @@ static inline void rcu_copy_process(struct task_struct *p) INIT_LIST_HEAD(&p->rcu_node_entry); } -#elif defined(CONFIG_PREEMPT_RCU) - -static inline void rcu_copy_process(struct task_struct *p) -{ - p->rcu_read_lock_nesting = 0; - p->rcu_flipctr_idx = 0; -} - #else static inline void rcu_copy_process(struct task_struct *p) diff --git a/init/Kconfig b/init/Kconfig index f88da2d1c1fb..8e8b76d8a272 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -324,17 +324,6 @@ config TREE_RCU thousands of CPUs. It also scales down nicely to smaller systems. -config PREEMPT_RCU - bool "Preemptible RCU" - depends on PREEMPT - help - This option reduces the latency of the kernel by making certain - RCU sections preemptible. Normally RCU code is non-preemptible, if - this option is selected then read-only RCU sections become - preemptible. 
This helps latency, but may expose bugs due to - now-naive assumptions about each RCU read-side critical section - remaining on a given CPU through its execution. - config TREE_PREEMPT_RCU bool "Preemptable tree-based hierarchical RCU" depends on PREEMPT @@ -348,7 +337,7 @@ endchoice config RCU_TRACE bool "Enable tracing for RCU" - depends on TREE_RCU || PREEMPT_RCU || TREE_PREEMPT_RCU + depends on TREE_RCU || TREE_PREEMPT_RCU help This option provides tracing in RCU which presents stats in debugfs for debugging RCU implementation. @@ -395,13 +384,6 @@ config TREE_RCU_TRACE TREE_PREEMPT_RCU implementations, permitting Makefile to trivially select kernel/rcutree_trace.c. -config PREEMPT_RCU_TRACE - def_bool RCU_TRACE && PREEMPT_RCU - select DEBUG_FS - help - This option provides tracing for the PREEMPT_RCU implementation, - permitting Makefile to trivially select kernel/rcupreempt_trace.c. - endmenu # "RCU Subsystem" config IKCONFIG diff --git a/kernel/Makefile b/kernel/Makefile index 1a38b4789dda..b833bd5cc127 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -82,9 +82,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o -obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o -obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c deleted file mode 100644 index 0053ce56e326..000000000000 --- a/kernel/rcupreempt.c +++ /dev/null @@ -1,1518 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion, realtime implementation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright IBM Corporation, 2006 - * - * Authors: Paul E. McKenney - * With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar - * for pushing me away from locks and towards counters, and - * to Suparna Bhattacharya for pushing me completely away - * from atomic instructions on the read side. - * - * - Added handling of Dynamic Ticks - * Copyright 2007 - Paul E. Mckenney - * - Steven Rostedt - * - * Papers: http://www.rdrop.com/users/paulmck/RCU - * - * Design Document: http://lwn.net/Articles/253651/ - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU/ *.txt - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * PREEMPT_RCU data structures. - */ - -/* - * GP_STAGES specifies the number of times the state machine has - * to go through the all the rcu_try_flip_states (see below) - * in a single Grace Period. 
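
To make the GP_STAGES comment concrete, here is a toy userspace model (illustrative only, not kernel code) of how callbacks age through GP_STAGES waitlists, advancing one stage per counter flip and becoming invocable only after falling off the oldest stage:

    #include <stdio.h>

    #define GP_STAGES 2    /* as in the deleted rcupreempt.c */

    int main(void)
    {
        int next = 1;                  /* one callback just queued */
        int wait[GP_STAGES] = { 0 };
        int done = 0;

        for (int flip = 1; flip <= GP_STAGES + 1; flip++) {
            done += wait[GP_STAGES - 1];          /* oldest stage is now safe to invoke */
            for (int i = GP_STAGES - 1; i > 0; i--)
                wait[i] = wait[i - 1];             /* age the remaining stages */
            wait[0] = next;                        /* newly queued callbacks enter stage 0 */
            next = 0;
            printf("after flip %d: done=%d\n", flip, done);
        }
        return 0;   /* the callback becomes invocable only after GP_STAGES + 1 flips */
    }
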
- * - * GP in GP_STAGES stands for Grace Period ;) - */ -#define GP_STAGES 2 -struct rcu_data { - spinlock_t lock; /* Protect rcu_data fields. */ - long completed; /* Number of last completed batch. */ - int waitlistcount; - struct rcu_head *nextlist; - struct rcu_head **nexttail; - struct rcu_head *waitlist[GP_STAGES]; - struct rcu_head **waittail[GP_STAGES]; - struct rcu_head *donelist; /* from waitlist & waitschedlist */ - struct rcu_head **donetail; - long rcu_flipctr[2]; - struct rcu_head *nextschedlist; - struct rcu_head **nextschedtail; - struct rcu_head *waitschedlist; - struct rcu_head **waitschedtail; - int rcu_sched_sleeping; -#ifdef CONFIG_RCU_TRACE - struct rcupreempt_trace trace; -#endif /* #ifdef CONFIG_RCU_TRACE */ -}; - -/* - * States for rcu_try_flip() and friends. - */ - -enum rcu_try_flip_states { - - /* - * Stay here if nothing is happening. Flip the counter if somthing - * starts happening. Denoted by "I" - */ - rcu_try_flip_idle_state, - - /* - * Wait here for all CPUs to notice that the counter has flipped. This - * prevents the old set of counters from ever being incremented once - * we leave this state, which in turn is necessary because we cannot - * test any individual counter for zero -- we can only check the sum. - * Denoted by "A". - */ - rcu_try_flip_waitack_state, - - /* - * Wait here for the sum of the old per-CPU counters to reach zero. - * Denoted by "Z". - */ - rcu_try_flip_waitzero_state, - - /* - * Wait here for each of the other CPUs to execute a memory barrier. - * This is necessary to ensure that these other CPUs really have - * completed executing their RCU read-side critical sections, despite - * their CPUs wildly reordering memory. Denoted by "M". - */ - rcu_try_flip_waitmb_state, -}; - -/* - * States for rcu_ctrlblk.rcu_sched_sleep. - */ - -enum rcu_sched_sleep_states { - rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */ - rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */ - rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */ -}; - -struct rcu_ctrlblk { - spinlock_t fliplock; /* Protect state-machine transitions. */ - long completed; /* Number of last completed batch. */ - enum rcu_try_flip_states rcu_try_flip_state; /* The current state of - the rcu state machine */ - spinlock_t schedlock; /* Protect rcu_sched sleep state. */ - enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */ - wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. 
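
The I -> A -> Z -> M progression described by the state enum above is advanced by a plain switch-driven loop (rcu_try_flip(), later in this file), in which each state must report success before the next is entered. A stub userspace model, all names hypothetical:

    #include <stdio.h>

    enum flip_state { IDLE, WAITACK, WAITZERO, WAITMB };

    /* Stub standing in for rcu_try_flip_idle() and friends; always "succeeds". */
    static int stage_done(enum flip_state s) { (void)s; return 1; }

    int main(void)
    {
        enum flip_state state = IDLE;
        static const char *names[] = { "idle", "waitack", "waitzero", "waitmb" };

        for (int tick = 0; tick < 8; tick++) {
            printf("tick %d: %s\n", tick, names[state]);
            if (stage_done(state))
                state = (state + 1) % 4;   /* I -> A -> Z -> M -> I */
        }
        return 0;
    }
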
*/ -}; - -struct rcu_dyntick_sched { - int dynticks; - int dynticks_snap; - int sched_qs; - int sched_qs_snap; - int sched_dynticks_snap; -}; - -static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = { - .dynticks = 1, -}; - -static int rcu_pending(int cpu); - -void rcu_sched_qs(int cpu) -{ - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - rdssp->sched_qs++; -} - -#ifdef CONFIG_NO_HZ - -void rcu_enter_nohz(void) -{ - static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); - - smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - __get_cpu_var(rcu_dyntick_sched).dynticks++; - WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs); -} - -void rcu_exit_nohz(void) -{ - static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); - - __get_cpu_var(rcu_dyntick_sched).dynticks++; - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ - WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), - &rs); -} - -#endif /* CONFIG_NO_HZ */ - - -static DEFINE_PER_CPU(struct rcu_data, rcu_data); - -static struct rcu_ctrlblk rcu_ctrlblk = { - .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), - .completed = 0, - .rcu_try_flip_state = rcu_try_flip_idle_state, - .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock), - .sched_sleep = rcu_sched_not_sleeping, - .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq), -}; - -static struct task_struct *rcu_sched_grace_period_task; - -#ifdef CONFIG_RCU_TRACE -static char *rcu_try_flip_state_names[] = - { "idle", "waitack", "waitzero", "waitmb" }; -#endif /* #ifdef CONFIG_RCU_TRACE */ - -static DECLARE_BITMAP(rcu_cpu_online_map, NR_CPUS) __read_mostly - = CPU_BITS_NONE; - -/* - * Enum and per-CPU flag to determine when each CPU has seen - * the most recent counter flip. - */ - -enum rcu_flip_flag_values { - rcu_flip_seen, /* Steady/initial state, last flip seen. */ - /* Only GP detector can update. */ - rcu_flipped /* Flip just completed, need confirmation. */ - /* Only corresponding CPU can update. */ -}; -static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag) - = rcu_flip_seen; - -/* - * Enum and per-CPU flag to determine when each CPU has executed the - * needed memory barrier to fence in memory references from its last RCU - * read-side critical section in the just-completed grace period. - */ - -enum rcu_mb_flag_values { - rcu_mb_done, /* Steady/initial state, no mb()s required. */ - /* Only GP detector can update. */ - rcu_mb_needed /* Flip just completed, need an mb(). */ - /* Only corresponding CPU can update. */ -}; -static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag) - = rcu_mb_done; - -/* - * RCU_DATA_ME: find the current CPU's rcu_data structure. - * RCU_DATA_CPU: find the specified CPU's rcu_data structure. - */ -#define RCU_DATA_ME() (&__get_cpu_var(rcu_data)) -#define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu)) - -/* - * Helper macro for tracing when the appropriate rcu_data is not - * cached in a local variable, but where the CPU number is so cached. - */ -#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace)); - -/* - * Helper macro for tracing when the appropriate rcu_data is not - * cached in a local variable. - */ -#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace)); - -/* - * Helper macro for tracing when the appropriate rcu_data is pointed - * to by a local variable. 
- */ -#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); - -#define RCU_SCHED_BATCH_TIME (HZ / 50) - -/* - * Return the number of RCU batches processed thus far. Useful - * for debug and statistics. - */ -long rcu_batches_completed(void) -{ - return rcu_ctrlblk.completed; -} -EXPORT_SYMBOL_GPL(rcu_batches_completed); - -void __rcu_read_lock(void) -{ - int idx; - struct task_struct *t = current; - int nesting; - - nesting = ACCESS_ONCE(t->rcu_read_lock_nesting); - if (nesting != 0) { - - /* An earlier rcu_read_lock() covers us, just count it. */ - - t->rcu_read_lock_nesting = nesting + 1; - - } else { - unsigned long flags; - - /* - * We disable interrupts for the following reasons: - * - If we get scheduling clock interrupt here, and we - * end up acking the counter flip, it's like a promise - * that we will never increment the old counter again. - * Thus we will break that promise if that - * scheduling clock interrupt happens between the time - * we pick the .completed field and the time that we - * increment our counter. - * - * - We don't want to be preempted out here. - * - * NMIs can still occur, of course, and might themselves - * contain rcu_read_lock(). - */ - - local_irq_save(flags); - - /* - * Outermost nesting of rcu_read_lock(), so increment - * the current counter for the current CPU. Use volatile - * casts to prevent the compiler from reordering. - */ - - idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1; - ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++; - - /* - * Now that the per-CPU counter has been incremented, we - * are protected from races with rcu_read_lock() invoked - * from NMI handlers on this CPU. We can therefore safely - * increment the nesting counter, relieving further NMIs - * of the need to increment the per-CPU counter. - */ - - ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1; - - /* - * Now that we have preventing any NMIs from storing - * to the ->rcu_flipctr_idx, we can safely use it to - * remember which counter to decrement in the matching - * rcu_read_unlock(). - */ - - ACCESS_ONCE(t->rcu_flipctr_idx) = idx; - local_irq_restore(flags); - } -} -EXPORT_SYMBOL_GPL(__rcu_read_lock); - -void __rcu_read_unlock(void) -{ - int idx; - struct task_struct *t = current; - int nesting; - - nesting = ACCESS_ONCE(t->rcu_read_lock_nesting); - if (nesting > 1) { - - /* - * We are still protected by the enclosing rcu_read_lock(), - * so simply decrement the counter. - */ - - t->rcu_read_lock_nesting = nesting - 1; - - } else { - unsigned long flags; - - /* - * Disable local interrupts to prevent the grace-period - * detection state machine from seeing us half-done. - * NMIs can still occur, of course, and might themselves - * contain rcu_read_lock() and rcu_read_unlock(). - */ - - local_irq_save(flags); - - /* - * Outermost nesting of rcu_read_unlock(), so we must - * decrement the current counter for the current CPU. - * This must be done carefully, because NMIs can - * occur at any point in this code, and any rcu_read_lock() - * and rcu_read_unlock() pairs in the NMI handlers - * must interact non-destructively with this code. - * Lots of volatile casts, and -very- careful ordering. - * - * Changes to this code, including this one, must be - * inspected, validated, and tested extremely carefully!!! - */ - - /* - * First, pick up the index. - */ - - idx = ACCESS_ONCE(t->rcu_flipctr_idx); - - /* - * Now that we have fetched the counter index, it is - * safe to decrement the per-task RCU nesting counter. 
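
The read-side fastpath above (and its unlock counterpart just below) reduces to: the outermost rcu_read_lock() increments the per-CPU counter selected by the grace-period parity and remembers that index, while nested calls only bump the per-task nesting count. A single-threaded userspace model -- no NMIs, IRQs, or preemption here, so the ACCESS_ONCE() and irq-disable machinery is omitted:

    #include <assert.h>
    #include <stdio.h>

    static long completed;         /* global flip counter */
    static long flipctr[2];        /* the counter pair of one modeled CPU */
    static int nesting;            /* per-task nesting count */
    static int flipctr_idx;        /* per-task: which counter we incremented */

    static void read_lock(void)
    {
        if (nesting++ == 0) {
            flipctr_idx = completed & 0x1;   /* pick the current parity */
            flipctr[flipctr_idx]++;
        }
    }

    static void read_unlock(void)
    {
        if (--nesting == 0)
            flipctr[flipctr_idx]--;   /* use the remembered index: the
                                       * global counter may have flipped
                                       * while we were inside */
    }

    int main(void)
    {
        read_lock();
        completed++;                  /* grace-period machinery flips mid-section */
        read_lock();                  /* nested: no counter access at all */
        read_unlock();
        read_unlock();                /* decrements the *old* counter */
        assert(flipctr[0] == 0 && flipctr[1] == 0);
        printf("balanced\n");
        return 0;
    }
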
- * After this, any interrupts or NMIs will increment and - * decrement the per-CPU counters. - */ - ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1; - - /* - * It is now safe to decrement this task's nesting count. - * NMIs that occur after this statement will route their - * rcu_read_lock() calls through this "else" clause, and - * will thus start incrementing the per-CPU counter on - * their own. They will also clobber ->rcu_flipctr_idx, - * but that is OK, since we have already fetched it. - */ - - ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--; - local_irq_restore(flags); - } -} -EXPORT_SYMBOL_GPL(__rcu_read_unlock); - -/* - * If a global counter flip has occurred since the last time that we - * advanced callbacks, advance them. Hardware interrupts must be - * disabled when calling this function. - */ -static void __rcu_advance_callbacks(struct rcu_data *rdp) -{ - int cpu; - int i; - int wlc = 0; - - if (rdp->completed != rcu_ctrlblk.completed) { - if (rdp->waitlist[GP_STAGES - 1] != NULL) { - *rdp->donetail = rdp->waitlist[GP_STAGES - 1]; - rdp->donetail = rdp->waittail[GP_STAGES - 1]; - RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp); - } - for (i = GP_STAGES - 2; i >= 0; i--) { - if (rdp->waitlist[i] != NULL) { - rdp->waitlist[i + 1] = rdp->waitlist[i]; - rdp->waittail[i + 1] = rdp->waittail[i]; - wlc++; - } else { - rdp->waitlist[i + 1] = NULL; - rdp->waittail[i + 1] = - &rdp->waitlist[i + 1]; - } - } - if (rdp->nextlist != NULL) { - rdp->waitlist[0] = rdp->nextlist; - rdp->waittail[0] = rdp->nexttail; - wlc++; - rdp->nextlist = NULL; - rdp->nexttail = &rdp->nextlist; - RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp); - } else { - rdp->waitlist[0] = NULL; - rdp->waittail[0] = &rdp->waitlist[0]; - } - rdp->waitlistcount = wlc; - rdp->completed = rcu_ctrlblk.completed; - } - - /* - * Check to see if this CPU needs to report that it has seen - * the most recent counter flip, thereby declaring that all - * subsequent rcu_read_lock() invocations will respect this flip. - */ - - cpu = raw_smp_processor_id(); - if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) { - smp_mb(); /* Subsequent counter accesses must see new value */ - per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen; - smp_mb(); /* Subsequent RCU read-side critical sections */ - /* seen -after- acknowledgement. */ - } -} - -#ifdef CONFIG_NO_HZ -static DEFINE_PER_CPU(int, rcu_update_flag); - -/** - * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. - * - * If the CPU was idle with dynamic ticks active, this updates the - * rcu_dyntick_sched.dynticks to let the RCU handling know that the - * CPU is active. - */ -void rcu_irq_enter(void) -{ - int cpu = smp_processor_id(); - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - if (per_cpu(rcu_update_flag, cpu)) - per_cpu(rcu_update_flag, cpu)++; - - /* - * Only update if we are coming from a stopped ticks mode - * (rcu_dyntick_sched.dynticks is even). - */ - if (!in_interrupt() && - (rdssp->dynticks & 0x1) == 0) { - /* - * The following might seem like we could have a race - * with NMI/SMIs. But this really isn't a problem. - * Here we do a read/modify/write, and the race happens - * when an NMI/SMI comes in after the read and before - * the write. But NMI/SMIs will increment this counter - * twice before returning, so the zero bit will not - * be corrupted by the NMI/SMI which is the most important - * part. - * - * The only thing is that we would bring back the counter - * to a postion that it was in during the NMI/SMI. 
- * But the zero bit would be set, so the rest of the - * counter would again be ignored. - * - * On return from the IRQ, the counter may have the zero - * bit be 0 and the counter the same as the return from - * the NMI/SMI. If the state machine was so unlucky to - * see that, it still doesn't matter, since all - * RCU read-side critical sections on this CPU would - * have already completed. - */ - rdssp->dynticks++; - /* - * The following memory barrier ensures that any - * rcu_read_lock() primitives in the irq handler - * are seen by other CPUs to follow the above - * increment to rcu_dyntick_sched.dynticks. This is - * required in order for other CPUs to correctly - * determine when it is safe to advance the RCU - * grace-period state machine. - */ - smp_mb(); /* see above block comment. */ - /* - * Since we can't determine the dynamic tick mode from - * the rcu_dyntick_sched.dynticks after this routine, - * we use a second flag to acknowledge that we came - * from an idle state with ticks stopped. - */ - per_cpu(rcu_update_flag, cpu)++; - /* - * If we take an NMI/SMI now, they will also increment - * the rcu_update_flag, and will not update the - * rcu_dyntick_sched.dynticks on exit. That is for - * this IRQ to do. - */ - } -} - -/** - * rcu_irq_exit - Called from exiting Hard irq context. - * - * If the CPU was idle with dynamic ticks active, update the - * rcu_dyntick_sched.dynticks to let the RCU handling be - * aware that the CPU is going back to idle with no ticks. - */ -void rcu_irq_exit(void) -{ - int cpu = smp_processor_id(); - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - /* - * rcu_update_flag is set if we interrupted the CPU - * when it was idle with ticks stopped. - * Once this occurs, we keep track of interrupt nesting - * because a NMI/SMI could also come in, and we still - * only want the IRQ that started the increment of the - * rcu_dyntick_sched.dynticks to be the one that modifies - * it on exit. - */ - if (per_cpu(rcu_update_flag, cpu)) { - if (--per_cpu(rcu_update_flag, cpu)) - return; - - /* This must match the interrupt nesting */ - WARN_ON(in_interrupt()); - - /* - * If an NMI/SMI happens now we are still - * protected by the rcu_dyntick_sched.dynticks being odd. - */ - - /* - * The following memory barrier ensures that any - * rcu_read_unlock() primitives in the irq handler - * are seen by other CPUs to preceed the following - * increment to rcu_dyntick_sched.dynticks. This - * is required in order for other CPUs to determine - * when it is safe to advance the RCU grace-period - * state machine. - */ - smp_mb(); /* see above block comment. */ - rdssp->dynticks++; - WARN_ON(rdssp->dynticks & 0x1); - } -} - -void rcu_nmi_enter(void) -{ - rcu_irq_enter(); -} - -void rcu_nmi_exit(void) -{ - rcu_irq_exit(); -} - -static void dyntick_save_progress_counter(int cpu) -{ - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - rdssp->dynticks_snap = rdssp->dynticks; -} - -static inline int -rcu_try_flip_waitack_needed(int cpu) -{ - long curr; - long snap; - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - curr = rdssp->dynticks; - snap = rdssp->dynticks_snap; - smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ - - /* - * If the CPU remained in dynticks mode for the entire time - * and didn't take any interrupts, NMIs, SMIs, or whatever, - * then it cannot be in the middle of an rcu_read_lock(), so - * the next rcu_read_lock() it executes must use the new value - * of the counter. 
So we can safely pretend that this CPU - * already acknowledged the counter. - */ - - if ((curr == snap) && ((curr & 0x1) == 0)) - return 0; - - /* - * If the CPU passed through or entered a dynticks idle phase with - * no active irq handlers, then, as above, we can safely pretend - * that this CPU already acknowledged the counter. - */ - - if ((curr - snap) > 2 || (curr & 0x1) == 0) - return 0; - - /* We need this CPU to explicitly acknowledge the counter flip. */ - - return 1; -} - -static inline int -rcu_try_flip_waitmb_needed(int cpu) -{ - long curr; - long snap; - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - curr = rdssp->dynticks; - snap = rdssp->dynticks_snap; - smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ - - /* - * If the CPU remained in dynticks mode for the entire time - * and didn't take any interrupts, NMIs, SMIs, or whatever, - * then it cannot have executed an RCU read-side critical section - * during that time, so there is no need for it to execute a - * memory barrier. - */ - - if ((curr == snap) && ((curr & 0x1) == 0)) - return 0; - - /* - * If the CPU either entered or exited an outermost interrupt, - * SMI, NMI, or whatever handler, then we know that it executed - * a memory barrier when doing so. So we don't need another one. - */ - if (curr != snap) - return 0; - - /* We need the CPU to execute a memory barrier. */ - - return 1; -} - -static void dyntick_save_progress_counter_sched(int cpu) -{ - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - rdssp->sched_dynticks_snap = rdssp->dynticks; -} - -static int rcu_qsctr_inc_needed_dyntick(int cpu) -{ - long curr; - long snap; - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - curr = rdssp->dynticks; - snap = rdssp->sched_dynticks_snap; - smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ - - /* - * If the CPU remained in dynticks mode for the entire time - * and didn't take any interrupts, NMIs, SMIs, or whatever, - * then it cannot be in the middle of an rcu_read_lock(), so - * the next rcu_read_lock() it executes must use the new value - * of the counter. Therefore, this CPU has been in a quiescent - * state the entire time, and we don't need to wait for it. - */ - - if ((curr == snap) && ((curr & 0x1) == 0)) - return 0; - - /* - * If the CPU passed through or entered a dynticks idle phase with - * no active irq handlers, then, as above, this CPU has already - * passed through a quiescent state. - */ - - if ((curr - snap) > 2 || (snap & 0x1) == 0) - return 0; - - /* We need this CPU to go through a quiescent state. */ - - return 1; -} - -#else /* !CONFIG_NO_HZ */ - -# define dyntick_save_progress_counter(cpu) do { } while (0) -# define rcu_try_flip_waitack_needed(cpu) (1) -# define rcu_try_flip_waitmb_needed(cpu) (1) - -# define dyntick_save_progress_counter_sched(cpu) do { } while (0) -# define rcu_qsctr_inc_needed_dyntick(cpu) (1) - -#endif /* CONFIG_NO_HZ */ - -static void save_qsctr_sched(int cpu) -{ - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - rdssp->sched_qs_snap = rdssp->sched_qs; -} - -static inline int rcu_qsctr_inc_needed(int cpu) -{ - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - /* - * If there has been a quiescent state, no more need to wait - * on this CPU. - */ - - if (rdssp->sched_qs != rdssp->sched_qs_snap) { - smp_mb(); /* force ordering with cpu entering schedule(). 
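
The snapshot tests above come down to two comparisons on (curr, snap). A condensed, standalone restatement of rcu_try_flip_waitack_needed()'s decision, for illustration:

    #include <stdio.h>

    /* Returns 0 if the CPU can be presumed to have acknowledged the flip,
     * 1 if we must wait for an explicit acknowledgement. */
    static int waitack_needed(long curr, long snap)
    {
        if (curr == snap && (curr & 0x1) == 0)
            return 0;      /* idle the whole time: no reader possible */
        if (curr - snap > 2 || (curr & 0x1) == 0)
            return 0;      /* passed through (or now sits in) dynticks idle */
        return 1;
    }

    int main(void)
    {
        printf("%d\n", waitack_needed(4, 4));  /* idle throughout -> 0 */
        printf("%d\n", waitack_needed(5, 5));  /* busy, no transition -> 1 */
        printf("%d\n", waitack_needed(9, 5));  /* several transitions -> 0 */
        return 0;
    }
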
*/ - return 0; - } - - /* We need this CPU to go through a quiescent state. */ - - return 1; -} - -/* - * Get here when RCU is idle. Decide whether we need to - * move out of idle state, and return non-zero if so. - * "Straightforward" approach for the moment, might later - * use callback-list lengths, grace-period duration, or - * some such to determine when to exit idle state. - * Might also need a pre-idle test that does not acquire - * the lock, but let's get the simple case working first... - */ - -static int -rcu_try_flip_idle(void) -{ - int cpu; - - RCU_TRACE_ME(rcupreempt_trace_try_flip_i1); - if (!rcu_pending(smp_processor_id())) { - RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1); - return 0; - } - - /* - * Do the flip. - */ - - RCU_TRACE_ME(rcupreempt_trace_try_flip_g1); - rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */ - - /* - * Need a memory barrier so that other CPUs see the new - * counter value before they see the subsequent change of all - * the rcu_flip_flag instances to rcu_flipped. - */ - - smp_mb(); /* see above block comment. */ - - /* Now ask each CPU for acknowledgement of the flip. */ - - for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) { - per_cpu(rcu_flip_flag, cpu) = rcu_flipped; - dyntick_save_progress_counter(cpu); - } - - return 1; -} - -/* - * Wait for CPUs to acknowledge the flip. - */ - -static int -rcu_try_flip_waitack(void) -{ - int cpu; - - RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); - for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) - if (rcu_try_flip_waitack_needed(cpu) && - per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { - RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); - return 0; - } - - /* - * Make sure our checks above don't bleed into subsequent - * waiting for the sum of the counters to reach zero. - */ - - smp_mb(); /* see above block comment. */ - RCU_TRACE_ME(rcupreempt_trace_try_flip_a2); - return 1; -} - -/* - * Wait for collective ``last'' counter to reach zero, - * then tell all CPUs to do an end-of-grace-period memory barrier. - */ - -static int -rcu_try_flip_waitzero(void) -{ - int cpu; - int lastidx = !(rcu_ctrlblk.completed & 0x1); - int sum = 0; - - /* Check to see if the sum of the "last" counters is zero. */ - - RCU_TRACE_ME(rcupreempt_trace_try_flip_z1); - for_each_possible_cpu(cpu) - sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx]; - if (sum != 0) { - RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1); - return 0; - } - - /* - * This ensures that the other CPUs see the call for - * memory barriers -after- the sum to zero has been - * detected here - */ - smp_mb(); /* ^^^^^^^^^^^^ */ - - /* Call for a memory barrier from each CPU. */ - for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) { - per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; - dyntick_save_progress_counter(cpu); - } - - RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); - return 1; -} - -/* - * Wait for all CPUs to do their end-of-grace-period memory barrier. - * Return 0 once all CPUs have done so. - */ - -static int -rcu_try_flip_waitmb(void) -{ - int cpu; - - RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); - for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) - if (rcu_try_flip_waitmb_needed(cpu) && - per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { - RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); - return 0; - } - - smp_mb(); /* Ensure that the above checks precede any following flip. */ - RCU_TRACE_ME(rcupreempt_trace_try_flip_m2); - return 1; -} - -/* - * Attempt a single flip of the counters. Remember, a single flip does - * -not- constitute a grace period. 
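
Note why rcu_try_flip_waitzero() above sums the "last" counters rather than checking them individually: a preempted reader can enter its critical section on one CPU and exit on another, leaving both per-CPU counters permanently non-zero even though no reader remains. A two-CPU toy illustration, not from the patch:

    #include <stdio.h>

    int main(void)
    {
        long flipctr[2][2] = { { 0 } };   /* [cpu][parity] */
        int lastidx = 0;

        flipctr[0][lastidx]++;   /* reader enters its critical section on CPU 0 */
        /* ... reader is preempted and migrates ... */
        flipctr[1][lastidx]--;   /* reader exits on CPU 1 */

        long sum = flipctr[0][lastidx] + flipctr[1][lastidx];
        printf("cpu0=%ld cpu1=%ld sum=%ld\n",
               flipctr[0][lastidx], flipctr[1][lastidx], sum);
        /* cpu0=1, cpu1=-1, yet sum==0: the grace period may proceed. */
        return 0;
    }
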
Instead, the interval between - * at least GP_STAGES consecutive flips is a grace period. - * - * If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation - * on a large SMP, they might want to use a hierarchical organization of - * the per-CPU-counter pairs. - */ -static void rcu_try_flip(void) -{ - unsigned long flags; - - RCU_TRACE_ME(rcupreempt_trace_try_flip_1); - if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) { - RCU_TRACE_ME(rcupreempt_trace_try_flip_e1); - return; - } - - /* - * Take the next transition(s) through the RCU grace-period - * flip-counter state machine. - */ - - switch (rcu_ctrlblk.rcu_try_flip_state) { - case rcu_try_flip_idle_state: - if (rcu_try_flip_idle()) - rcu_ctrlblk.rcu_try_flip_state = - rcu_try_flip_waitack_state; - break; - case rcu_try_flip_waitack_state: - if (rcu_try_flip_waitack()) - rcu_ctrlblk.rcu_try_flip_state = - rcu_try_flip_waitzero_state; - break; - case rcu_try_flip_waitzero_state: - if (rcu_try_flip_waitzero()) - rcu_ctrlblk.rcu_try_flip_state = - rcu_try_flip_waitmb_state; - break; - case rcu_try_flip_waitmb_state: - if (rcu_try_flip_waitmb()) - rcu_ctrlblk.rcu_try_flip_state = - rcu_try_flip_idle_state; - } - spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); -} - -/* - * Check to see if this CPU needs to do a memory barrier in order to - * ensure that any prior RCU read-side critical sections have committed - * their counter manipulations and critical-section memory references - * before declaring the grace period to be completed. - */ -static void rcu_check_mb(int cpu) -{ - if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) { - smp_mb(); /* Ensure RCU read-side accesses are visible. */ - per_cpu(rcu_mb_flag, cpu) = rcu_mb_done; - } -} - -void rcu_check_callbacks(int cpu, int user) -{ - unsigned long flags; - struct rcu_data *rdp; - - if (!rcu_pending(cpu)) - return; /* if nothing for RCU to do. */ - - /* - * If this CPU took its interrupt from user mode or from the - * idle loop, and this is not a nested interrupt, then - * this CPU has to have exited all prior preept-disable - * sections of code. So invoke rcu_sched_qs() to note this. - * - * The memory barrier is needed to handle the case where - * writes from a preempt-disable section of code get reordered - * into schedule() by this CPU's write buffer. So the memory - * barrier makes sure that the rcu_sched_qs() is seen by other - * CPUs to happen after any such write. - */ - - rdp = RCU_DATA_CPU(cpu); - if (user || - (idle_cpu(cpu) && !in_softirq() && - hardirq_count() <= (1 << HARDIRQ_SHIFT))) { - smp_mb(); /* Guard against aggressive schedule(). 
*/ - rcu_sched_qs(cpu); - } - - rcu_check_mb(cpu); - if (rcu_ctrlblk.completed == rdp->completed) - rcu_try_flip(); - spin_lock_irqsave(&rdp->lock, flags); - RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); - __rcu_advance_callbacks(rdp); - if (rdp->donelist == NULL) { - spin_unlock_irqrestore(&rdp->lock, flags); - } else { - spin_unlock_irqrestore(&rdp->lock, flags); - raise_softirq(RCU_SOFTIRQ); - } -} - -/* - * Needed by dynticks, to make sure all RCU processing has finished - * when we go idle: - */ -void rcu_advance_callbacks(int cpu, int user) -{ - unsigned long flags; - struct rcu_data *rdp = RCU_DATA_CPU(cpu); - - if (rcu_ctrlblk.completed == rdp->completed) { - rcu_try_flip(); - if (rcu_ctrlblk.completed == rdp->completed) - return; - } - spin_lock_irqsave(&rdp->lock, flags); - RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); - __rcu_advance_callbacks(rdp); - spin_unlock_irqrestore(&rdp->lock, flags); -} - -#ifdef CONFIG_HOTPLUG_CPU -#define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \ - *dsttail = srclist; \ - if (srclist != NULL) { \ - dsttail = srctail; \ - srclist = NULL; \ - srctail = &srclist;\ - } \ - } while (0) - -void rcu_offline_cpu(int cpu) -{ - int i; - struct rcu_head *list = NULL; - unsigned long flags; - struct rcu_data *rdp = RCU_DATA_CPU(cpu); - struct rcu_head *schedlist = NULL; - struct rcu_head **schedtail = &schedlist; - struct rcu_head **tail = &list; - - /* - * Remove all callbacks from the newly dead CPU, retaining order. - * Otherwise rcu_barrier() will fail - */ - - spin_lock_irqsave(&rdp->lock, flags); - rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail); - for (i = GP_STAGES - 1; i >= 0; i--) - rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], - list, tail); - rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); - rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail, - schedlist, schedtail); - rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail, - schedlist, schedtail); - rdp->rcu_sched_sleeping = 0; - spin_unlock_irqrestore(&rdp->lock, flags); - rdp->waitlistcount = 0; - - /* Disengage the newly dead CPU from the grace-period computation. */ - - spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); - rcu_check_mb(cpu); - if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) { - smp_mb(); /* Subsequent counter accesses must see new value */ - per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen; - smp_mb(); /* Subsequent RCU read-side critical sections */ - /* seen -after- acknowledgement. */ - } - - cpumask_clear_cpu(cpu, to_cpumask(rcu_cpu_online_map)); - - spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); - - /* - * Place the removed callbacks on the current CPU's queue. - * Make them all start a new grace period: simple approach, - * in theory could starve a given set of callbacks, but - * you would need to be doing some serious CPU hotplugging - * to make this happen. If this becomes a problem, adding - * a synchronize_rcu() to the hotplug path would be a simple - * fix. - */ - - local_irq_save(flags); /* disable preempt till we know what lock. 
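
The rcu_offline_cpu_enqueue() macro above is the classic singly-linked-list splice via a pointer-to-tail-pointer, preserving order and leaving the source list empty but reusable. Expanded into plain standalone C with a hypothetical node type:

    #include <stdio.h>

    struct node { int v; struct node *next; };

    int main(void)
    {
        struct node a = { 1, NULL }, b = { 2, NULL }, c = { 3, NULL };
        struct node *src = &a, **srctail;
        struct node *dst = NULL, **dsttail = &dst;

        a.next = &b;
        srctail = &b.next;            /* tail pointer: address of the last ->next */

        /* the macro body, expanded: */
        *dsttail = src;               /* append the whole src list, in order */
        if (src != NULL) {
            dsttail = srctail;        /* destination tail is now src's old tail */
            src = NULL;               /* source becomes empty... */
            srctail = &src;           /* ...with a self-referential tail pointer */
        }

        *srctail = &c;                /* src list is immediately usable again */
        srctail = &c.next;

        for (struct node *p = dst; p; p = p->next)
            printf("dst: %d\n", p->v);   /* prints 1 then 2 */
        for (struct node *p = src; p; p = p->next)
            printf("src: %d\n", p->v);   /* prints 3 */
        return 0;
    }
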
*/ - rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); - *rdp->nexttail = list; - if (list) - rdp->nexttail = tail; - *rdp->nextschedtail = schedlist; - if (schedlist) - rdp->nextschedtail = schedtail; - spin_unlock_irqrestore(&rdp->lock, flags); -} - -#else /* #ifdef CONFIG_HOTPLUG_CPU */ - -void rcu_offline_cpu(int cpu) -{ -} - -#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ - -void __cpuinit rcu_online_cpu(int cpu) -{ - unsigned long flags; - struct rcu_data *rdp; - - spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); - cpumask_set_cpu(cpu, to_cpumask(rcu_cpu_online_map)); - spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); - - /* - * The rcu_sched grace-period processing might have bypassed - * this CPU, given that it was not in the rcu_cpu_online_map - * when the grace-period scan started. This means that the - * grace-period task might sleep. So make sure that if this - * should happen, the first callback posted to this CPU will - * wake up the grace-period task if need be. - */ - - rdp = RCU_DATA_CPU(cpu); - spin_lock_irqsave(&rdp->lock, flags); - rdp->rcu_sched_sleeping = 1; - spin_unlock_irqrestore(&rdp->lock, flags); -} - -static void rcu_process_callbacks(struct softirq_action *unused) -{ - unsigned long flags; - struct rcu_head *next, *list; - struct rcu_data *rdp; - - local_irq_save(flags); - rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); - list = rdp->donelist; - if (list == NULL) { - spin_unlock_irqrestore(&rdp->lock, flags); - return; - } - rdp->donelist = NULL; - rdp->donetail = &rdp->donelist; - RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp); - spin_unlock_irqrestore(&rdp->lock, flags); - while (list) { - next = list->next; - list->func(list); - list = next; - RCU_TRACE_ME(rcupreempt_trace_invoke); - } -} - -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) -{ - unsigned long flags; - struct rcu_data *rdp; - - head->func = func; - head->next = NULL; - local_irq_save(flags); - rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); - __rcu_advance_callbacks(rdp); - *rdp->nexttail = head; - rdp->nexttail = &head->next; - RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); - spin_unlock_irqrestore(&rdp->lock, flags); -} -EXPORT_SYMBOL_GPL(call_rcu); - -void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) -{ - unsigned long flags; - struct rcu_data *rdp; - int wake_gp = 0; - - head->func = func; - head->next = NULL; - local_irq_save(flags); - rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); - *rdp->nextschedtail = head; - rdp->nextschedtail = &head->next; - if (rdp->rcu_sched_sleeping) { - - /* Grace-period processing might be sleeping... */ - - rdp->rcu_sched_sleeping = 0; - wake_gp = 1; - } - spin_unlock_irqrestore(&rdp->lock, flags); - if (wake_gp) { - - /* Wake up grace-period processing, unless someone beat us. */ - - spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); - if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping) - wake_gp = 0; - rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping; - spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); - if (wake_gp) - wake_up_interruptible(&rcu_ctrlblk.sched_wq); - } -} -EXPORT_SYMBOL_GPL(call_rcu_sched); - -/* - * Wait until all currently running preempt_disable() code segments - * (including hardware-irq-disable segments) complete. Note that - * in -rt this does -not- necessarily result in all currently executing - * interrupt -handlers- having completed. - */ -void __synchronize_sched(void) -{ - struct rcu_synchronize rcu; - - if (num_online_cpus() == 1) - return; /* blocking is gp if only one CPU! 
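
rcu_process_callbacks() above follows the standard "detach the list under the lock, invoke callbacks with the lock dropped" idiom, so the per-CPU lock is never held while arbitrary callback functions run. A pthread sketch of the same idiom -- names invented, build with -lpthread:

    #include <pthread.h>
    #include <stdio.h>

    struct cb { void (*func)(struct cb *); struct cb *next; };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static struct cb *donelist;

    static void process_callbacks(void)
    {
        struct cb *list, *next;

        pthread_mutex_lock(&lock);
        list = donelist;              /* detach the whole list... */
        donelist = NULL;
        pthread_mutex_unlock(&lock);

        while (list) {                /* ...and invoke without the lock held */
            next = list->next;        /* fetch next before func may free list */
            list->func(list);
            list = next;
        }
    }

    static void hello(struct cb *c) { (void)c; printf("callback!\n"); }

    int main(void)
    {
        struct cb c = { hello, NULL };

        pthread_mutex_lock(&lock);
        c.next = donelist;
        donelist = &c;
        pthread_mutex_unlock(&lock);
        process_callbacks();
        return 0;
    }
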
*/ - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_sched(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(__synchronize_sched); - -/* - * kthread function that manages call_rcu_sched grace periods. - */ -static int rcu_sched_grace_period(void *arg) -{ - int couldsleep; /* might sleep after current pass. */ - int couldsleepnext = 0; /* might sleep after next pass. */ - int cpu; - unsigned long flags; - struct rcu_data *rdp; - int ret; - - /* - * Each pass through the following loop handles one - * rcu_sched grace period cycle. - */ - do { - /* Save each CPU's current state. */ - - for_each_online_cpu(cpu) { - dyntick_save_progress_counter_sched(cpu); - save_qsctr_sched(cpu); - } - - /* - * Sleep for about an RCU grace-period's worth to - * allow better batching and to consume less CPU. - */ - schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME); - - /* - * If there was nothing to do last time, prepare to - * sleep at the end of the current grace period cycle. - */ - couldsleep = couldsleepnext; - couldsleepnext = 1; - if (couldsleep) { - spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); - rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep; - spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); - } - - /* - * Wait on each CPU in turn to have either visited - * a quiescent state or been in dynticks-idle mode. - */ - for_each_online_cpu(cpu) { - while (rcu_qsctr_inc_needed(cpu) && - rcu_qsctr_inc_needed_dyntick(cpu)) { - /* resched_cpu(cpu); @@@ */ - schedule_timeout_interruptible(1); - } - } - - /* Advance callbacks for each CPU. */ - - for_each_online_cpu(cpu) { - - rdp = RCU_DATA_CPU(cpu); - spin_lock_irqsave(&rdp->lock, flags); - - /* - * We are running on this CPU irq-disabled, so no - * CPU can go offline until we re-enable irqs. - * The current CPU might have already gone - * offline (between the for_each_offline_cpu and - * the spin_lock_irqsave), but in that case all its - * callback lists will be empty, so no harm done. - * - * Advance the callbacks! We share normal RCU's - * donelist, since callbacks are invoked the - * same way in either case. - */ - if (rdp->waitschedlist != NULL) { - *rdp->donetail = rdp->waitschedlist; - rdp->donetail = rdp->waitschedtail; - - /* - * Next rcu_check_callbacks() will - * do the required raise_softirq(). - */ - } - if (rdp->nextschedlist != NULL) { - rdp->waitschedlist = rdp->nextschedlist; - rdp->waitschedtail = rdp->nextschedtail; - couldsleep = 0; - couldsleepnext = 0; - } else { - rdp->waitschedlist = NULL; - rdp->waitschedtail = &rdp->waitschedlist; - } - rdp->nextschedlist = NULL; - rdp->nextschedtail = &rdp->nextschedlist; - - /* Mark sleep intention. */ - - rdp->rcu_sched_sleeping = couldsleep; - - spin_unlock_irqrestore(&rdp->lock, flags); - } - - /* If we saw callbacks on the last scan, go deal with them. */ - - if (!couldsleep) - continue; - - /* Attempt to block... */ - - spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); - if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) { - - /* - * Someone posted a callback after we scanned. - * Go take care of it. - */ - spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); - couldsleepnext = 0; - continue; - } - - /* Block until the next person posts a callback. 
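
__synchronize_sched() above turns the asynchronous call_rcu_sched() into a synchronous wait by posting a callback that fires a completion. The userspace analogue of that completion is a mutex/condvar pair; an illustrative sketch with invented names:

    #include <pthread.h>
    #include <stdio.h>

    struct completion {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
    };

    static void complete(struct completion *c)
    {
        pthread_mutex_lock(&c->lock);
        c->done = 1;
        pthread_cond_signal(&c->cond);
        pthread_mutex_unlock(&c->lock);
    }

    static void wait_for_completion(struct completion *c)
    {
        pthread_mutex_lock(&c->lock);
        while (!c->done)
            pthread_cond_wait(&c->cond, &c->lock);
        pthread_mutex_unlock(&c->lock);
    }

    static void *grace_period(void *arg)   /* stands in for the RCU machinery */
    {
        complete(arg);                     /* "callback ran after the GP" */
        return NULL;
    }

    int main(void)
    {
        struct completion c = { PTHREAD_MUTEX_INITIALIZER,
                                PTHREAD_COND_INITIALIZER, 0 };
        pthread_t t;

        pthread_create(&t, NULL, grace_period, &c);
        wait_for_completion(&c);           /* synchronous wrapper over an async API */
        pthread_join(&t, NULL);
        printf("grace period elapsed\n");
        return 0;
    }
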
*/ - - rcu_ctrlblk.sched_sleep = rcu_sched_sleeping; - spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); - ret = 0; /* unused */ - __wait_event_interruptible(rcu_ctrlblk.sched_wq, - rcu_ctrlblk.sched_sleep != rcu_sched_sleeping, - ret); - - couldsleepnext = 0; - - } while (!kthread_should_stop()); - - return (0); -} - -/* - * Check to see if any future RCU-related work will need to be done - * by the current CPU, even if none need be done immediately, returning - * 1 if so. Assumes that notifiers would take care of handling any - * outstanding requests from the RCU core. - * - * This function is part of the RCU implementation; it is -not- - * an exported member of the RCU API. - */ -int rcu_needs_cpu(int cpu) -{ - struct rcu_data *rdp = RCU_DATA_CPU(cpu); - - return (rdp->donelist != NULL || - !!rdp->waitlistcount || - rdp->nextlist != NULL || - rdp->nextschedlist != NULL || - rdp->waitschedlist != NULL); -} - -static int rcu_pending(int cpu) -{ - struct rcu_data *rdp = RCU_DATA_CPU(cpu); - - /* The CPU has at least one callback queued somewhere. */ - - if (rdp->donelist != NULL || - !!rdp->waitlistcount || - rdp->nextlist != NULL || - rdp->nextschedlist != NULL || - rdp->waitschedlist != NULL) - return 1; - - /* The RCU core needs an acknowledgement from this CPU. */ - - if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) || - (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed)) - return 1; - - /* This CPU has fallen behind the global grace-period number. */ - - if (rdp->completed != rcu_ctrlblk.completed) - return 1; - - /* Nothing needed from this CPU. */ - - return 0; -} - -int __cpuinit rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - long cpu = (long)hcpu; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - rcu_online_cpu(cpu); - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - rcu_offline_cpu(cpu); - break; - default: - break; - } - return NOTIFY_OK; -} - -void __init __rcu_init(void) -{ - int cpu; - int i; - struct rcu_data *rdp; - - printk(KERN_NOTICE "Preemptible RCU implementation.\n"); - for_each_possible_cpu(cpu) { - rdp = RCU_DATA_CPU(cpu); - spin_lock_init(&rdp->lock); - rdp->completed = 0; - rdp->waitlistcount = 0; - rdp->nextlist = NULL; - rdp->nexttail = &rdp->nextlist; - for (i = 0; i < GP_STAGES; i++) { - rdp->waitlist[i] = NULL; - rdp->waittail[i] = &rdp->waitlist[i]; - } - rdp->donelist = NULL; - rdp->donetail = &rdp->donelist; - rdp->rcu_flipctr[0] = 0; - rdp->rcu_flipctr[1] = 0; - rdp->nextschedlist = NULL; - rdp->nextschedtail = &rdp->nextschedlist; - rdp->waitschedlist = NULL; - rdp->waitschedtail = &rdp->waitschedlist; - rdp->rcu_sched_sleeping = 0; - } - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); -} - -/* - * Late-boot-time RCU initialization that must wait until after scheduler - * has been initialized. 
- */ -void __init rcu_init_sched(void) -{ - rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period, - NULL, - "rcu_sched_grace_period"); - WARN_ON(IS_ERR(rcu_sched_grace_period_task)); -} - -#ifdef CONFIG_RCU_TRACE -long *rcupreempt_flipctr(int cpu) -{ - return &RCU_DATA_CPU(cpu)->rcu_flipctr[0]; -} -EXPORT_SYMBOL_GPL(rcupreempt_flipctr); - -int rcupreempt_flip_flag(int cpu) -{ - return per_cpu(rcu_flip_flag, cpu); -} -EXPORT_SYMBOL_GPL(rcupreempt_flip_flag); - -int rcupreempt_mb_flag(int cpu) -{ - return per_cpu(rcu_mb_flag, cpu); -} -EXPORT_SYMBOL_GPL(rcupreempt_mb_flag); - -char *rcupreempt_try_flip_state_name(void) -{ - return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state]; -} -EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name); - -struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu) -{ - struct rcu_data *rdp = RCU_DATA_CPU(cpu); - - return &rdp->trace; -} -EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu); - -#endif /* #ifdef RCU_TRACE */ diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c deleted file mode 100644 index 11640346a507..000000000000 --- a/kernel/rcupreempt_trace.c +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Read-Copy Update tracing for realtime implementation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright IBM Corporation, 2006 - * - * Papers: http://www.rdrop.com/users/paulmck/RCU - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU/ *.txt - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct mutex rcupreempt_trace_mutex; -static char *rcupreempt_trace_buf; -#define RCUPREEMPT_TRACE_BUF_SIZE 4096 - -void rcupreempt_trace_move2done(struct rcupreempt_trace *trace) -{ - trace->done_length += trace->wait_length; - trace->done_add += trace->wait_length; - trace->wait_length = 0; -} -void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace) -{ - trace->wait_length += trace->next_length; - trace->wait_add += trace->next_length; - trace->next_length = 0; -} -void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace) -{ - atomic_inc(&trace->rcu_try_flip_1); -} -void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace) -{ - atomic_inc(&trace->rcu_try_flip_e1); -} -void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_i1++; -} -void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_ie1++; -} -void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_g1++; -} -void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_a1++; -} -void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_ae1++; -} -void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_a2++; -} -void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_z1++; -} -void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_ze1++; -} -void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_z2++; -} -void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_m1++; -} -void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_me1++; -} -void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_m2++; -} -void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace) -{ - trace->rcu_check_callbacks++; -} -void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace) -{ - trace->done_remove += trace->done_length; - trace->done_length = 0; -} -void rcupreempt_trace_invoke(struct rcupreempt_trace *trace) -{ - atomic_inc(&trace->done_invoked); -} -void rcupreempt_trace_next_add(struct rcupreempt_trace *trace) -{ - trace->next_add++; - trace->next_length++; -} - -static void rcupreempt_trace_sum(struct rcupreempt_trace *sp) -{ - struct rcupreempt_trace *cp; - int cpu; - - memset(sp, 0, sizeof(*sp)); - for_each_possible_cpu(cpu) { - cp = rcupreempt_trace_cpu(cpu); - sp->next_length += cp->next_length; - sp->next_add += cp->next_add; - sp->wait_length += cp->wait_length; - sp->wait_add += cp->wait_add; - sp->done_length += cp->done_length; - sp->done_add += cp->done_add; - sp->done_remove += cp->done_remove; - atomic_add(atomic_read(&cp->done_invoked), &sp->done_invoked); - sp->rcu_check_callbacks += cp->rcu_check_callbacks; - atomic_add(atomic_read(&cp->rcu_try_flip_1), - &sp->rcu_try_flip_1); - atomic_add(atomic_read(&cp->rcu_try_flip_e1), - &sp->rcu_try_flip_e1); - sp->rcu_try_flip_i1 += 
cp->rcu_try_flip_i1; - sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1; - sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1; - sp->rcu_try_flip_a1 += cp->rcu_try_flip_a1; - sp->rcu_try_flip_ae1 += cp->rcu_try_flip_ae1; - sp->rcu_try_flip_a2 += cp->rcu_try_flip_a2; - sp->rcu_try_flip_z1 += cp->rcu_try_flip_z1; - sp->rcu_try_flip_ze1 += cp->rcu_try_flip_ze1; - sp->rcu_try_flip_z2 += cp->rcu_try_flip_z2; - sp->rcu_try_flip_m1 += cp->rcu_try_flip_m1; - sp->rcu_try_flip_me1 += cp->rcu_try_flip_me1; - sp->rcu_try_flip_m2 += cp->rcu_try_flip_m2; - } -} - -static ssize_t rcustats_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - struct rcupreempt_trace trace; - ssize_t bcount; - int cnt = 0; - - rcupreempt_trace_sum(&trace); - mutex_lock(&rcupreempt_trace_mutex); - snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt, - "ggp=%ld rcc=%ld\n", - rcu_batches_completed(), - trace.rcu_check_callbacks); - snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt, - "na=%ld nl=%ld wa=%ld wl=%ld da=%ld dl=%ld dr=%ld di=%d\n" - "1=%d e1=%d i1=%ld ie1=%ld g1=%ld a1=%ld ae1=%ld a2=%ld\n" - "z1=%ld ze1=%ld z2=%ld m1=%ld me1=%ld m2=%ld\n", - - trace.next_add, trace.next_length, - trace.wait_add, trace.wait_length, - trace.done_add, trace.done_length, - trace.done_remove, atomic_read(&trace.done_invoked), - atomic_read(&trace.rcu_try_flip_1), - atomic_read(&trace.rcu_try_flip_e1), - trace.rcu_try_flip_i1, trace.rcu_try_flip_ie1, - trace.rcu_try_flip_g1, - trace.rcu_try_flip_a1, trace.rcu_try_flip_ae1, - trace.rcu_try_flip_a2, - trace.rcu_try_flip_z1, trace.rcu_try_flip_ze1, - trace.rcu_try_flip_z2, - trace.rcu_try_flip_m1, trace.rcu_try_flip_me1, - trace.rcu_try_flip_m2); - bcount = simple_read_from_buffer(buffer, count, ppos, - rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); - mutex_unlock(&rcupreempt_trace_mutex); - return bcount; -} - -static ssize_t rcugp_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - long oldgp = rcu_batches_completed(); - ssize_t bcount; - - mutex_lock(&rcupreempt_trace_mutex); - synchronize_rcu(); - snprintf(rcupreempt_trace_buf, RCUPREEMPT_TRACE_BUF_SIZE, - "oldggp=%ld newggp=%ld\n", oldgp, rcu_batches_completed()); - bcount = simple_read_from_buffer(buffer, count, ppos, - rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); - mutex_unlock(&rcupreempt_trace_mutex); - return bcount; -} - -static ssize_t rcuctrs_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - int cnt = 0; - int cpu; - int f = rcu_batches_completed() & 0x1; - ssize_t bcount; - - mutex_lock(&rcupreempt_trace_mutex); - - cnt += snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE, - "CPU last cur F M\n"); - for_each_possible_cpu(cpu) { - long *flipctr = rcupreempt_flipctr(cpu); - cnt += snprintf(&rcupreempt_trace_buf[cnt], - RCUPREEMPT_TRACE_BUF_SIZE - cnt, - "%3d%c %4ld %3ld %d %d\n", - cpu, - cpu_is_offline(cpu) ? '!' 
: ' ', - flipctr[!f], - flipctr[f], - rcupreempt_flip_flag(cpu), - rcupreempt_mb_flag(cpu)); - } - cnt += snprintf(&rcupreempt_trace_buf[cnt], - RCUPREEMPT_TRACE_BUF_SIZE - cnt, - "ggp = %ld, state = %s\n", - rcu_batches_completed(), - rcupreempt_try_flip_state_name()); - cnt += snprintf(&rcupreempt_trace_buf[cnt], - RCUPREEMPT_TRACE_BUF_SIZE - cnt, - "\n"); - bcount = simple_read_from_buffer(buffer, count, ppos, - rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); - mutex_unlock(&rcupreempt_trace_mutex); - return bcount; -} - -static struct file_operations rcustats_fops = { - .owner = THIS_MODULE, - .read = rcustats_read, -}; - -static struct file_operations rcugp_fops = { - .owner = THIS_MODULE, - .read = rcugp_read, -}; - -static struct file_operations rcuctrs_fops = { - .owner = THIS_MODULE, - .read = rcuctrs_read, -}; - -static struct dentry *rcudir, *statdir, *ctrsdir, *gpdir; -static int rcupreempt_debugfs_init(void) -{ - rcudir = debugfs_create_dir("rcu", NULL); - if (!rcudir) - goto out; - statdir = debugfs_create_file("rcustats", 0444, rcudir, - NULL, &rcustats_fops); - if (!statdir) - goto free_out; - - gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops); - if (!gpdir) - goto free_out; - - ctrsdir = debugfs_create_file("rcuctrs", 0444, rcudir, - NULL, &rcuctrs_fops); - if (!ctrsdir) - goto free_out; - return 0; -free_out: - if (statdir) - debugfs_remove(statdir); - if (gpdir) - debugfs_remove(gpdir); - debugfs_remove(rcudir); -out: - return 1; -} - -static int __init rcupreempt_trace_init(void) -{ - int ret; - - mutex_init(&rcupreempt_trace_mutex); - rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL); - if (!rcupreempt_trace_buf) - return 1; - ret = rcupreempt_debugfs_init(); - if (ret) - kfree(rcupreempt_trace_buf); - return ret; -} - -static void __exit rcupreempt_trace_cleanup(void) -{ - debugfs_remove(statdir); - debugfs_remove(gpdir); - debugfs_remove(ctrsdir); - debugfs_remove(rcudir); - kfree(rcupreempt_trace_buf); -} - - -module_init(rcupreempt_trace_init); -module_exit(rcupreempt_trace_cleanup); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index f87fb0c8f924..82fbc49728df 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -725,7 +725,7 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_DETECTOR bool "Check for stalled CPUs delaying RCU grace periods" - depends on CLASSIC_RCU || TREE_RCU || TREE_PREEMPT_RCU + depends on TREE_RCU || TREE_PREEMPT_RCU default n help This option causes RCU to printk information on which -- cgit v1.2.3 From 70041088e3b976627ba9a183b812f39ef8a9ba0e Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Fri, 21 Aug 2009 12:28:31 +0000 Subject: RDS: Add TCP transport to RDS This code allows RDS to be tunneled over a TCP connection. RDMA operations are disabled when using TCP transport, but this frees RDS from the IB/RDMA stack dependency, and allows it to be used with standard Ethernet adapters, or in a VM. Signed-off-by: Andy Grover Signed-off-by: David S. 
Miller --- include/linux/rds.h | 12 ++ net/rds/tcp.c | 319 ++++++++++++++++++++++++++++++++++++++++++++ net/rds/tcp.h | 93 +++++++++++++ net/rds/tcp_connect.c | 153 ++++++++++++++++++++++ net/rds/tcp_listen.c | 199 ++++++++++++++++++++++++++++ net/rds/tcp_recv.c | 356 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/rds/tcp_send.c | 263 +++++++++++++++++++++++++++++++++++++ net/rds/tcp_stats.c | 74 +++++++++++ 8 files changed, 1469 insertions(+) create mode 100644 net/rds/tcp.c create mode 100644 net/rds/tcp.h create mode 100644 net/rds/tcp_connect.c create mode 100644 net/rds/tcp_listen.c create mode 100644 net/rds/tcp_recv.c create mode 100644 net/rds/tcp_send.c create mode 100644 net/rds/tcp_stats.c (limited to 'include/linux') diff --git a/include/linux/rds.h b/include/linux/rds.h index d91dc91f5443..89d46e1afbb1 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -147,6 +147,18 @@ struct rds_info_socket { u_int64_t inum; } __attribute__((packed)); +struct rds_info_tcp_socket { + __be32 local_addr; + __be16 local_port; + __be32 peer_addr; + __be16 peer_port; + u_int64_t hdr_rem; + u_int64_t data_rem; + u_int32_t last_sent_nxt; + u_int32_t last_expected_una; + u_int32_t last_seen_una; +} __attribute__((packed)); + #define RDS_IB_GID_LEN 16 struct rds_info_rdma_connection { __be32 src_addr; diff --git a/net/rds/tcp.c b/net/rds/tcp.c new file mode 100644 index 000000000000..e0ac9009db1a --- /dev/null +++ b/net/rds/tcp.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ +#include +#include +#include + +#include "rds.h" +#include "tcp.h" + +/* only for info exporting */ +static DEFINE_SPINLOCK(rds_tcp_tc_list_lock); +static LIST_HEAD(rds_tcp_tc_list); +unsigned int rds_tcp_tc_count; + +/* Track rds_tcp_connection structs so they can be cleaned up */ +static DEFINE_SPINLOCK(rds_tcp_conn_lock); +static LIST_HEAD(rds_tcp_conn_list); + +static struct kmem_cache *rds_tcp_conn_slab; + +#define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024) + +/* doing it this way avoids calling tcp_sk() */ +void rds_tcp_nonagle(struct socket *sock) +{ + mm_segment_t oldfs = get_fs(); + int val = 1; + + set_fs(KERNEL_DS); + sock->ops->setsockopt(sock, SOL_TCP, TCP_NODELAY, (char __user *)&val, + sizeof(val)); + set_fs(oldfs); +} + +void rds_tcp_tune(struct socket *sock) +{ + struct sock *sk = sock->sk; + + rds_tcp_nonagle(sock); + + /* + * We're trying to saturate gigabit with the default, + * see svc_sock_setbufsize(). + */ + lock_sock(sk); + sk->sk_sndbuf = RDS_TCP_DEFAULT_BUFSIZE; + sk->sk_rcvbuf = RDS_TCP_DEFAULT_BUFSIZE; + sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; + release_sock(sk); +} + +u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) +{ + return tcp_sk(tc->t_sock->sk)->snd_nxt; +} + +u32 rds_tcp_snd_una(struct rds_tcp_connection *tc) +{ + return tcp_sk(tc->t_sock->sk)->snd_una; +} + +void rds_tcp_restore_callbacks(struct socket *sock, + struct rds_tcp_connection *tc) +{ + rdsdebug("restoring sock %p callbacks from tc %p\n", sock, tc); + write_lock_bh(&sock->sk->sk_callback_lock); + + /* done under the callback_lock to serialize with write_space */ + spin_lock(&rds_tcp_tc_list_lock); + list_del_init(&tc->t_list_item); + rds_tcp_tc_count--; + spin_unlock(&rds_tcp_tc_list_lock); + + tc->t_sock = NULL; + + sock->sk->sk_write_space = tc->t_orig_write_space; + sock->sk->sk_data_ready = tc->t_orig_data_ready; + sock->sk->sk_state_change = tc->t_orig_state_change; + sock->sk->sk_user_data = NULL; + + write_unlock_bh(&sock->sk->sk_callback_lock); +} + +/* + * This is the only path that sets tc->t_sock. Send and receive trust that + * it is set. The RDS_CONN_CONNECTED bit protects those paths from being + * called while it isn't set. 
+ */ +void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + + rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc); + write_lock_bh(&sock->sk->sk_callback_lock); + + /* done under the callback_lock to serialize with write_space */ + spin_lock(&rds_tcp_tc_list_lock); + list_add_tail(&tc->t_list_item, &rds_tcp_tc_list); + rds_tcp_tc_count++; + spin_unlock(&rds_tcp_tc_list_lock); + + /* accepted sockets need our listen data ready undone */ + if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready) + sock->sk->sk_data_ready = sock->sk->sk_user_data; + + tc->t_sock = sock; + tc->conn = conn; + tc->t_orig_data_ready = sock->sk->sk_data_ready; + tc->t_orig_write_space = sock->sk->sk_write_space; + tc->t_orig_state_change = sock->sk->sk_state_change; + + sock->sk->sk_user_data = conn; + sock->sk->sk_data_ready = rds_tcp_data_ready; + sock->sk->sk_write_space = rds_tcp_write_space; + sock->sk->sk_state_change = rds_tcp_state_change; + + write_unlock_bh(&sock->sk->sk_callback_lock); +} + +static void rds_tcp_tc_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds_info_tcp_socket tsinfo; + struct rds_tcp_connection *tc; + unsigned long flags; + struct sockaddr_in sin; + int sinlen; + + spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); + + if (len / sizeof(tsinfo) < rds_tcp_tc_count) + goto out; + + list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { + + sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0); + tsinfo.local_addr = sin.sin_addr.s_addr; + tsinfo.local_port = sin.sin_port; + sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 1); + tsinfo.peer_addr = sin.sin_addr.s_addr; + tsinfo.peer_port = sin.sin_port; + + tsinfo.hdr_rem = tc->t_tinc_hdr_rem; + tsinfo.data_rem = tc->t_tinc_data_rem; + tsinfo.last_sent_nxt = tc->t_last_sent_nxt; + tsinfo.last_expected_una = tc->t_last_expected_una; + tsinfo.last_seen_una = tc->t_last_seen_una; + + rds_info_copy(iter, &tsinfo, sizeof(tsinfo)); + } + +out: + lens->nr = rds_tcp_tc_count; + lens->each = sizeof(tsinfo); + + spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); +} + +static int rds_tcp_laddr_check(__be32 addr) +{ + if (inet_addr_type(&init_net, addr) == RTN_LOCAL) + return 0; + return -EADDRNOTAVAIL; +} + +static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) +{ + struct rds_tcp_connection *tc; + + tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); + if (tc == NULL) + return -ENOMEM; + + tc->t_sock = NULL; + tc->t_tinc = NULL; + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; + + conn->c_transport_data = tc; + + spin_lock_irq(&rds_tcp_conn_lock); + list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); + spin_unlock_irq(&rds_tcp_conn_lock); + + rdsdebug("alloced tc %p\n", conn->c_transport_data); + return 0; +} + +static void rds_tcp_conn_free(void *arg) +{ + struct rds_tcp_connection *tc = arg; + rdsdebug("freeing tc %p\n", tc); + kmem_cache_free(rds_tcp_conn_slab, tc); +} + +static void rds_tcp_destroy_conns(void) +{ + struct rds_tcp_connection *tc, *_tc; + LIST_HEAD(tmp_list); + + /* avoid calling conn_destroy with irqs off */ + spin_lock_irq(&rds_tcp_conn_lock); + list_splice(&rds_tcp_conn_list, &tmp_list); + INIT_LIST_HEAD(&rds_tcp_conn_list); + spin_unlock_irq(&rds_tcp_conn_lock); + + list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { + if (tc->conn->c_passive) + rds_conn_destroy(tc->conn->c_passive); + 
rds_conn_destroy(tc->conn); + } +} + +void rds_tcp_exit(void) +{ + rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); + rds_tcp_listen_stop(); + rds_tcp_destroy_conns(); + rds_trans_unregister(&rds_tcp_transport); + rds_tcp_recv_exit(); + kmem_cache_destroy(rds_tcp_conn_slab); +} +module_exit(rds_tcp_exit); + +struct rds_transport rds_tcp_transport = { + .laddr_check = rds_tcp_laddr_check, + .xmit_prepare = rds_tcp_xmit_prepare, + .xmit_complete = rds_tcp_xmit_complete, + .xmit_cong_map = rds_tcp_xmit_cong_map, + .xmit = rds_tcp_xmit, + .recv = rds_tcp_recv, + .conn_alloc = rds_tcp_conn_alloc, + .conn_free = rds_tcp_conn_free, + .conn_connect = rds_tcp_conn_connect, + .conn_shutdown = rds_tcp_conn_shutdown, + .inc_copy_to_user = rds_tcp_inc_copy_to_user, + .inc_purge = rds_tcp_inc_purge, + .inc_free = rds_tcp_inc_free, + .stats_info_copy = rds_tcp_stats_info_copy, + .exit = rds_tcp_exit, + .t_owner = THIS_MODULE, + .t_name = "tcp", + .t_prefer_loopback = 1, +}; + +int __init rds_tcp_init(void) +{ + int ret; + + rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection", + sizeof(struct rds_tcp_connection), + 0, 0, NULL); + if (rds_tcp_conn_slab == NULL) { + ret = -ENOMEM; + goto out; + } + + ret = rds_tcp_recv_init(); + if (ret) + goto out_slab; + + ret = rds_trans_register(&rds_tcp_transport); + if (ret) + goto out_recv; + + ret = rds_tcp_listen_init(); + if (ret) + goto out_register; + + rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); + + goto out; + +out_register: + rds_trans_unregister(&rds_tcp_transport); +out_recv: + rds_tcp_recv_exit(); +out_slab: + kmem_cache_destroy(rds_tcp_conn_slab); +out: + return ret; +} +module_init(rds_tcp_init); + +MODULE_AUTHOR("Oracle Corporation "); +MODULE_DESCRIPTION("RDS: TCP transport"); +MODULE_LICENSE("Dual BSD/GPL"); + diff --git a/net/rds/tcp.h b/net/rds/tcp.h new file mode 100644 index 000000000000..844fa6b9cf5a --- /dev/null +++ b/net/rds/tcp.h @@ -0,0 +1,93 @@ +#ifndef _RDS_TCP_H +#define _RDS_TCP_H + +#define RDS_TCP_PORT 16385 + +struct rds_tcp_incoming { + struct rds_incoming ti_inc; + struct sk_buff_head ti_skb_list; +}; + +struct rds_tcp_connection { + + struct list_head t_tcp_node; + struct rds_connection *conn; + struct socket *t_sock; + void *t_orig_write_space; + void *t_orig_data_ready; + void *t_orig_state_change; + + struct rds_tcp_incoming *t_tinc; + size_t t_tinc_hdr_rem; + size_t t_tinc_data_rem; + + /* XXX error report? 
*/ + struct work_struct t_conn_w; + struct work_struct t_send_w; + struct work_struct t_down_w; + struct work_struct t_recv_w; + + /* for info exporting only */ + struct list_head t_list_item; + u32 t_last_sent_nxt; + u32 t_last_expected_una; + u32 t_last_seen_una; +}; + +struct rds_tcp_statistics { + uint64_t s_tcp_data_ready_calls; + uint64_t s_tcp_write_space_calls; + uint64_t s_tcp_sndbuf_full; + uint64_t s_tcp_connect_raced; + uint64_t s_tcp_listen_closed_stale; +}; + +/* tcp.c */ +int __init rds_tcp_init(void); +void rds_tcp_exit(void); +void rds_tcp_tune(struct socket *sock); +void rds_tcp_nonagle(struct socket *sock); +void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); +void rds_tcp_restore_callbacks(struct socket *sock, + struct rds_tcp_connection *tc); +u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); +u32 rds_tcp_snd_una(struct rds_tcp_connection *tc); +u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq); +extern struct rds_transport rds_tcp_transport; + +/* tcp_connect.c */ +int rds_tcp_conn_connect(struct rds_connection *conn); +void rds_tcp_conn_shutdown(struct rds_connection *conn); +void rds_tcp_state_change(struct sock *sk); + +/* tcp_listen.c */ +int __init rds_tcp_listen_init(void); +void rds_tcp_listen_stop(void); +void rds_tcp_listen_data_ready(struct sock *sk, int bytes); + +/* tcp_recv.c */ +int __init rds_tcp_recv_init(void); +void rds_tcp_recv_exit(void); +void rds_tcp_data_ready(struct sock *sk, int bytes); +int rds_tcp_recv(struct rds_connection *conn); +void rds_tcp_inc_purge(struct rds_incoming *inc); +void rds_tcp_inc_free(struct rds_incoming *inc); +int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, + size_t size); + +/* tcp_send.c */ +void rds_tcp_xmit_prepare(struct rds_connection *conn); +void rds_tcp_xmit_complete(struct rds_connection *conn); +int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, + unsigned int hdr_off, unsigned int sg, unsigned int off); +void rds_tcp_write_space(struct sock *sk); +int rds_tcp_xmit_cong_map(struct rds_connection *conn, + struct rds_cong_map *map, unsigned long offset); + +/* tcp_stats.c */ +DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats); +#define rds_tcp_stats_inc(member) rds_stats_inc_which(rds_tcp_stats, member) +unsigned int rds_tcp_stats_info_copy(struct rds_info_iterator *iter, + unsigned int avail); + +#endif diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c new file mode 100644 index 000000000000..211522f9a9a2 --- /dev/null +++ b/net/rds/tcp_connect.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "tcp.h" + +void rds_tcp_state_change(struct sock *sk) +{ + void (*state_change)(struct sock *sk); + struct rds_connection *conn; + struct rds_tcp_connection *tc; + + read_lock(&sk->sk_callback_lock); + conn = sk->sk_user_data; + if (conn == NULL) { + state_change = sk->sk_state_change; + goto out; + } + tc = conn->c_transport_data; + state_change = tc->t_orig_state_change; + + rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state); + + switch(sk->sk_state) { + /* ignore connecting sockets as they make progress */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + case TCP_ESTABLISHED: + rds_connect_complete(conn); + break; + case TCP_CLOSE: + rds_conn_drop(conn); + default: + break; + } +out: + read_unlock(&sk->sk_callback_lock); + state_change(sk); +} + +int rds_tcp_conn_connect(struct rds_connection *conn) +{ + struct socket *sock = NULL; + struct sockaddr_in src, dest; + int ret; + + ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (ret < 0) + goto out; + + rds_tcp_tune(sock); + + src.sin_family = AF_INET; + src.sin_addr.s_addr = (__force u32)conn->c_laddr; + src.sin_port = (__force u16)htons(0); + + ret = sock->ops->bind(sock, (struct sockaddr *)&src, sizeof(src)); + if (ret) { + rdsdebug("bind failed with %d at address %u.%u.%u.%u\n", + ret, NIPQUAD(conn->c_laddr)); + goto out; + } + + dest.sin_family = AF_INET; + dest.sin_addr.s_addr = (__force u32)conn->c_faddr; + dest.sin_port = (__force u16)htons(RDS_TCP_PORT); + + /* + * once we call connect() we can start getting callbacks and they + * own the socket + */ + rds_tcp_set_callbacks(sock, conn); + ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), + O_NONBLOCK); + sock = NULL; + + rdsdebug("connect to address %u.%u.%u.%u returned %d\n", + NIPQUAD(conn->c_faddr), ret); + if (ret == -EINPROGRESS) + ret = 0; + +out: + if (sock) + sock_release(sock); + return ret; +} + +/* + * Before killing the tcp socket this needs to serialize with callbacks. The + * caller has already grabbed the sending sem so we're serialized with other + * senders. + * + * TCP calls the callbacks with the sock lock so we hold it while we reset the + * callbacks to those set by TCP. Our callbacks won't execute again once we + * hold the sock lock. 
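+ * + * In code terms the sequence below is: shutdown() the socket first, + * then take the sock lock while the original callbacks are restored, + * then drop the lock and release the socket.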
+ */ +void rds_tcp_conn_shutdown(struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + struct socket *sock = tc->t_sock; + + rdsdebug("shutting down conn %p tc %p sock %p\n", conn, tc, sock); + + if (sock) { + sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); + lock_sock(sock->sk); + rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */ + + release_sock(sock->sk); + sock_release(sock); + }; + + if (tc->t_tinc) { + rds_inc_put(&tc->t_tinc->ti_inc); + tc->t_tinc = NULL; + } + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; +} diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c new file mode 100644 index 000000000000..24b743eb0b1b --- /dev/null +++ b/net/rds/tcp_listen.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "tcp.h" + +/* + * cheesy, but simple.. 
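+ * + * (A single global work struct is used: every data_ready event on the + * listening socket queues the same accept worker, which then accepts + * connections in a loop until none are left.)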
+ */ +static void rds_tcp_accept_worker(struct work_struct *work); +static DECLARE_WORK(rds_tcp_listen_work, rds_tcp_accept_worker); +static struct socket *rds_tcp_listen_sock; + +static int rds_tcp_accept_one(struct socket *sock) +{ + struct socket *new_sock = NULL; + struct rds_connection *conn; + int ret; + struct inet_sock *inet; + + ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type, + sock->sk->sk_protocol, &new_sock); + if (ret) + goto out; + + new_sock->type = sock->type; + new_sock->ops = sock->ops; + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); + if (ret < 0) + goto out; + + rds_tcp_tune(new_sock); + + inet = inet_sk(new_sock->sk); + + rdsdebug("accepted tcp %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport)); + + conn = rds_conn_create(inet->saddr, inet->daddr, &rds_tcp_transport, + GFP_KERNEL); + if (IS_ERR(conn)) { + ret = PTR_ERR(conn); + goto out; + } + + /* + * see the comment above rds_queue_delayed_reconnect() + */ + if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { + if (rds_conn_state(conn) == RDS_CONN_UP) + rds_tcp_stats_inc(s_tcp_listen_closed_stale); + else + rds_tcp_stats_inc(s_tcp_connect_raced); + rds_conn_drop(conn); + ret = 0; + goto out; + } + + rds_tcp_set_callbacks(new_sock, conn); + rds_connect_complete(conn); + new_sock = NULL; + ret = 0; + +out: + if (new_sock) + sock_release(new_sock); + return ret; +} + +static void rds_tcp_accept_worker(struct work_struct *work) +{ + while (rds_tcp_accept_one(rds_tcp_listen_sock) == 0) + cond_resched(); +} + +void rds_tcp_listen_data_ready(struct sock *sk, int bytes) +{ + void (*ready)(struct sock *sk, int bytes); + + rdsdebug("listen data ready sk %p\n", sk); + + read_lock(&sk->sk_callback_lock); + ready = sk->sk_user_data; + if (ready == NULL) { /* check for teardown race */ + ready = sk->sk_data_ready; + goto out; + } + + /* + * ->sk_data_ready is also called for a newly established child socket + * before it has been accepted and the accepter has set up their + * data_ready.. 
we only want to queue listen work for our listening + socket + */ + if (sk->sk_state == TCP_LISTEN) + queue_work(rds_wq, &rds_tcp_listen_work); + +out: + read_unlock(&sk->sk_callback_lock); + ready(sk, bytes); +} + +int __init rds_tcp_listen_init(void) +{ + struct sockaddr_in sin; + struct socket *sock = NULL; + int ret; + + ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (ret < 0) + goto out; + + sock->sk->sk_reuse = 1; + rds_tcp_nonagle(sock); + + write_lock_bh(&sock->sk->sk_callback_lock); + sock->sk->sk_user_data = sock->sk->sk_data_ready; + sock->sk->sk_data_ready = rds_tcp_listen_data_ready; + write_unlock_bh(&sock->sk->sk_callback_lock); + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); + sin.sin_port = (__force u16)htons(RDS_TCP_PORT); + + ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); + if (ret < 0) + goto out; + + ret = sock->ops->listen(sock, 64); + if (ret < 0) + goto out; + + rds_tcp_listen_sock = sock; + sock = NULL; +out: + if (sock) + sock_release(sock); + return ret; +} + +void rds_tcp_listen_stop(void) +{ + struct socket *sock = rds_tcp_listen_sock; + struct sock *sk; + + if (sock == NULL) + return; + + sk = sock->sk; + + /* serialize with and prevent further callbacks */ + lock_sock(sk); + write_lock_bh(&sk->sk_callback_lock); + if (sk->sk_user_data) { + sk->sk_data_ready = sk->sk_user_data; + sk->sk_user_data = NULL; + } + write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); + + /* wait for accepts to stop and close the socket */ + flush_workqueue(rds_wq); + sock_release(sock); + rds_tcp_listen_sock = NULL; +} diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c new file mode 100644 index 000000000000..c00dafffbb5a --- /dev/null +++ b/net/rds/tcp_recv.c @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ * + */ +#include +#include + +#include "rds.h" +#include "tcp.h" + +static struct kmem_cache *rds_tcp_incoming_slab; + +void rds_tcp_inc_purge(struct rds_incoming *inc) +{ + struct rds_tcp_incoming *tinc; + tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); + rdsdebug("purging tinc %p inc %p\n", tinc, inc); + skb_queue_purge(&tinc->ti_skb_list); +} + +void rds_tcp_inc_free(struct rds_incoming *inc) +{ + struct rds_tcp_incoming *tinc; + tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); + rds_tcp_inc_purge(inc); + rdsdebug("freeing tinc %p inc %p\n", tinc, inc); + kmem_cache_free(rds_tcp_incoming_slab, tinc); +} + +/* + * this is pretty lame, but, whatever. + */ +int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, + size_t size) +{ + struct rds_tcp_incoming *tinc; + struct iovec *iov, tmp; + struct sk_buff *skb; + unsigned long to_copy, skb_off; + int ret = 0; + + if (size == 0) + goto out; + + tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); + iov = first_iov; + tmp = *iov; + + skb_queue_walk(&tinc->ti_skb_list, skb) { + skb_off = 0; + while (skb_off < skb->len) { + while (tmp.iov_len == 0) { + iov++; + tmp = *iov; + } + + to_copy = min(tmp.iov_len, size); + to_copy = min(to_copy, skb->len - skb_off); + + rdsdebug("ret %d size %zu skb %p skb_off %lu " + "skblen %d iov_base %p iov_len %zu cpy %lu\n", + ret, size, skb, skb_off, skb->len, + tmp.iov_base, tmp.iov_len, to_copy); + + /* modifies tmp as it copies */ + if (skb_copy_datagram_iovec(skb, skb_off, &tmp, + to_copy)) { + ret = -EFAULT; + goto out; + } + + size -= to_copy; + ret += to_copy; + skb_off += to_copy; + if (size == 0) + goto out; + } + } +out: + return ret; +} + +/* + * We have a series of skbs that have fragmented pieces of the congestion + * bitmap. They must add up to the exact size of the congestion bitmap. We + * use the skb helpers to copy those into the pages that make up the in-memory + * congestion bitmap for the remote address of this connection. We then tell + * the congestion core that the bitmap has been changed so that it can wake up + * sleepers. + * + * This is racing with sending paths which are using test_bit to see if the + * bitmap indicates that their recipient is congested. 
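+ * + * Worked example (illustrative, assuming 4KB pages and the usual + * 8192-byte congestion map): a first 5000-byte skb fills all 4096 + * bytes of page 0 plus the first 904 bytes of page 1; a second + * 3192-byte skb then completes page 1.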
+ */ + +static void rds_tcp_cong_recv(struct rds_connection *conn, + struct rds_tcp_incoming *tinc) +{ + struct sk_buff *skb; + unsigned int to_copy, skb_off; + unsigned int map_off; + unsigned int map_page; + struct rds_cong_map *map; + int ret; + + /* catch completely corrupt packets */ + if (be32_to_cpu(tinc->ti_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES) + return; + + map_page = 0; + map_off = 0; + map = conn->c_fcong; + + skb_queue_walk(&tinc->ti_skb_list, skb) { + skb_off = 0; + while (skb_off < skb->len) { + to_copy = min_t(unsigned int, PAGE_SIZE - map_off, + skb->len - skb_off); + + BUG_ON(map_page >= RDS_CONG_MAP_PAGES); + + /* only returns 0 or -error */ + ret = skb_copy_bits(skb, skb_off, + (void *)map->m_page_addrs[map_page] + map_off, + to_copy); + BUG_ON(ret != 0); + + skb_off += to_copy; + map_off += to_copy; + if (map_off == PAGE_SIZE) { + map_off = 0; + map_page++; + } + } + } + + rds_cong_map_updated(map, ~(u64) 0); +} + +struct rds_tcp_desc_arg { + struct rds_connection *conn; + gfp_t gfp; + enum km_type km; +}; + +static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, + unsigned int offset, size_t len) +{ + struct rds_tcp_desc_arg *arg = desc->arg.data; + struct rds_connection *conn = arg->conn; + struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_incoming *tinc = tc->t_tinc; + struct sk_buff *clone; + size_t left = len, to_copy; + + rdsdebug("tcp data tc %p skb %p offset %u len %zu\n", tc, skb, offset, + len); + + /* + * tcp_read_sock() interprets partial progress as an indication to stop + * processing. + */ + while (left) { + if (tinc == NULL) { + tinc = kmem_cache_alloc(rds_tcp_incoming_slab, + arg->gfp); + if (tinc == NULL) { + desc->error = -ENOMEM; + goto out; + } + tc->t_tinc = tinc; + rdsdebug("alloced tinc %p\n", tinc); + rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr); + /* + * XXX * we might be able to use the __ variants when + * we've already serialized at a higher level. 
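+ * ("__ variants" here presumably means the unlocked skb queue helpers + * such as __skb_queue_head_init(), which avoid taking the queue + * spinlock.)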
+ */ + skb_queue_head_init(&tinc->ti_skb_list); + } + + if (left && tc->t_tinc_hdr_rem) { + to_copy = min(tc->t_tinc_hdr_rem, left); + rdsdebug("copying %zu header from skb %p\n", to_copy, + skb); + skb_copy_bits(skb, offset, + (char *)&tinc->ti_inc.i_hdr + + sizeof(struct rds_header) - + tc->t_tinc_hdr_rem, + to_copy); + tc->t_tinc_hdr_rem -= to_copy; + left -= to_copy; + offset += to_copy; + + if (tc->t_tinc_hdr_rem == 0) { + /* could be 0 for a 0 len message */ + tc->t_tinc_data_rem = + be32_to_cpu(tinc->ti_inc.i_hdr.h_len); + } + } + + if (left && tc->t_tinc_data_rem) { + clone = skb_clone(skb, arg->gfp); + if (clone == NULL) { + desc->error = -ENOMEM; + goto out; + } + + to_copy = min(tc->t_tinc_data_rem, left); + pskb_pull(clone, offset); + pskb_trim(clone, to_copy); + skb_queue_tail(&tinc->ti_skb_list, clone); + + rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " + "clone %p data %p len %d\n", + skb, skb->data, skb->len, offset, to_copy, + clone, clone->data, clone->len); + + tc->t_tinc_data_rem -= to_copy; + left -= to_copy; + offset += to_copy; + } + + if (tc->t_tinc_hdr_rem == 0 && tc->t_tinc_data_rem == 0) { + if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) + rds_tcp_cong_recv(conn, tinc); + else + rds_recv_incoming(conn, conn->c_faddr, + conn->c_laddr, &tinc->ti_inc, + arg->gfp, arg->km); + + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; + tc->t_tinc = NULL; + rds_inc_put(&tinc->ti_inc); + tinc = NULL; + } + } +out: + rdsdebug("returning len %zu left %zu skb len %d rx queue depth %d\n", + len, left, skb->len, + skb_queue_len(&tc->t_sock->sk->sk_receive_queue)); + return len - left; +} + +/* the caller has to hold the sock lock */ +int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp, enum km_type km) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + struct socket *sock = tc->t_sock; + read_descriptor_t desc; + struct rds_tcp_desc_arg arg; + + /* It's like glib in the kernel! */ + arg.conn = conn; + arg.gfp = gfp; + arg.km = km; + desc.arg.data = &arg; + desc.error = 0; + desc.count = 1; /* give more than one skb per call */ + + tcp_read_sock(sock->sk, &desc, rds_tcp_data_recv); + rdsdebug("tcp_read_sock for tc %p gfp 0x%x returned %d\n", tc, gfp, + desc.error); + + return desc.error; +} + +/* + * We hold the sock lock to serialize our rds_tcp_recv->tcp_read_sock from + * data_ready. + * + * if we fail to allocate we're in trouble.. blindly wait some time before + * trying again to see if the VM can free up something for us. 
+ */ +int rds_tcp_recv(struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + struct socket *sock = tc->t_sock; + int ret = 0; + + rdsdebug("recv worker conn %p tc %p sock %p\n", conn, tc, sock); + + lock_sock(sock->sk); + ret = rds_tcp_read_sock(conn, GFP_KERNEL, KM_USER0); + release_sock(sock->sk); + + return ret; +} + +void rds_tcp_data_ready(struct sock *sk, int bytes) +{ + void (*ready)(struct sock *sk, int bytes); + struct rds_connection *conn; + struct rds_tcp_connection *tc; + + rdsdebug("data ready sk %p bytes %d\n", sk, bytes); + + read_lock(&sk->sk_callback_lock); + conn = sk->sk_user_data; + if (conn == NULL) { /* check for teardown race */ + ready = sk->sk_data_ready; + goto out; + } + + tc = conn->c_transport_data; + ready = tc->t_orig_data_ready; + rds_tcp_stats_inc(s_tcp_data_ready_calls); + + if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM) + queue_delayed_work(rds_wq, &conn->c_recv_w, 0); +out: + read_unlock(&sk->sk_callback_lock); + ready(sk, bytes); +} + +int __init rds_tcp_recv_init(void) +{ + rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming", + sizeof(struct rds_tcp_incoming), + 0, 0, NULL); + if (rds_tcp_incoming_slab == NULL) + return -ENOMEM; + return 0; +} + +void rds_tcp_recv_exit(void) +{ + kmem_cache_destroy(rds_tcp_incoming_slab); +} diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c new file mode 100644 index 000000000000..ab545e0cd5d6 --- /dev/null +++ b/net/rds/tcp_send.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ +#include +#include +#include + +#include "rds.h" +#include "tcp.h" + +static void rds_tcp_cork(struct socket *sock, int val) +{ + mm_segment_t oldfs; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + sock->ops->setsockopt(sock, SOL_TCP, TCP_CORK, (char __user *)&val, + sizeof(val)); + set_fs(oldfs); +} + +void rds_tcp_xmit_prepare(struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + + rds_tcp_cork(tc->t_sock, 1); +} + +void rds_tcp_xmit_complete(struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + + rds_tcp_cork(tc->t_sock, 0); +} + +/* the core send_sem serializes this with other xmit and shutdown */ +int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len) +{ + struct kvec vec = { + .iov_base = data, + .iov_len = len, + }; + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, + }; + + return kernel_sendmsg(sock, &msg, &vec, 1, vec.iov_len); +} + +/* the core send_sem serializes this with other xmit and shutdown */ +int rds_tcp_xmit_cong_map(struct rds_connection *conn, + struct rds_cong_map *map, unsigned long offset) +{ + static struct rds_header rds_tcp_map_header = { + .h_flags = RDS_FLAG_CONG_BITMAP, + }; + struct rds_tcp_connection *tc = conn->c_transport_data; + unsigned long i; + int ret; + int copied = 0; + + /* Some problem claims cpu_to_be32(constant) isn't a constant. */ + rds_tcp_map_header.h_len = cpu_to_be32(RDS_CONG_MAP_BYTES); + + if (offset < sizeof(struct rds_header)) { + ret = rds_tcp_sendmsg(tc->t_sock, + (void *)&rds_tcp_map_header + offset, + sizeof(struct rds_header) - offset); + if (ret <= 0) + return ret; + offset += ret; + copied = ret; + if (offset < sizeof(struct rds_header)) + return ret; + } + + offset -= sizeof(struct rds_header); + i = offset / PAGE_SIZE; + offset = offset % PAGE_SIZE; + BUG_ON(i >= RDS_CONG_MAP_PAGES); + + do { + ret = tc->t_sock->ops->sendpage(tc->t_sock, + virt_to_page(map->m_page_addrs[i]), + offset, PAGE_SIZE - offset, + MSG_DONTWAIT); + if (ret <= 0) + break; + copied += ret; + offset += ret; + if (offset == PAGE_SIZE) { + offset = 0; + i++; + } + } while (i < RDS_CONG_MAP_PAGES); + + return copied ? copied : ret; +} + +/* the core send_sem serializes this with other xmit and shutdown */ +int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, + unsigned int hdr_off, unsigned int sg, unsigned int off) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + int done = 0; + int ret = 0; + + if (hdr_off == 0) { + /* + * m_ack_seq is set to the sequence number of the last byte of + * header and data. see rds_tcp_is_acked(). 
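+ * + * For example (illustrative numbers): with snd_nxt = 1000 and a + * 100-byte payload, m_ack_seq = 1000 + sizeof(struct rds_header) + + * 100 - 1, the sequence number of the message's final byte.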
+ */ + tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc); + rm->m_ack_seq = tc->t_last_sent_nxt + + sizeof(struct rds_header) + + be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1; + smp_mb__before_clear_bit(); + set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags); + tc->t_last_expected_una = rm->m_ack_seq + 1; + + rdsdebug("rm %p tcp nxt %u ack_seq %llu\n", + rm, rds_tcp_snd_nxt(tc), + (unsigned long long)rm->m_ack_seq); + } + + if (hdr_off < sizeof(struct rds_header)) { + /* see rds_tcp_write_space() */ + set_bit(SOCK_NOSPACE, &tc->t_sock->sk->sk_socket->flags); + + ret = rds_tcp_sendmsg(tc->t_sock, + (void *)&rm->m_inc.i_hdr + hdr_off, + sizeof(rm->m_inc.i_hdr) - hdr_off); + if (ret < 0) + goto out; + done += ret; + if (hdr_off + done != sizeof(struct rds_header)) + goto out; + } + + while (sg < rm->m_nents) { + ret = tc->t_sock->ops->sendpage(tc->t_sock, + sg_page(&rm->m_sg[sg]), + rm->m_sg[sg].offset + off, + rm->m_sg[sg].length - off, + MSG_DONTWAIT|MSG_NOSIGNAL); + rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->m_sg[sg]), + rm->m_sg[sg].offset + off, rm->m_sg[sg].length - off, + ret); + if (ret <= 0) + break; + + off += ret; + done += ret; + if (off == rm->m_sg[sg].length) { + off = 0; + sg++; + } + } + +out: + if (ret <= 0) { + /* write_space will hit after EAGAIN, all else fatal */ + if (ret == -EAGAIN) { + rds_tcp_stats_inc(s_tcp_sndbuf_full); + ret = 0; + } else { + printk(KERN_WARNING "RDS/tcp: send to %u.%u.%u.%u " + "returned %d, disconnecting and reconnecting\n", + NIPQUAD(conn->c_faddr), ret); + rds_conn_drop(conn); + } + } + if (done == 0) + done = ret; + return done; +} + +/* + * rm->m_ack_seq is set to the tcp sequence number that corresponds to the + * last byte of the message, including the header. This means that the + * entire message has been received if rm->m_ack_seq is "before" the next + * unacked byte of the TCP sequence space. We have to do very careful + * wrapping 32bit comparisons here. + */ +static int rds_tcp_is_acked(struct rds_message *rm, uint64_t ack) +{ + if (!test_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags)) + return 0; + return (__s32)((u32)rm->m_ack_seq - (u32)ack) < 0; +} + +void rds_tcp_write_space(struct sock *sk) +{ + void (*write_space)(struct sock *sk); + struct rds_connection *conn; + struct rds_tcp_connection *tc; + + read_lock(&sk->sk_callback_lock); + conn = sk->sk_user_data; + if (conn == NULL) { + write_space = sk->sk_write_space; + goto out; + } + + tc = conn->c_transport_data; + rdsdebug("write_space for tc %p\n", tc); + write_space = tc->t_orig_write_space; + rds_tcp_stats_inc(s_tcp_write_space_calls); + + rdsdebug("tcp una %u\n", rds_tcp_snd_una(tc)); + tc->t_last_seen_una = rds_tcp_snd_una(tc); + rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); + + queue_delayed_work(rds_wq, &conn->c_send_w, 0); +out: + read_unlock(&sk->sk_callback_lock); + + /* + * write_space is only called when data leaves tcp's send queue if + * SOCK_NOSPACE is set. We set SOCK_NOSPACE every time we put + * data in tcp's send queue because we use write_space to parse the + * sequence numbers and notice that rds messages have been fully + * received. + * + * tcp's write_space clears SOCK_NOSPACE if the send queue has more + * than a certain amount of space. So we need to set it again *after* + * we call tcp's write_space or else we might only get called on the + * first of a series of incoming tcp acks. 
*/ + write_space(sk); + + if (sk->sk_socket) + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +} diff --git a/net/rds/tcp_stats.c b/net/rds/tcp_stats.c new file mode 100644 index 000000000000..d5898d03cd68 --- /dev/null +++ b/net/rds/tcp_stats.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "tcp.h" + +DEFINE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats) + ____cacheline_aligned; + +static const char * const rds_tcp_stat_names[] = { + "tcp_data_ready_calls", + "tcp_write_space_calls", + "tcp_sndbuf_full", + "tcp_connect_raced", + "tcp_listen_closed_stale", +}; + +unsigned int rds_tcp_stats_info_copy(struct rds_info_iterator *iter, + unsigned int avail) +{ + struct rds_tcp_statistics stats = {0, }; + uint64_t *src; + uint64_t *sum; + size_t i; + int cpu; + + if (avail < ARRAY_SIZE(rds_tcp_stat_names)) + goto out; + + for_each_online_cpu(cpu) { + src = (uint64_t *)&(per_cpu(rds_tcp_stats, cpu)); + sum = (uint64_t *)&stats; + for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++) + *(sum++) += *(src++); + } + + rds_stats_info_copy(iter, (uint64_t *)&stats, rds_tcp_stat_names, + ARRAY_SIZE(rds_tcp_stat_names)); +out: + return ARRAY_SIZE(rds_tcp_stat_names); +} -- cgit v1.2.3 From 05ecd5a1f76c183cca381705b3adb7d77c9a0439 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Mon, 24 Aug 2009 19:52:38 +0900 Subject: sh: Simplify "multi-evt" interrupt handling. This patch changes the way in which "multi-evt" interrupts are handled. The intc_evt2irq_table and related intc_evt2irq() have been removed and the "redirecting" handler is installed for the coupled interrupts. Thanks to that the do_IRQ() function doesn't have to use another level of indirection for all the interrupts...
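As an illustration of the new scheme (a hedged sketch: the enum name and vector numbers are made up, only intc_redirect_irq() comes from the patch itself), a coupled "multi-evt" pair is now wired up like this:

	/* two hardware event vectors sharing one enum source ("multi-evt") */
	INTC_VECT(SOURCE_X, 0x4a0),	/* first vector: registered normally */
	INTC_VECT(SOURCE_X, 0x4c0),	/* coupled vector: gets the redirect */

	/* the handler installed on the coupled irq re-enters the generic
	 * IRQ layer with the first vector's irq, which was stashed with
	 * set_irq_data(), so do_IRQ() no longer needs a lookup table */
	static void intc_redirect_irq(unsigned int irq, struct irq_desc *desc)
	{
		generic_handle_irq((unsigned int)get_irq_data(irq));
	}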
Signed-off-by: Pawel Moll Signed-off-by: Stuart Menefy Signed-off-by: Paul Mundt --- arch/sh/kernel/irq.c | 2 +- drivers/sh/intc.c | 54 ++++++++++++++++--------------------------------- include/linux/sh_intc.h | 1 - 3 files changed, 18 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 278c68c60488..d1053392e287 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -114,7 +114,7 @@ asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs) #endif irq_enter(); - irq = irq_demux(intc_evt2irq(irq)); + irq = irq_demux(evt2irq(irq)); #ifdef CONFIG_IRQSTACKS curctx = (union irq_ctx *)current_thread_info(); diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c index 4b1ca9d28353..a9174ec72853 100644 --- a/drivers/sh/intc.c +++ b/drivers/sh/intc.c @@ -663,16 +663,9 @@ static unsigned int __init save_reg(struct intc_desc_int *d, return 0; } -static unsigned char *intc_evt2irq_table; - -unsigned int intc_evt2irq(unsigned int vector) +static void intc_redirect_irq(unsigned int irq, struct irq_desc *desc) { - unsigned int irq = evt2irq(vector); - - if (intc_evt2irq_table && intc_evt2irq_table[irq]) - irq = intc_evt2irq_table[irq]; - - return irq; + generic_handle_irq((unsigned int)get_irq_data(irq)); } void __init register_intc_controller(struct intc_desc *desc) @@ -745,34 +738,6 @@ void __init register_intc_controller(struct intc_desc *desc) BUG_ON(k > 256); /* _INTC_ADDR_E() and _INTC_ADDR_D() are 8 bits */ - /* keep the first vector only if same enum is used multiple times */ - for (i = 0; i < desc->nr_vectors; i++) { - struct intc_vect *vect = desc->vectors + i; - int first_irq = evt2irq(vect->vect); - - if (!vect->enum_id) - continue; - - for (k = i + 1; k < desc->nr_vectors; k++) { - struct intc_vect *vect2 = desc->vectors + k; - - if (vect->enum_id != vect2->enum_id) - continue; - - vect2->enum_id = 0; - - if (!intc_evt2irq_table) - intc_evt2irq_table = kzalloc(NR_IRQS, GFP_NOWAIT); - - if (!intc_evt2irq_table) { - pr_warning("intc: cannot allocate evt2irq!\n"); - continue; - } - - intc_evt2irq_table[evt2irq(vect2->vect)] = first_irq; - } - } - /* register the vectors one by one */ for (i = 0; i < desc->nr_vectors; i++) { struct intc_vect *vect = desc->vectors + i; @@ -789,6 +754,21 @@ void __init register_intc_controller(struct intc_desc *desc) } intc_register_irq(desc, d, vect->enum_id, irq); + + for (k = i + 1; k < desc->nr_vectors; k++) { + struct intc_vect *vect2 = desc->vectors + k; + unsigned int irq2 = evt2irq(vect2->vect); + + if (vect->enum_id != vect2->enum_id) + continue; + + vect2->enum_id = 0; + + /* redirect this interrupts to the first one */ + set_irq_chip_and_handler_name(irq2, &d->chip, + intc_redirect_irq, "redirect"); + set_irq_data(irq2, (void *)irq); + } } } diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index eb1423a0078d..68e212ff9dde 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -85,7 +85,6 @@ struct intc_desc symbol __initdata = { \ } #endif -unsigned int intc_evt2irq(unsigned int vector); void __init register_intc_controller(struct intc_desc *desc); int intc_set_priority(unsigned int irq, unsigned int prio); -- cgit v1.2.3 From 35aad0ffdf548617940ca1e78be1f2e0bafc4496 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 24 Aug 2009 14:56:30 +0200 Subject: netfilter: xtables: mark initial tables constant The inputted table is never modified, so should be considered const. 
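For illustration, a minimal hedged sketch of the pattern this change enables (hook mask and error handling abbreviated): the initial table becomes a read-only template, and the registration path only ever writes to a private duplicate:

	static const struct xt_table packet_filter = {
		.name        = "filter",
		.valid_hooks = FILTER_VALID_HOOKS,
		.me          = THIS_MODULE,
	};

	/* inside xt_register_table(): copy the const template before
	 * modifying anything */
	table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
	if (table == NULL)
		return ERR_PTR(-ENOMEM);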
Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 2 +- include/linux/netfilter_arp/arp_tables.h | 2 +- include/linux/netfilter_bridge/ebtables.h | 2 +- include/linux/netfilter_ipv4/ip_tables.h | 2 +- include/linux/netfilter_ipv6/ip6_tables.h | 2 +- net/bridge/netfilter/ebtable_broute.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 2 +- net/bridge/netfilter/ebtables.c | 13 +++++++------ net/ipv4/netfilter/arp_tables.c | 3 ++- net/ipv4/netfilter/arptable_filter.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 3 ++- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 4 ++-- net/ipv4/netfilter/iptable_raw.c | 4 ++-- net/ipv4/netfilter/iptable_security.c | 4 ++-- net/ipv4/netfilter/nf_nat_rule.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 3 ++- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 4 ++-- net/ipv6/netfilter/ip6table_raw.c | 4 ++-- net/ipv6/netfilter/ip6table_security.c | 4 ++-- net/netfilter/x_tables.c | 7 ++++--- 22 files changed, 42 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 4fa6e4c263e0..812cb153cabb 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -407,7 +407,7 @@ extern int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); extern struct xt_table *xt_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); extern void *xt_unregister_table(struct xt_table *table); diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 590ac3d6d5d6..6fe3e6aa10db 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -265,7 +265,7 @@ struct arpt_error } extern struct xt_table *arpt_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, const struct arpt_replace *repl); extern void arpt_unregister_table(struct xt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index e40ddb94b1af..ea281e6a2048 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -301,7 +301,7 @@ struct ebt_table #define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ ~(__alignof__(struct ebt_replace)-1)) extern struct ebt_table *ebt_register_table(struct net *net, - struct ebt_table *table); + const struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 092bd50581a9..61fafc868a7b 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -245,7 +245,7 @@ ipt_get_target(struct ipt_entry *e) extern void ipt_init(void) __init; extern struct xt_table *ipt_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, const struct ipt_replace *repl); extern void ipt_unregister_table(struct xt_table *table); diff --git a/include/linux/netfilter_ipv6/ip6_tables.h 
b/include/linux/netfilter_ipv6/ip6_tables.h index 1089e33cf633..a64e1451ac38 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -306,7 +306,7 @@ ip6t_get_target(struct ip6t_entry *e) extern void ip6t_init(void) __init; extern struct xt_table *ip6t_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, const struct ip6t_replace *repl); extern void ip6t_unregister_table(struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index c751111440f8..d32ab13e728c 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -41,7 +41,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table broute_table = +static const struct ebt_table broute_table = { .name = "broute", .table = &initial_table, diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 4b988db3cd4d..60b1a6ca7185 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -50,7 +50,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table frame_filter = +static const struct ebt_table frame_filter = { .name = "filter", .table = &initial_table, diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 37928d5f2840..bd1c65425d4f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1103,23 +1103,24 @@ free_newinfo: return ret; } -struct ebt_table *ebt_register_table(struct net *net, struct ebt_table *table) +struct ebt_table * +ebt_register_table(struct net *net, const struct ebt_table *input_table) { struct ebt_table_info *newinfo; - struct ebt_table *t; + struct ebt_table *t, *table; struct ebt_replace_kernel *repl; int ret, i, countersize; void *p; - if (!table || !(repl = table->table) || !repl->entries || - repl->entries_size == 0 || - repl->counters || table->private) { + if (input_table == NULL || (repl = input_table->table) == NULL || + repl->entries == 0 || repl->entries_size == 0 || + repl->counters != NULL || input_table->private != NULL) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); return ERR_PTR(-EINVAL); } /* Don't add one table to multiple lists. 
*/ - table = kmemdup(table, sizeof(struct ebt_table), GFP_KERNEL); + table = kmemdup(input_table, sizeof(struct ebt_table), GFP_KERNEL); if (!table) { ret = -ENOMEM; goto out; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7bc11ffbb845..27774c99d888 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1778,7 +1778,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -struct xt_table *arpt_register_table(struct net *net, struct xt_table *table, +struct xt_table *arpt_register_table(struct net *net, + const struct xt_table *table, const struct arpt_replace *repl) { int ret; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 6ecfdae7c589..97337601827a 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -15,7 +15,7 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) -static struct +static const struct { struct arpt_replace repl; struct arpt_standard entries[3]; @@ -45,7 +45,7 @@ static struct .term = ARPT_ERROR_INIT, }; -static struct xt_table packet_filter = { +static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 0b43fd7ca04a..cde755d5eeab 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2065,7 +2065,8 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ipt_register_table(struct net *net, struct xt_table *table, +struct xt_table *ipt_register_table(struct net *net, + const struct xt_table *table, const struct ipt_replace *repl) { int ret; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 97dbd94a8e37..df566cbd68e5 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -53,7 +53,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_filter = { +static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 28647f10aa7e..036047f9b0f2 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -28,7 +28,7 @@ MODULE_DESCRIPTION("iptables mangle table"); (1 << NF_INET_POST_ROUTING)) /* Ouch - five different hooks? Maybe this should be a config option..... 
-- BC */ -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[5]; @@ -64,7 +64,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_mangler = { +static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 494784c999eb..993edc23be09 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -9,7 +9,7 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[2]; @@ -36,7 +36,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_raw = { +static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 8804e1a0f915..99eb76c65d25 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[3]; @@ -57,7 +57,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table security_table = { +static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 6448a9b7d6f0..9e81e0dfb4ec 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -28,7 +28,7 @@ (1 << NF_INET_POST_ROUTING) | \ (1 << NF_INET_LOCAL_OUT)) -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[3]; @@ -58,7 +58,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table nat_table = { +static const struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a5d0c27cc26f..cc9f8ef303fd 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2100,7 +2100,8 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ip6t_register_table(struct net *net, struct xt_table *table, +struct xt_table *ip6t_register_table(struct net *net, + const struct xt_table *table, const struct ip6t_replace *repl) { int ret; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 0a3ae48ac4d5..6f4383ad86f9 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -51,7 +51,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_filter = { +static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 0f49e005a8c5..0ad91433ed61 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -21,7 +21,7 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -static struct +static const 
struct { struct ip6t_replace repl; struct ip6t_standard entries[5]; @@ -57,7 +57,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_mangler = { +static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 679865e3d5ff..ed1a1180f3b3 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -8,7 +8,7 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static struct +static const struct { struct ip6t_replace repl; struct ip6t_standard entries[2]; @@ -35,7 +35,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_raw = { +static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 822afabbdc88..41b444c60934 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -26,7 +26,7 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static struct +static const struct { struct ip6t_replace repl; struct ip6t_standard entries[3]; @@ -56,7 +56,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table security_table = { +static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 025d1a0af78b..a6ac83a93348 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -736,16 +736,17 @@ xt_replace_table(struct xt_table *table, } EXPORT_SYMBOL_GPL(xt_replace_table); -struct xt_table *xt_register_table(struct net *net, struct xt_table *table, +struct xt_table *xt_register_table(struct net *net, + const struct xt_table *input_table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { int ret; struct xt_table_info *private; - struct xt_table *t; + struct xt_table *t, *table; /* Don't add one object to multiple lists. */ - table = kmemdup(table, sizeof(struct xt_table), GFP_KERNEL); + table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); if (!table) { ret = -ENOMEM; goto out; -- cgit v1.2.3 From 7c614d6461399acca5c0ba444f5db49cb332fc08 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 24 Aug 2009 09:42:00 -0700 Subject: rcu: Add "notrace" to RCU function headers used by ftrace Both rcu_read_lock_sched_notrace() and rcu_read_unlock_sched_notrace() are used by ftrace, and thus need to be marked "notrace". Unfortunately, my naive assumption that gcc would see the inner "notrace" does not hold. Kudos to Lai Jiangshan for noting this. Reported-by: Ingo Molnar Bug-spotted-by: Lai Jiangshan Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <12511321213243-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ec90fc34fea9..8b4422c558da 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -191,7 +191,7 @@ static inline void rcu_read_lock_sched(void) __acquire(RCU_SCHED); rcu_read_acquire(); } -static inline void rcu_read_lock_sched_notrace(void) +static inline notrace void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); __acquire(RCU_SCHED); @@ -209,7 +209,7 @@ static inline void rcu_read_unlock_sched(void) __release(RCU_SCHED); preempt_enable(); } -static inline void rcu_read_unlock_sched_notrace(void) +static inline notrace void rcu_read_unlock_sched_notrace(void) { rcu_read_release(); __release(RCU_SCHED); -- cgit v1.2.3 From a4be7c2778d1fd8e0a8a5607bec91fa221ab2c91 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Aug 2009 11:18:27 +0200 Subject: perf_counter: Allow sharing of output channels Provide the ability to configure a counter to send its output to another (already existing) counter's output stream. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: stephane eranian Cc: Paul Mackerras LKML-Reference: <20090819092023.980284148@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 +++ kernel/perf_counter.c | 83 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index b53f7006cc4e..e022b847c90d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -216,6 +216,7 @@ struct perf_counter_attr { #define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) #define PERF_COUNTER_IOC_RESET _IO ('$', 3) #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) enum perf_counter_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -415,6 +416,9 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -536,6 +540,7 @@ struct perf_counter { struct list_head sibling_list; int nr_siblings; struct perf_counter *group_leader; + struct perf_counter *output; const struct pmu *pmu; enum perf_counter_active_state state; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 06bf6a4f2608..53abcbefa0bf 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1692,6 +1692,11 @@ static void free_counter(struct perf_counter *counter) atomic_dec(&nr_task_counters); } + if (counter->output) { + fput(counter->output->filp); + counter->output = NULL; + } + if (counter->destroy) counter->destroy(counter); @@ -1977,6 +1982,8 @@ unlock: return ret; } +int perf_counter_set_output(struct perf_counter *counter, int output_fd); + static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct perf_counter *counter = file->private_data; @@ -2000,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, 
unsigned long arg) case PERF_COUNTER_IOC_PERIOD: return perf_counter_period(counter, (u64 __user *)arg); + case PERF_COUNTER_IOC_SET_OUTPUT: + return perf_counter_set_output(counter, arg); + default: return -ENOTTY; } @@ -2270,6 +2280,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) WARN_ON_ONCE(counter->ctx->parent_ctx); mutex_lock(&counter->mmap_mutex); + if (counter->output) { + ret = -EINVAL; + goto unlock; + } + if (atomic_inc_not_zero(&counter->mmap_count)) { if (nr_pages != counter->data->nr_pages) ret = -EINVAL; @@ -2655,6 +2670,7 @@ static int perf_output_begin(struct perf_output_handle *handle, struct perf_counter *counter, unsigned int size, int nmi, int sample) { + struct perf_counter *output_counter; struct perf_mmap_data *data; unsigned int offset, head; int have_lost; @@ -2664,13 +2680,17 @@ static int perf_output_begin(struct perf_output_handle *handle, u64 lost; } lost_event; + rcu_read_lock(); /* * For inherited counters we send all the output towards the parent. */ if (counter->parent) counter = counter->parent; - rcu_read_lock(); + output_counter = rcu_dereference(counter->output); + if (output_counter) + counter = output_counter; + data = rcu_dereference(counter->data); if (!data) goto out; @@ -4218,6 +4238,57 @@ err_size: goto out; } +int perf_counter_set_output(struct perf_counter *counter, int output_fd) +{ + struct perf_counter *output_counter = NULL; + struct file *output_file = NULL; + struct perf_counter *old_output; + int fput_needed = 0; + int ret = -EINVAL; + + if (!output_fd) + goto set; + + output_file = fget_light(output_fd, &fput_needed); + if (!output_file) + return -EBADF; + + if (output_file->f_op != &perf_fops) + goto out; + + output_counter = output_file->private_data; + + /* Don't chain output fds */ + if (output_counter->output) + goto out; + + /* Don't set an output fd when we already have an output channel */ + if (counter->data) + goto out; + + atomic_long_inc(&output_file->f_count); + +set: + mutex_lock(&counter->mmap_mutex); + old_output = counter->output; + rcu_assign_pointer(counter->output, output_counter); + mutex_unlock(&counter->mmap_mutex); + + if (old_output) { + /* + * we need to make sure no existing perf_output_*() + * is still referencing this counter. + */ + synchronize_rcu(); + fput(old_output->filp); + } + + ret = 0; +out: + fput_light(output_file, fput_needed); + return ret; +} + /** * sys_perf_counter_open - open a performance counter, associate it to a task/cpu * @@ -4240,7 +4311,7 @@ SYSCALL_DEFINE5(perf_counter_open, int ret; /* for future expandability... 
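* (the two flag bits accepted below are PERF_FLAG_FD_NO_GROUP and
 * PERF_FLAG_FD_OUTPUT; when PERF_FLAG_FD_OUTPUT is set, group_fd names
 * the counter whose buffer should also receive this counter's output,
 * via perf_counter_set_output() above)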
*/ - if (flags) + if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) return -EINVAL; ret = perf_copy_attr(attr_uptr, &attr); @@ -4268,7 +4339,7 @@ SYSCALL_DEFINE5(perf_counter_open, * Look up the group leader (we will attach this counter to it): */ group_leader = NULL; - if (group_fd != -1) { + if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { ret = -EINVAL; group_file = fget_light(group_fd, &fput_needed); if (!group_file) @@ -4310,6 +4381,12 @@ SYSCALL_DEFINE5(perf_counter_open, if (!counter_file) goto err_free_put_context; + if (flags & PERF_FLAG_FD_OUTPUT) { + ret = perf_counter_set_output(counter, group_fd); + if (ret) + goto err_free_put_context; + } + counter->filp = counter_file; WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); -- cgit v1.2.3 From 31b47cf7609288893a10706c648faa932c7aef90 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 24 Aug 2009 20:28:04 +0100 Subject: genirq: Add prototype for handle_nested_irq() The function is supposed to be called from the primary IRQ handler for a demultiplexing chip, so make a prototype visible for them. Signed-off-by: Mark Brown Cc: Mark Brown LKML-Reference: <1251142084-9852-1-git-send-email-broonie@opensource.wolfsonmicro.com> Signed-off-by: Ingo Molnar --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 6956df9961ab..9e9eb76faf81 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -282,6 +282,7 @@ extern void handle_edge_irq(unsigned int irq, struct irq_desc *desc); extern void handle_simple_irq(unsigned int irq, struct irq_desc *desc); extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc); extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); +extern void handle_nested_irq(unsigned int irq); /* * Monolithic do_IRQ implementation. -- cgit v1.2.3 From b6ed2e03df1e2c6ee41cf0e2e2699f2410671916 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 25 Aug 2009 13:44:04 +0100 Subject: GFS2: Add explanation of extended attr on-disk format Some useful info regarding the on-disk representation of GFS2 extended attributes. Signed-off-by: Steven Whitehouse --- include/linux/gfs2_ondisk.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index c56b4bce56d0..b80c88dedbbb 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -333,6 +333,28 @@ struct gfs2_leaf { /* * Extended attribute header format + * + * This works in a similar way to dirents. There is a fixed size header + * followed by a variable length section made up of the name and the + * associated data. In the case of a "stuffed" entry, the value is + * inline directly after the name; the ea_num_ptrs entry will be + * zero in that case. For non-"stuffed" entries, there will be + * a set of pointers (aligned to 8 byte boundary) to the block(s) + * containing the value. + * + * The blocks containing the values and the blocks containing the + * extended attribute headers themselves all start with the common + * metadata header. Each inode, if it has extended attributes, will + * have either a single block containing the extended attribute headers + * or a single indirect block pointing to blocks containing the + * extended attribute headers. + * + * The maximum size of the data part of an extended attribute is 64k so the number of blocks required depends upon block size. 
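+ * (For example, with a 4k block size a full 64k value needs roughly
+ * 64k/4k = 16 data blocks, in practice one more since each data block
+ * also begins with the common metadata header, and so that many
+ * pointers after the name.)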
Since the + block size also determines the number of pointers in an indirect + block, it's a fairly complicated calculation to work out the maximum + number of blocks that an inode may have relating to extended attributes. + */ #define GFS2_EA_MAX_NAME_LEN 255 -- cgit v1.2.3 From 3a6c2b419b7768703cfb2cabdb894517c5065e33 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Aug 2009 16:07:40 +0200 Subject: netlink: constify nlmsghdr arguments Constify nlmsghdr arguments to a couple of functions as preparation for the next patch, which will constify the netlink message data in all nfnetlink users. Signed-off-by: Patrick McHardy --- include/linux/netlink.h | 15 ++++++++------- include/net/netlink.h | 4 ++-- include/net/rtnetlink.h | 2 +- net/netlink/af_netlink.c | 2 +- net/sched/act_api.c | 2 +- 5 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 5ba398e90304..0fbecbbe8e9e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -217,12 +217,13 @@ int netlink_sendskb(struct sock *sk, struct sk_buff *skb); struct netlink_callback { - struct sk_buff *skb; - struct nlmsghdr *nlh; - int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); - int (*done)(struct netlink_callback *cb); - int family; - long args[6]; + struct sk_buff *skb; + const struct nlmsghdr *nlh; + int (*dump)(struct sk_buff * skb, + struct netlink_callback *cb); + int (*done)(struct netlink_callback *cb); + int family; + long args[6]; }; struct netlink_notify @@ -258,7 +259,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) NLMSG_NEW(skb, pid, seq, type, len, 0) extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, - struct nlmsghdr *nlh, + const struct nlmsghdr *nlh, int (*dump)(struct sk_buff *skb, struct netlink_callback*), int (*done)(struct netlink_callback*)); diff --git a/include/net/netlink.h b/include/net/netlink.h index 007bdb07dabb..a63b2192ac1c 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -365,7 +365,7 @@ static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining) * * See nla_parse() */ -static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, const struct nla_policy *policy) { @@ -414,7 +414,7 @@ static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype, * * Returns 1 if a report back to the application is requested.
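* (i.e. the request had NLM_F_ECHO set; callers typically pass this
 * result along when deciding whether to notify the sender of the
 * change it asked for)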
*/ -static inline int nlmsg_report(struct nlmsghdr *nlh) +static inline int nlmsg_report(const struct nlmsghdr *nlh) { return !!(nlh->nlmsg_flags & NLM_F_ECHO); } diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 3c1895e54b7f..85ba560332ed 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -14,7 +14,7 @@ extern void rtnl_register(int protocol, int msgtype, extern int rtnl_unregister(int protocol, int msgtype); extern void rtnl_unregister_all(int protocol); -static inline int rtnl_msg_family(struct nlmsghdr *nlh) +static inline int rtnl_msg_family(const struct nlmsghdr *nlh) { if (nlmsg_len(nlh) >= sizeof(struct rtgenmsg)) return ((struct rtgenmsg *) nlmsg_data(nlh))->rtgen_family; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index da3163d15ef0..d0ff382c40ca 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1705,7 +1705,7 @@ errout: } int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, - struct nlmsghdr *nlh, + const struct nlmsghdr *nlh, int (*dump)(struct sk_buff *skb, struct netlink_callback *), int (*done)(struct netlink_callback *)) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9d03cc33b6cc..2dfb3e7a040d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1011,7 +1011,7 @@ replay: } static struct nlattr * -find_dump_kind(struct nlmsghdr *n) +find_dump_kind(const struct nlmsghdr *n) { struct nlattr *tb1, *tb2[TCA_ACT_MAX+1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; -- cgit v1.2.3 From 3993832464dd4e14a4c926583a11f0fa92c1f0f0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Aug 2009 16:07:58 +0200 Subject: netfilter: nfnetlink: constify message attributes and headers Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink.h | 3 +- include/net/netfilter/nf_nat_core.h | 2 +- net/ipv4/netfilter/nf_nat_core.c | 6 ++-- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 54 ++++++++++++++++++++++-------------- net/netfilter/nfnetlink.c | 2 +- net/netfilter/nfnetlink_log.c | 6 ++-- net/netfilter/nfnetlink_queue.c | 9 ++++-- net/netfilter/xt_osf.c | 6 ++-- 9 files changed, 55 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index bff4d5741d98..9f00da287f2c 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -58,7 +58,8 @@ struct nfgenmsg { struct nfnl_callback { int (*call)(struct sock *nl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]); + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]); const struct nla_policy *policy; /* netlink attribute policy */ const u_int16_t attr_count; /* number of nlattr's */ }; diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h index 58684066388c..33602ab66190 100644 --- a/include/net/netfilter/nf_nat_core.h +++ b/include/net/netfilter/nf_nat_core.h @@ -31,6 +31,6 @@ struct nlattr; extern int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr); + const struct nlattr *attr); #endif /* _NF_NAT_CORE_H */ diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index b6ddd5633045..68afc6ecd343 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -620,7 +620,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { }; static int -nfnetlink_parse_nat(struct nlattr *nat, 
+nfnetlink_parse_nat(const struct nlattr *nat, const struct nf_conn *ct, struct nf_nat_range *range) { struct nlattr *tb[CTA_NAT_MAX+1]; @@ -656,7 +656,7 @@ nfnetlink_parse_nat(struct nlattr *nat, static int nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) + const struct nlattr *attr) { struct nf_nat_range range; @@ -671,7 +671,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) + const struct nlattr *attr) { return -EOPNOTSUPP; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b5869b9574b0..565c3a86423f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -47,7 +47,7 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) __read_mostly; + const struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); DEFINE_SPINLOCK(nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 49479d194570..59d8064eb522 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -704,7 +704,8 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, } static int -ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, +ctnetlink_parse_tuple(const struct nlattr * const cda[], + struct nf_conntrack_tuple *tuple, enum ctattr_tuple type, u_int8_t l3num) { struct nlattr *tb[CTA_TUPLE_MAX+1]; @@ -740,7 +741,7 @@ ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, } static inline int -ctnetlink_parse_help(struct nlattr *attr, char **helper_name) +ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) { struct nlattr *tb[CTA_HELP_MAX+1]; @@ -764,7 +765,8 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { static int ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -823,7 +825,8 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, static int ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -884,7 +887,7 @@ out: static int ctnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) + const struct nlattr *attr) { typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup; @@ -914,7 +917,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, #endif static int -ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) { unsigned long d; unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS])); @@ -940,7 +943,7 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) } static int -ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[]) { #ifdef CONFIG_NF_NAT_NEEDED int ret; @@ -966,7 +969,7 @@ ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[]) } static inline int -ctnetlink_change_helper(struct nf_conn *ct, struct 
nlattr *cda[]) +ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); @@ -1028,7 +1031,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) } static inline int -ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[]) { u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); @@ -1042,9 +1045,10 @@ ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[]) } static inline int -ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]) { - struct nlattr *tb[CTA_PROTOINFO_MAX+1], *attr = cda[CTA_PROTOINFO]; + const struct nlattr *attr = cda[CTA_PROTOINFO]; + struct nlattr *tb[CTA_PROTOINFO_MAX+1]; struct nf_conntrack_l4proto *l4proto; int err = 0; @@ -1061,7 +1065,7 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[]) #ifdef CONFIG_NF_NAT_NEEDED static inline int -change_nat_seq_adj(struct nf_nat_seq *natseq, struct nlattr *attr) +change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) { struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; @@ -1089,7 +1093,8 @@ change_nat_seq_adj(struct nf_nat_seq *natseq, struct nlattr *attr) } static int -ctnetlink_change_nat_seq_adj(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_nat_seq_adj(struct nf_conn *ct, + const struct nlattr * const cda[]) { int ret = 0; struct nf_conn_nat *nat = nfct_nat(ct); @@ -1120,7 +1125,8 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, struct nlattr *cda[]) #endif static int -ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_conntrack(struct nf_conn *ct, + const struct nlattr * const cda[]) { int err; @@ -1169,7 +1175,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) } static struct nf_conn * -ctnetlink_create_conntrack(struct nlattr *cda[], +ctnetlink_create_conntrack(const struct nlattr * const cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, u8 u3) @@ -1304,7 +1310,8 @@ err1: static int ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1629,7 +1636,8 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { static int ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; @@ -1689,7 +1697,8 @@ out: static int ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; @@ -1767,13 +1776,15 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return 0; } static int -ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nlattr *cda[]) +ctnetlink_change_expect(struct nf_conntrack_expect *x, + const struct nlattr * const cda[]) { return -EOPNOTSUPP; } static int -ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3, u32 pid, int report) +ctnetlink_create_expect(const 
struct nlattr * const cda[], u_int8_t u3, + u32 pid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1831,7 +1842,8 @@ out: static int ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 92761a988375..eedc0c1ac7a4 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -170,7 +170,7 @@ replay: if (err < 0) return err; - err = nc->call(nfnl, skb, nlh, cda); + err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda); if (err == -EAGAIN) goto replay; return err; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 66a6dd5c519a..f900dc3194af 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -694,7 +694,8 @@ static struct notifier_block nfulnl_rtnl_notifier = { static int nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -716,7 +717,8 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { static int nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfula[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfula[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t group_num = ntohs(nfmsg->res_id); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 71daa0934b6c..7a9dec9fb822 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -608,7 +608,8 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); @@ -670,7 +671,8 @@ err_out_unlock: static int nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -687,7 +689,8 @@ static const struct nf_queue_handler nfqh = { static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 0f482e2440b4..63e190504656 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -70,7 +70,8 @@ static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head) } static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) + const struct nlmsghdr *nlh, + const struct nlattr * const osf_attrs[]) { struct xt_osf_user_finger *f; struct xt_osf_finger *kf = NULL, *sf; @@ -112,7 +113,8 @@ static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, } static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) + const struct nlmsghdr *nlh, + const struct 
nlattr * const osf_attrs[]) { struct xt_osf_user_finger *f; struct xt_osf_finger *sf; -- cgit v1.2.3 From 06e799764eb7c2e4640888d438c3524d756613e1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 Aug 2009 18:53:37 -0700 Subject: rcu: Remove lockdep annotations from RCU's _notrace() API members The lockdep annotations rcu_read_acquire() and rcu_read_release() might lead to infinite looping if called from lockdep. So this patch removes them. Formal repost of http://lkml.org/lkml/2009/8/25/309 on the strength of Lai Jiangshan's review. Suggested-by: Lai Jiangshan Suggested-by: Mathieu Desnoyers Reviewed-by: Lai Jiangshan Signed-off-by: Paul E. McKenney Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <20090826015337.GA18904@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8b4422c558da..95e0615f4d75 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -195,7 +195,6 @@ static inline notrace void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); __acquire(RCU_SCHED); - rcu_read_acquire(); } /* @@ -211,7 +210,6 @@ static inline void rcu_read_unlock_sched(void) } static inline notrace void rcu_read_unlock_sched_notrace(void) { - rcu_read_release(); __release(RCU_SCHED); preempt_enable_notrace(); } -- cgit v1.2.3 From 0396c215f301e92677d1e9a064b405e31501dc1d Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 25 Aug 2009 16:41:06 +0100 Subject: uwb: avoid radio controller reset loops If a radio controller reset attempt occurs while a probe() or remove() is in progress it fails and is retried endlessly, potentially preventing the probe() or remove() from completing. If a reset fails, sleep for a bit before retrying the reset. This allows the probe()/remove() to complete. Signed-off-by: David Vrabel --- drivers/uwb/hwa-rc.c | 3 +-- drivers/uwb/reset.c | 21 +++++++++++---------- drivers/uwb/umc-bus.c | 2 +- drivers/uwb/whc-rc.c | 3 +-- include/linux/uwb.h | 2 +- 5 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 9052bcb4f528..e7eeb63fab23 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -887,8 +887,7 @@ static int hwarc_post_reset(struct usb_interface *iface) struct hwarc *hwarc = usb_get_intfdata(iface); struct uwb_rc *uwb_rc = hwarc->uwb_rc; - uwb_rc_post_reset(uwb_rc); - return 0; + return uwb_rc_post_reset(uwb_rc); } /** USB device ID's that we handle */ diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c index 70f8050221ff..7f0512e43d9d 100644 --- a/drivers/uwb/reset.c +++ b/drivers/uwb/reset.c @@ -30,6 +30,7 @@ */ #include #include +#include #include "uwb-internal.h" @@ -323,13 +324,15 @@ int uwbd_msg_handle_reset(struct uwb_event *evt) dev_info(&rc->uwb_dev.dev, "resetting radio controller\n"); ret = rc->reset(rc); - if (ret) { + if (ret < 0) { dev_err(&rc->uwb_dev.dev, "failed to reset hardware: %d\n", ret); goto error; } return 0; error: - /* Nothing can be done except try the reset again. */ + /* Nothing can be done except try the reset again. Wait a bit + to avoid reset loops during probe() or remove(). 
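+ The one second delay below is not critical; it only needs to give
+ an in-progress probe() or remove() time to complete before the retry.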
*/ + msleep(1000); uwb_rc_reset_all(rc); return ret; } @@ -368,22 +371,20 @@ void uwb_rc_pre_reset(struct uwb_rc *rc) } EXPORT_SYMBOL_GPL(uwb_rc_pre_reset); -void uwb_rc_post_reset(struct uwb_rc *rc) +int uwb_rc_post_reset(struct uwb_rc *rc) { int ret; ret = rc->start(rc); if (ret) - goto error; + goto out; ret = uwb_rc_mac_addr_set(rc, &rc->uwb_dev.mac_addr); if (ret) - goto error; + goto out; ret = uwb_rc_dev_addr_set(rc, &rc->uwb_dev.dev_addr); if (ret) - goto error; - return; -error: - /* Nothing can be done except try the reset again. */ - uwb_rc_reset_all(rc); + goto out; +out: + return ret; } EXPORT_SYMBOL_GPL(uwb_rc_post_reset); diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c index 5ad36164c13b..cdd6c8efc9f8 100644 --- a/drivers/uwb/umc-bus.c +++ b/drivers/uwb/umc-bus.c @@ -66,7 +66,7 @@ int umc_controller_reset(struct umc_dev *umc) return -EAGAIN; ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper); if (ret >= 0) - device_for_each_child(parent, parent, umc_bus_post_reset_helper); + ret = device_for_each_child(parent, parent, umc_bus_post_reset_helper); up(&parent->sem); return ret; diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c index 19a1dd129212..1d9a6f54658e 100644 --- a/drivers/uwb/whc-rc.c +++ b/drivers/uwb/whc-rc.c @@ -443,8 +443,7 @@ static int whcrc_post_reset(struct umc_dev *umc) struct whcrc *whcrc = umc_get_drvdata(umc); struct uwb_rc *uwb_rc = whcrc->uwb_rc; - uwb_rc_post_reset(uwb_rc); - return 0; + return uwb_rc_post_reset(uwb_rc); } /* PCI device ID's that we handle [so it gets loaded] */ diff --git a/include/linux/uwb.h b/include/linux/uwb.h index c02128991ff7..7fc9746f22cd 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -597,7 +597,7 @@ void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t); void uwb_rc_neh_error(struct uwb_rc *, int); void uwb_rc_reset_all(struct uwb_rc *rc); void uwb_rc_pre_reset(struct uwb_rc *rc); -void uwb_rc_post_reset(struct uwb_rc *rc); +int uwb_rc_post_reset(struct uwb_rc *rc); /** * uwb_rsv_is_owner - is the owner of this reservation the RC? -- cgit v1.2.3 From 9e36fda0b359d2a6ae039c3d7e71a04502a77898 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:35 -0700 Subject: x86, pat: Add PAT reserve free to io_mapping* APIs io_mapping_* interfaces were added, mainly for graphics drivers. Make this interface go through the PAT reserve/free, instead of hardcoding WC mapping. This makes sure that there are no aliases due to unconditional WC setting. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/iomap.h | 9 ++++++--- arch/x86/mm/iomap_32.c | 27 +++++++++++++++++++++++++-- include/linux/io-mapping.h | 17 ++++++++++++----- 3 files changed, 43 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 0e9fe1d9d971..f35eb45d6576 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -26,13 +26,16 @@ #include #include -int -is_io_mapping_possible(resource_size_t base, unsigned long size); - void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); void iounmap_atomic(void *kvaddr, enum km_type type); +int +iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); + +void +iomap_free(resource_size_t base, unsigned long size); + #endif /* _ASM_X86_IOMAP_H */ diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84ca121e..84e236ce76ba 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -21,7 +21,7 @@ #include #include -int is_io_mapping_possible(resource_size_t base, unsigned long size) +static int is_io_mapping_possible(resource_size_t base, unsigned long size) { #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) /* There is no way to map greater than 1 << 32 address without PAE */ @@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) #endif return 1; } -EXPORT_SYMBOL_GPL(is_io_mapping_possible); + +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) +{ + unsigned long flag = _PAGE_CACHE_WC; + int ret; + + if (!is_io_mapping_possible(base, size)) + return -EINVAL; + + ret = io_reserve_memtype(base, base + size, &flag); + if (ret) + return ret; + + *prot = __pgprot(__PAGE_KERNEL | flag); + return 0; +} +EXPORT_SYMBOL_GPL(iomap_create_wc); + +void +iomap_free(resource_size_t base, unsigned long size) +{ + io_free_memtype(base, base + size); +} +EXPORT_SYMBOL_GPL(iomap_free); void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0adb0f91568c..97eb928b4924 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -49,23 +49,30 @@ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { struct io_mapping *iomap; - - if (!is_io_mapping_possible(base, size)) - return NULL; + pgprot_t prot; iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); if (!iomap) - return NULL; + goto out_err; + + if (iomap_create_wc(base, size, &prot)) + goto out_free; iomap->base = base; iomap->size = size; - iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); + iomap->prot = prot; return iomap; + +out_free: + kfree(iomap); +out_err: + return NULL; } static inline void io_mapping_free(struct io_mapping *mapping) { + iomap_free(mapping->base, mapping->size); kfree(mapping); } -- cgit v1.2.3 From 46cf98cdaef5471926010b5bddf84c44ec177fdd Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:37 -0700 Subject: x86, pat: Generalize the use of page flag PG_uncached Only IA64 was using PG_uncached as of now. We now intend to use this bit in x86 as well, to keep track of memory type of those addresses that have page struct for them. So, generalize the use of that bit across ia64 and x86. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/ia64/Kconfig | 4 ++++ arch/x86/Kconfig | 4 ++++ include/linux/page-flags.h | 4 ++-- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..e6246119932a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -112,6 +112,10 @@ config IA64_UNCACHED_ALLOCATOR bool select GENERIC_ALLOCATOR +config ARCH_USES_PG_UNCACHED + def_bool y + depends on IA64_UNCACHED_ALLOCATOR + config AUDIT_ARCH bool default y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f72205909..8e1595382196 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1414,6 +1414,10 @@ config X86_PAT If unsure, say Y. +config ARCH_USES_PG_UNCACHED + def_bool y + depends on X86_PAT + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e2e5ce543595..2b87acfc5f87 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -99,7 +99,7 @@ enum pageflags { #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT PG_mlocked, /* Page is vma mlocked */ #endif -#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +#ifdef CONFIG_ARCH_USES_PG_UNCACHED PG_uncached, /* Page has been mapped as uncached */ #endif __NR_PAGEFLAGS, @@ -257,7 +257,7 @@ PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) #endif -#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +#ifdef CONFIG_ARCH_USES_PG_UNCACHED PAGEFLAG(Uncached, uncached) #else PAGEFLAG_FALSE(Uncached) -- cgit v1.2.3 From a6186d89c913b176e7339f37a4ec6ccb38b2c5c0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 27 Aug 2009 14:29:16 +0100 Subject: kmemleak: Mark the early log buffer as __initdata This buffer isn't needed after kmemleak was initialised so it can be freed together with the .init.data section. This patch also marks functions conditionally accessing the early log variables with __ref. 
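A minimal sketch of the pattern, with made-up names (the constraint is the usual modpost one: data placed in .init.data via __initdata may only be referenced from __init code or from functions explicitly marked __ref):

	static int early_buf[8] __initdata;
	static int crt_entries __initdata;

	/* __ref: touches __initdata, but only before the buffer is freed */
	static void __ref log_early_example(int v)
	{
		if (crt_entries < 8)
			early_buf[crt_entries++] = v;
	}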
Signed-off-by: Catalin Marinas --- include/linux/kmemleak.h | 18 +++++++++--------- mm/kmemleak.c | 26 ++++++++++++++------------ 2 files changed, 23 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 6a63807f714e..3c7497d46ee9 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -23,18 +23,18 @@ #ifdef CONFIG_DEBUG_KMEMLEAK -extern void kmemleak_init(void); +extern void kmemleak_init(void) __ref; extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, - gfp_t gfp); -extern void kmemleak_free(const void *ptr); -extern void kmemleak_free_part(const void *ptr, size_t size); + gfp_t gfp) __ref; +extern void kmemleak_free(const void *ptr) __ref; +extern void kmemleak_free_part(const void *ptr, size_t size) __ref; extern void kmemleak_padding(const void *ptr, unsigned long offset, - size_t size); -extern void kmemleak_not_leak(const void *ptr); -extern void kmemleak_ignore(const void *ptr); + size_t size) __ref; +extern void kmemleak_not_leak(const void *ptr) __ref; +extern void kmemleak_ignore(const void *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, unsigned long offset, - size_t length, gfp_t gfp); -extern void kmemleak_no_scan(const void *ptr); + size_t length, gfp_t gfp) __ref; +extern void kmemleak_no_scan(const void *ptr) __ref; static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, int min_count, unsigned long flags, diff --git a/mm/kmemleak.c b/mm/kmemleak.c index c977f7a2f0e4..576c0a4cec52 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -232,8 +232,9 @@ struct early_log { }; /* early logging buffer and current position */ -static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE]; -static int crt_early_log; +static struct early_log + early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata; +static int crt_early_log __initdata; static void kmemleak_disable(void); @@ -718,8 +719,8 @@ static void object_no_scan(unsigned long ptr) * Log an early kmemleak_* call to the early_log buffer. These calls will be * processed later once kmemleak is fully initialized. */ -static void log_early(int op_type, const void *ptr, size_t size, - int min_count, unsigned long offset, size_t length) +static void __init log_early(int op_type, const void *ptr, size_t size, + int min_count, unsigned long offset, size_t length) { unsigned long flags; struct early_log *log; @@ -751,7 +752,8 @@ static void log_early(int op_type, const void *ptr, size_t size, * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc, * vmalloc etc.). */ -void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) +void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count, + gfp_t gfp) { pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count); @@ -766,7 +768,7 @@ EXPORT_SYMBOL_GPL(kmemleak_alloc); * Memory freeing function callback. This function is called from the kernel * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.). */ -void kmemleak_free(const void *ptr) +void __ref kmemleak_free(const void *ptr) { pr_debug("%s(0x%p)\n", __func__, ptr); @@ -781,7 +783,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free); * Partial memory freeing function callback. This function is usually called * from bootmem allocator when (part of) a memory block is freed. 
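* Unlike a full kmemleak_free(), only the given [ptr, ptr + size) range
 * stops being tracked; the remainder of the original block is still
 * scanned for references.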
*/ -void kmemleak_free_part(const void *ptr, size_t size) +void __ref kmemleak_free_part(const void *ptr, size_t size) { pr_debug("%s(0x%p)\n", __func__, ptr); @@ -796,7 +798,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free_part); * Mark an already allocated memory block as a false positive. This will cause * the block to no longer be reported as leak and always be scanned. */ -void kmemleak_not_leak(const void *ptr) +void __ref kmemleak_not_leak(const void *ptr) { pr_debug("%s(0x%p)\n", __func__, ptr); @@ -812,7 +814,7 @@ EXPORT_SYMBOL(kmemleak_not_leak); * corresponding block is not a leak and does not contain any references to * other allocated memory blocks. */ -void kmemleak_ignore(const void *ptr) +void __ref kmemleak_ignore(const void *ptr) { pr_debug("%s(0x%p)\n", __func__, ptr); @@ -826,8 +828,8 @@ EXPORT_SYMBOL(kmemleak_ignore); /* * Limit the range to be scanned in an allocated memory block. */ -void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length, - gfp_t gfp) +void __ref kmemleak_scan_area(const void *ptr, unsigned long offset, + size_t length, gfp_t gfp) { pr_debug("%s(0x%p)\n", __func__, ptr); @@ -841,7 +843,7 @@ EXPORT_SYMBOL(kmemleak_scan_area); /* * Inform kmemleak not to scan the given memory block. */ -void kmemleak_no_scan(const void *ptr) +void __ref kmemleak_no_scan(const void *ptr) { pr_debug("%s(0x%p)\n", __func__, ptr); -- cgit v1.2.3 From f726f30e32305a34a203ff975e60885aa7556c6a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Aug 2009 19:08:24 +0000 Subject: dma: Add set_dma_mask hook to struct dma_map_ops POWERPC needs this hook. SPARC could use it too. Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- include/linux/dma-mapping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index c0f6c3cd788c..91b761846061 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -58,6 +58,7 @@ struct dma_map_ops { enum dma_data_direction dir); int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); + int (*set_dma_mask)(struct device *dev, u64 mask); int is_phys; }; -- cgit v1.2.3 From 77a53fd21870c726b670c0d8179294ac1ea33468 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 25 Aug 2009 19:24:13 -0700 Subject: Input: matrix-keypad - add function to build device keymap Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/matrix_keypad.c | 15 +++------------ drivers/input/keyboard/w90p910_keypad.c | 16 ++-------------- include/linux/input/matrix_keypad.h | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index 541b981ff075..91cfe5170265 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -319,7 +319,6 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) struct input_dev *input_dev; unsigned short *keycodes; unsigned int row_shift; - int i; int err; pdata = pdev->dev.platform_data; @@ -363,18 +362,10 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) input_dev->keycode = keycodes; input_dev->keycodesize = sizeof(*keycodes); - input_dev->keycodemax = pdata->num_row_gpios << keypad->row_shift; - - for (i = 0; i < keymap_data->keymap_size; i++) { - unsigned int key = 
keymap_data->keymap[i]; - unsigned int row = KEY_ROW(key); - unsigned int col = KEY_COL(key); - unsigned short code = KEY_VAL(key); + input_dev->keycodemax = pdata->num_row_gpios << row_shift; - keycodes[MATRIX_SCAN_CODE(row, col, row_shift)] = code; - __set_bit(code, input_dev->keybit); - } - __clear_bit(KEY_RESERVED, input_dev->keybit); + matrix_keypad_build_keymap(keymap_data, row_shift, + input_dev->keycode, input_dev->keybit); input_set_capability(input_dev, EV_MSC, MSC_SCAN); input_set_drvdata(input_dev, keypad); diff --git a/drivers/input/keyboard/w90p910_keypad.c b/drivers/input/keyboard/w90p910_keypad.c index b8598ae124ee..2d03dd0f9e07 100644 --- a/drivers/input/keyboard/w90p910_keypad.c +++ b/drivers/input/keyboard/w90p910_keypad.c @@ -126,7 +126,6 @@ static int __devinit w90p910_keypad_probe(struct platform_device *pdev) struct resource *res; int irq; int error; - int i; if (!pdata) { dev_err(&pdev->dev, "no platform data defined\n"); @@ -197,19 +196,8 @@ static int __devinit w90p910_keypad_probe(struct platform_device *pdev) input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); input_set_capability(input_dev, EV_MSC, MSC_SCAN); - for (i = 0; i < keymap_data->keymap_size; i++) { - unsigned int key = keymap_data->keymap[i]; - unsigned int row = KEY_ROW(key); - unsigned int col = KEY_COL(key); - unsigned short keycode = KEY_VAL(key); - unsigned int scancode = MATRIX_SCAN_CODE(row, col, - W90P910_ROW_SHIFT); - - keypad->keymap[scancode] = keycode; - __set_bit(keycode, input_dev->keybit); - } - __clear_bit(KEY_RESERVED, input_dev->keybit); - + matrix_keypad_build_keymap(keymap_data, W90P910_ROW_SHIFT, + input_dev->keycode, input_dev->keybit); error = request_irq(keypad->irq, w90p910_keypad_irq_handler, IRQF_DISABLED, pdev->name, keypad); diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index 15d5903af2dd..b3cd42d50e16 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -63,4 +63,36 @@ struct matrix_keypad_platform_data { bool wakeup; }; +/** + * matrix_keypad_build_keymap - convert platform keymap into matrix keymap + * @keymap_data: keymap supplied by the platform code + * @row_shift: number of bits to shift row value by to advance to the next + * line in the keymap + * @keymap: expanded version of keymap that is suitable for use by + * matrix keyboard driver + * @keybit: pointer to bitmap of keys supported by input device + * + * This function converts platform keymap (encoded with KEY() macro) into + * an array of keycodes that is suitable for use in a standard matrix + * keyboard driver that uses row and col as indices. + */ +static inline void +matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data, + unsigned int row_shift, + unsigned short *keymap, unsigned long *keybit) +{ + int i; + + for (i = 0; i < keymap_data->keymap_size; i++) { + unsigned int key = keymap_data->keymap[i]; + unsigned int row = KEY_ROW(key); + unsigned int col = KEY_COL(key); + unsigned short code = KEY_VAL(key); + + keymap[MATRIX_SCAN_CODE(row, col, row_shift)] = code; + __set_bit(code, keybit); + } + __clear_bit(KEY_RESERVED, keybit); +} + #endif /* _MATRIX_KEYPAD_H */ -- cgit v1.2.3 From 9d8340687c524ce61e3c9c76758c4c81303acfc0 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 25 Aug 2009 19:24:14 -0700 Subject: Input: add twl4030_keypad driver Add a driver for the keypad controller on TWL4030 family chips. These support up to an 8x8 key matrix. 
The TWL4030 multifunction chips are mostly used on OMAP3 (or OMAP 2430) based boards. [dtor@mail.ru: switch to matrix-keypad framework, fix changing keymap from userspace] Reviewed-by: Trilok Soni Signed-off-by: David Brownell Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 11 + drivers/input/keyboard/Makefile | 1 + drivers/input/keyboard/twl4030_keypad.c | 480 ++++++++++++++++++++++++++++++++ include/linux/i2c/twl4030.h | 19 +- 4 files changed, 505 insertions(+), 6 deletions(-) create mode 100644 drivers/input/keyboard/twl4030_keypad.c (limited to 'include/linux') diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 50e407de8a78..3525c19be428 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -330,6 +330,17 @@ config KEYBOARD_OMAP To compile this driver as a module, choose M here: the module will be called omap-keypad. +config KEYBOARD_TWL4030 + tristate "TI TWL4030/TWL5030/TPS659x0 keypad support" + depends on TWL4030_CORE + help + Say Y here if your board uses the keypad controller on + TWL4030 family chips. It's safe to enable this + even on boards that don't use the keypad controller. + + To compile this driver as a module, choose M here: the + module will be called twl4030_keypad. + config KEYBOARD_TOSA tristate "Tosa keyboard" depends on MACH_TOSA diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 152303029203..8a7a22b30266 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -30,5 +30,6 @@ obj-$(CONFIG_KEYBOARD_SPITZ) += spitzkbd.o obj-$(CONFIG_KEYBOARD_STOWAWAY) += stowaway.o obj-$(CONFIG_KEYBOARD_SUNKBD) += sunkbd.o obj-$(CONFIG_KEYBOARD_TOSA) += tosakbd.o +obj-$(CONFIG_KEYBOARD_TWL4030) += twl4030_keypad.o obj-$(CONFIG_KEYBOARD_XTKBD) += xtkbd.o obj-$(CONFIG_KEYBOARD_W90P910) += w90p910_keypad.o diff --git a/drivers/input/keyboard/twl4030_keypad.c b/drivers/input/keyboard/twl4030_keypad.c new file mode 100644 index 000000000000..9a2977c21696 --- /dev/null +++ b/drivers/input/keyboard/twl4030_keypad.c @@ -0,0 +1,480 @@ +/* + * twl4030_keypad.c - driver for 8x8 keypad controller in twl4030 chips + * + * Copyright (C) 2007 Texas Instruments, Inc. + * Copyright (C) 2008 Nokia Corporation + * + * Code re-written for 2430SDP by: + * Syed Mohammed Khasim + * + * Initial Code: + * Manjunatha G K + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include + + +/* + * The TWL4030 family chips include a keypad controller that supports + * up to an 8x8 switch matrix. The controller can issue system wakeup + * events, since it uses only the always-on 32KiHz oscillator, and has + * an internal state machine that decodes pressed keys, including + * multi-key combinations. 
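+ * A sketch of the board-side plumbing this driver expects (made-up
+ * names; see the next paragraph for the real hooks):
+ *
+ *	static const unsigned int board_keymap[] = {
+ *		KEY(0, 0, KEY_A), KEY(0, 1, KEY_B),
+ *	};
+ *
+ *	static const struct matrix_keymap_data board_map_data = {
+ *		.keymap		= board_keymap,
+ *		.keymap_size	= ARRAY_SIZE(board_keymap),
+ *	};
+ *
+ *	static struct twl4030_keypad_data board_kp_data = {
+ *		.keymap_data	= &board_map_data,
+ *		.rows		= 8,
+ *		.cols		= 8,
+ *		.rep		= true,
+ *	};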
+ * + * This driver lets boards define what keycodes they wish to report for + * which scancodes, as part of the "struct twl4030_keypad_data" used in + * the probe() routine. + * + * See the TPS65950 documentation; that's the general availability + * version of the TWL5030 second generation part. + */ +#define TWL4030_MAX_ROWS 8 /* TWL4030 hard limit */ +#define TWL4030_MAX_COLS 8 +#define TWL4030_ROW_SHIFT 3 +#define TWL4030_KEYMAP_SIZE (TWL4030_MAX_ROWS * TWL4030_MAX_COLS) + +struct twl4030_keypad { + unsigned short keymap[TWL4030_KEYMAP_SIZE]; + u16 kp_state[TWL4030_MAX_ROWS]; + unsigned n_rows; + unsigned n_cols; + unsigned irq; + + struct device *dbg_dev; + struct input_dev *input; +}; + +/*----------------------------------------------------------------------*/ + +/* arbitrary prescaler value 0..7 */ +#define PTV_PRESCALER 4 + +/* Register Offsets */ +#define KEYP_CTRL 0x00 +#define KEYP_DEB 0x01 +#define KEYP_LONG_KEY 0x02 +#define KEYP_LK_PTV 0x03 +#define KEYP_TIMEOUT_L 0x04 +#define KEYP_TIMEOUT_H 0x05 +#define KEYP_KBC 0x06 +#define KEYP_KBR 0x07 +#define KEYP_SMS 0x08 +#define KEYP_FULL_CODE_7_0 0x09 /* row 0 column status */ +#define KEYP_FULL_CODE_15_8 0x0a /* ... row 1 ... */ +#define KEYP_FULL_CODE_23_16 0x0b +#define KEYP_FULL_CODE_31_24 0x0c +#define KEYP_FULL_CODE_39_32 0x0d +#define KEYP_FULL_CODE_47_40 0x0e +#define KEYP_FULL_CODE_55_48 0x0f +#define KEYP_FULL_CODE_63_56 0x10 +#define KEYP_ISR1 0x11 +#define KEYP_IMR1 0x12 +#define KEYP_ISR2 0x13 +#define KEYP_IMR2 0x14 +#define KEYP_SIR 0x15 +#define KEYP_EDR 0x16 /* edge triggers */ +#define KEYP_SIH_CTRL 0x17 + +/* KEYP_CTRL_REG Fields */ +#define KEYP_CTRL_SOFT_NRST BIT(0) +#define KEYP_CTRL_SOFTMODEN BIT(1) +#define KEYP_CTRL_LK_EN BIT(2) +#define KEYP_CTRL_TOE_EN BIT(3) +#define KEYP_CTRL_TOLE_EN BIT(4) +#define KEYP_CTRL_RP_EN BIT(5) +#define KEYP_CTRL_KBD_ON BIT(6) + +/* KEYP_DEB, KEYP_LONG_KEY, KEYP_TIMEOUT_x*/ +#define KEYP_PERIOD_US(t, prescale) ((t) / (31 << (prescale + 1)) - 1) + +/* KEYP_LK_PTV_REG Fields */ +#define KEYP_LK_PTV_PTV_SHIFT 5 + +/* KEYP_{IMR,ISR,SIR} Fields */ +#define KEYP_IMR1_MIS BIT(3) +#define KEYP_IMR1_TO BIT(2) +#define KEYP_IMR1_LK BIT(1) +#define KEYP_IMR1_KP BIT(0) + +/* KEYP_EDR Fields */ +#define KEYP_EDR_KP_FALLING 0x01 +#define KEYP_EDR_KP_RISING 0x02 +#define KEYP_EDR_KP_BOTH 0x03 +#define KEYP_EDR_LK_FALLING 0x04 +#define KEYP_EDR_LK_RISING 0x08 +#define KEYP_EDR_TO_FALLING 0x10 +#define KEYP_EDR_TO_RISING 0x20 +#define KEYP_EDR_MIS_FALLING 0x40 +#define KEYP_EDR_MIS_RISING 0x80 + + +/*----------------------------------------------------------------------*/ + +static int twl4030_kpread(struct twl4030_keypad *kp, + u8 *data, u32 reg, u8 num_bytes) +{ + int ret = twl4030_i2c_read(TWL4030_MODULE_KEYPAD, data, reg, num_bytes); + + if (ret < 0) + dev_warn(kp->dbg_dev, + "Couldn't read TWL4030: %X - ret %d[%x]\n", + reg, ret, ret); + + return ret; +} + +static int twl4030_kpwrite_u8(struct twl4030_keypad *kp, u8 data, u32 reg) +{ + int ret = twl4030_i2c_write_u8(TWL4030_MODULE_KEYPAD, data, reg); + + if (ret < 0) + dev_warn(kp->dbg_dev, + "Could not write TWL4030: %X - ret %d[%x]\n", + reg, ret, ret); + + return ret; +} + +static inline u16 twl4030_col_xlate(struct twl4030_keypad *kp, u8 col) +{ + /* If all bits in a row are active for all columns then + * we have that row line connected to gnd. Mark this + * key on as if it was on matrix position n_cols (i.e. + * one higher than the size of the matrix). 
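+ * For example, with n_cols == 6 a grounded row reads back 0xFF and is
+ * reported as 1 << 6 (0x40), while a normal reading is masked down to
+ * its low six bits (col & 0x3F).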
+ */ + if (col == 0xFF) + return 1 << kp->n_cols; + else + return col & ((1 << kp->n_cols) - 1); +} + +static int twl4030_read_kp_matrix_state(struct twl4030_keypad *kp, u16 *state) +{ + u8 new_state[TWL4030_MAX_ROWS]; + int row; + int ret = twl4030_kpread(kp, new_state, + KEYP_FULL_CODE_7_0, kp->n_rows); + if (ret >= 0) + for (row = 0; row < kp->n_rows; row++) + state[row] = twl4030_col_xlate(kp, new_state[row]); + + return ret; +} + +static int twl4030_is_in_ghost_state(struct twl4030_keypad *kp, u16 *key_state) +{ + int i; + u16 check = 0; + + for (i = 0; i < kp->n_rows; i++) { + u16 col = key_state[i]; + + if ((col & check) && hweight16(col) > 1) + return 1; + + check |= col; + } + + return 0; +} + +static void twl4030_kp_scan(struct twl4030_keypad *kp, bool release_all) +{ + struct input_dev *input = kp->input; + u16 new_state[TWL4030_MAX_ROWS]; + int col, row; + + if (release_all) + memset(new_state, 0, sizeof(new_state)); + else { + /* check for any changes */ + int ret = twl4030_read_kp_matrix_state(kp, new_state); + + if (ret < 0) /* panic ... */ + return; + + if (twl4030_is_in_ghost_state(kp, new_state)) + return; + } + + /* check for changes and print those */ + for (row = 0; row < kp->n_rows; row++) { + int changed = new_state[row] ^ kp->kp_state[row]; + + if (!changed) + continue; + + for (col = 0; col < kp->n_cols; col++) { + int code; + + if (!(changed & (1 << col))) + continue; + + dev_dbg(kp->dbg_dev, "key [%d:%d] %s\n", row, col, + (new_state[row] & (1 << col)) ? + "press" : "release"); + + code = MATRIX_SCAN_CODE(row, col, TWL4030_ROW_SHIFT); + input_event(input, EV_MSC, MSC_SCAN, code); + input_report_key(input, kp->keymap[code], + new_state[row] & (1 << col)); + } + kp->kp_state[row] = new_state[row]; + } + input_sync(input); +} + +/* + * Keypad interrupt handler + */ +static irqreturn_t do_kp_irq(int irq, void *_kp) +{ + struct twl4030_keypad *kp = _kp; + u8 reg; + int ret; + +#ifdef CONFIG_LOCKDEP + /* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which + * we don't want and can't tolerate. Although it might be + * friendlier not to borrow this thread context... + */ + local_irq_enable(); +#endif + + /* Read & Clear TWL4030 pending interrupt */ + ret = twl4030_kpread(kp, ®, KEYP_ISR1, 1); + + /* Release all keys if I2C has gone bad or + * the KEYP has gone to idle state */ + if (ret >= 0 && (reg & KEYP_IMR1_KP)) + twl4030_kp_scan(kp, false); + else + twl4030_kp_scan(kp, true); + + return IRQ_HANDLED; +} + +static int __devinit twl4030_kp_program(struct twl4030_keypad *kp) +{ + u8 reg; + int i; + + /* Enable controller, with hardware decoding but not autorepeat */ + reg = KEYP_CTRL_SOFT_NRST | KEYP_CTRL_SOFTMODEN + | KEYP_CTRL_TOE_EN | KEYP_CTRL_KBD_ON; + if (twl4030_kpwrite_u8(kp, reg, KEYP_CTRL) < 0) + return -EIO; + + /* NOTE: we could use sih_setup() here to package keypad + * event sources as four different IRQs ... but we don't. 
+ */ + + /* Enable TO rising and KP rising and falling edge detection */ + reg = KEYP_EDR_KP_BOTH | KEYP_EDR_TO_RISING; + if (twl4030_kpwrite_u8(kp, reg, KEYP_EDR) < 0) + return -EIO; + + /* Set PTV prescaler Field */ + reg = (PTV_PRESCALER << KEYP_LK_PTV_PTV_SHIFT); + if (twl4030_kpwrite_u8(kp, reg, KEYP_LK_PTV) < 0) + return -EIO; + + /* Set key debounce time to 20 ms */ + i = KEYP_PERIOD_US(20000, PTV_PRESCALER); + if (twl4030_kpwrite_u8(kp, i, KEYP_DEB) < 0) + return -EIO; + + /* Set timeout period to 100 ms */ + i = KEYP_PERIOD_US(200000, PTV_PRESCALER); + if (twl4030_kpwrite_u8(kp, (i & 0xFF), KEYP_TIMEOUT_L) < 0) + return -EIO; + + if (twl4030_kpwrite_u8(kp, (i >> 8), KEYP_TIMEOUT_H) < 0) + return -EIO; + + /* + * Enable Clear-on-Read; disable remembering events that fire + * after the IRQ but before our handler acks (reads) them, + */ + reg = TWL4030_SIH_CTRL_COR_MASK | TWL4030_SIH_CTRL_PENDDIS_MASK; + if (twl4030_kpwrite_u8(kp, reg, KEYP_SIH_CTRL) < 0) + return -EIO; + + /* initialize key state; irqs update it from here on */ + if (twl4030_read_kp_matrix_state(kp, kp->kp_state) < 0) + return -EIO; + + return 0; +} + +/* + * Registers keypad device with input subsystem + * and configures TWL4030 keypad registers + */ +static int __devinit twl4030_kp_probe(struct platform_device *pdev) +{ + struct twl4030_keypad_data *pdata = pdev->dev.platform_data; + const struct matrix_keymap_data *keymap_data = pdata->keymap_data; + struct twl4030_keypad *kp; + struct input_dev *input; + u8 reg; + int error; + + if (!pdata || !pdata->rows || !pdata->cols || + pdata->rows > TWL4030_MAX_ROWS || pdata->cols > TWL4030_MAX_COLS) { + dev_err(&pdev->dev, "Invalid platform_data\n"); + return -EINVAL; + } + + kp = kzalloc(sizeof(*kp), GFP_KERNEL); + input = input_allocate_device(); + if (!kp || !input) { + error = -ENOMEM; + goto err1; + } + + /* Get the debug Device */ + kp->dbg_dev = &pdev->dev; + kp->input = input; + + kp->n_rows = pdata->rows; + kp->n_cols = pdata->cols; + kp->irq = platform_get_irq(pdev, 0); + + /* setup input device */ + __set_bit(EV_KEY, input->evbit); + + /* Enable auto repeat feature of Linux input subsystem */ + if (pdata->rep) + __set_bit(EV_REP, input->evbit); + + input_set_capability(input, EV_MSC, MSC_SCAN); + + input->name = "TWL4030 Keypad"; + input->phys = "twl4030_keypad/input0"; + input->dev.parent = &pdev->dev; + + input->id.bustype = BUS_HOST; + input->id.vendor = 0x0001; + input->id.product = 0x0001; + input->id.version = 0x0003; + + input->keycode = kp->keymap; + input->keycodesize = sizeof(kp->keymap[0]); + input->keycodemax = ARRAY_SIZE(kp->keymap); + + matrix_keypad_build_keymap(keymap_data, TWL4030_ROW_SHIFT, + input->keycode, input->keybit); + + error = input_register_device(input); + if (error) { + dev_err(kp->dbg_dev, + "Unable to register twl4030 keypad device\n"); + goto err1; + } + + error = twl4030_kp_program(kp); + if (error) + goto err2; + + /* + * This ISR will always execute in kernel thread context because of + * the need to access the TWL4030 over the I2C bus. + * + * NOTE: we assume this host is wired to TWL4040 INT1, not INT2 ... + */ + error = request_irq(kp->irq, do_kp_irq, 0, pdev->name, kp); + if (error) { + dev_info(kp->dbg_dev, "request_irq failed for irq no=%d\n", + kp->irq); + goto err3; + } + + /* Enable KP and TO interrupts now. 
*/ + reg = (u8) ~(KEYP_IMR1_KP | KEYP_IMR1_TO); + if (twl4030_kpwrite_u8(kp, reg, KEYP_IMR1)) { + error = -EIO; + goto err4; + } + + platform_set_drvdata(pdev, kp); + return 0; + +err4: + /* mask all events - we don't care about the result */ + (void) twl4030_kpwrite_u8(kp, 0xff, KEYP_IMR1); +err3: + free_irq(kp->irq, NULL); +err2: + input_unregister_device(input); + input = NULL; +err1: + input_free_device(input); + kfree(kp); + return error; +} + +static int __devexit twl4030_kp_remove(struct platform_device *pdev) +{ + struct twl4030_keypad *kp = platform_get_drvdata(pdev); + + free_irq(kp->irq, kp); + input_unregister_device(kp->input); + platform_set_drvdata(pdev, NULL); + kfree(kp); + + return 0; +} + +/* + * NOTE: twl4030 chips are multi-function devices connected via I2C. + * So this device is a child of an I2C parent, thus it needs to + * support unplug/replug (which most platform devices don't). + */ + +static struct platform_driver twl4030_kp_driver = { + .probe = twl4030_kp_probe, + .remove = __devexit_p(twl4030_kp_remove), + .driver = { + .name = "twl4030_keypad", + .owner = THIS_MODULE, + }, +}; + +static int __init twl4030_kp_init(void) +{ + return platform_driver_register(&twl4030_kp_driver); +} +module_init(twl4030_kp_init); + +static void __exit twl4030_kp_exit(void) +{ + platform_driver_unregister(&twl4030_kp_driver); +} +module_exit(twl4030_kp_exit); + +MODULE_AUTHOR("Texas Instruments"); +MODULE_DESCRIPTION("TWL4030 Keypad Driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:twl4030_keypad"); + diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index 0dc80ef24975..3fd21d7cb6bf 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -25,6 +25,9 @@ #ifndef __TWL4030_H_ #define __TWL4030_H_ +#include +#include + /* * Using the twl4030 core we address registers using a pair * { module id, relative register offset } @@ -302,13 +305,17 @@ struct twl4030_madc_platform_data { int irq_line; }; +/* Boards have unique mappings of {col, row} --> keycode. + * Column and row are 4 bits, but range only from 0..7. + * A PERSISTENT_KEY is "always on" and never reported. + */ +#define PERSISTENT_KEY(c, r) KEY((c), (r), KEY_RESERVED) + struct twl4030_keypad_data { - int rows; - int cols; - int *keymap; - int irq; - unsigned int keymapsize; - unsigned int rep:1; + const struct matrix_keymap_data *keymap_data; + unsigned rows; + unsigned cols; + bool rep; }; enum twl4030_usb_mode { -- cgit v1.2.3 From 468de9e54a900559b55aa939a4daeaea1915e572 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 27 Aug 2009 12:07:40 -0400 Subject: nfsd41: expand solo sequence check Compounds consisting of only a sequence operation don't need any additional caching beyond the sequence information we store in the slot entry. Fix nfsd4_is_solo_sequence to identify this case correctly. The additional check for a failed sequence in nfsd4_store_cache_entry() is redundant, since the nfsd4_is_solo_sequence call lower down catches this case. The final ce_cachethis set in nfsd4_sequence is also redundant. Signed-off-by: Andy Adamson Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfs4state.c | 9 --------- include/linux/nfsd/xdr4.h | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5f634d24861c..b44a2cfde6f1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -991,16 +991,10 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; struct svc_rqst *rqstp = resp->rqstp; - struct nfsd4_compoundargs *args = rqstp->rq_argp; - struct nfsd4_op *op = &args->ops[resp->opcnt]; struct kvec *resv = &rqstp->rq_res.head[0]; dprintk("--> %s entry %p\n", __func__, entry); - /* Don't cache a failed OP_SEQUENCE. */ - if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) - return; - nfsd4_release_respages(entry->ce_respages, entry->ce_resused); entry->ce_opcnt = resp->opcnt; entry->ce_status = resp->cstate.status; @@ -1490,9 +1484,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, slot->sl_inuse = true; slot->sl_seqid = seq->seqid; slot->sl_cache_entry.ce_cachethis = seq->cachethis; - /* Always set the cache entry cachethis for solo sequence */ - if (nfsd4_is_solo_sequence(resp)) - slot->sl_cache_entry.ce_cachethis = 1; cstate->slot = slot; cstate->session = session; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 5e4beb0deb80..3f716607c86d 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -467,7 +467,7 @@ struct nfsd4_compoundres { static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) { struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; - return args->opcnt == 1; + return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; } static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) -- cgit v1.2.3 From 7285dd7fd375763bfb8ab1ac9cf3f1206f503c16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 28 Aug 2009 20:25:24 +0200 Subject: clocksource: Resolve cpu hotplug dead lock with TSC unstable Martin Schwidefsky analyzed it: To register a clocksource the clocksource_mutex is acquired and if necessary timekeeping_notify is called to install the clocksource as the timekeeper clock. timekeeping_notify uses stop_machine which needs to take cpu_add_remove_lock mutex. Starting a new cpu is done with the cpu_add_remove_lock mutex held. native_cpu_up checks the tsc of the new cpu and if the tsc is no good clocksource_change_rating is called. Which needs the clocksource_mutex and the deadlock is complete. The solution is to replace the TSC via the clocksource watchdog mechanism. Mark the TSC as unstable and schedule the watchdog work so it gets removed in the watchdog thread context. 
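[editor's note: the lock cycle above is easier to see side by side. The sketch below is a standalone userspace analogy, not kernel code; the mutex names are borrowed from the commit message and everything else is invented. It shows the same escape hatch the patch uses: the path that already holds the hotplug lock only marks the clocksource under a small third lock and defers the rating change to a worker that runs with neither of the conflicting locks held.]

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t clocksource_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t cpu_add_remove_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t watchdog_lock = PTHREAD_MUTEX_INITIALIZER;
static bool tsc_unstable;

static void *cpu_up_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&cpu_add_remove_lock);
	/* Taking clocksource_mutex here would complete the A->B, B->A
	 * cycle described above. Instead, mark the state under a small
	 * third lock and let the watchdog thread do the real work. */
	pthread_mutex_lock(&watchdog_lock);
	tsc_unstable = true;
	pthread_mutex_unlock(&watchdog_lock);
	pthread_mutex_unlock(&cpu_add_remove_lock);
	return NULL;
}

static void *watchdog_work(void *arg)
{
	bool unstable;

	(void)arg;
	pthread_mutex_lock(&watchdog_lock);
	unstable = tsc_unstable;
	pthread_mutex_unlock(&watchdog_lock);
	if (unstable) {
		/* safe: this context holds no hotplug lock */
		pthread_mutex_lock(&clocksource_mutex);
		puts("rating changed from watchdog context");
		pthread_mutex_unlock(&clocksource_mutex);
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, cpu_up_path, NULL);
	pthread_join(t, NULL);
	pthread_create(&t, NULL, watchdog_work, NULL);
	pthread_join(t, NULL);
	return 0;
}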
Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Martin Schwidefsky Cc: John Stultz --- arch/x86/kernel/tsc.c | 8 +++++--- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 33 ++++++++++++++++++++++++++++++--- 3 files changed, 36 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 968425422c46..fc3672a303d6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -767,12 +767,14 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; - printk("Marking TSC unstable due to %s\n", reason); + printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else + clocksource_mark_unstable(&clocksource_tsc); + else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; clocksource_tsc.rating = 0; + } } } diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 9ea40ff26f0e..83d2fbd81b93 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -277,6 +277,7 @@ extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); extern struct clocksource * __init __weak clocksource_default_clock(void); +extern void clocksource_mark_unstable(struct clocksource *cs); #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e0c86ad6e9fb..a0af4ffcb6e5 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -149,15 +149,42 @@ static void clocksource_watchdog_work(struct work_struct *work) kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog"); } -static void clocksource_unstable(struct clocksource *cs, int64_t delta) +static void __clocksource_unstable(struct clocksource *cs) { - printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", - cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); cs->flags |= CLOCK_SOURCE_UNSTABLE; schedule_work(&watchdog_work); } +static void clocksource_unstable(struct clocksource *cs, int64_t delta) +{ + printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", + cs->name, delta); + __clocksource_unstable(cs); +} + +/** + * clocksource_mark_unstable - mark clocksource unstable via watchdog + * @cs: clocksource to be marked unstable + * + * This function is called instead of clocksource_change_rating from + * cpu hotplug code to avoid a deadlock between the clocksource mutex + * and the cpu hotplug mutex. It defers the update of the clocksource + * to the watchdog thread. + */ +void clocksource_mark_unstable(struct clocksource *cs) +{ + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) { + if (list_empty(&cs->wd_list)) + list_add(&cs->wd_list, &watchdog_list); + __clocksource_unstable(cs); + } + spin_unlock_irqrestore(&watchdog_lock, flags); +} + static void clocksource_watchdog(unsigned long data) { struct clocksource *cs; -- cgit v1.2.3 From 9e03fdfd05e733e1136d431973625b174029c5e6 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 20 Aug 2009 09:21:45 -0700 Subject: mac80211: Update mesh config IE to 11s draft 3.02 The mesh config information element has changed significantly since draft 1.08 This patch brings it up to date. 
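[editor's note: as a reading aid, the element layout implied by the new offsets in the diff below (PP_OFFSET 1, PM_OFFSET 5, CC_OFFSET 9, SP_OFFSET 13, AUTH_OFFSET 17, CAPAB_OFFSET 22, IEEE80211_MESH_CONFIG_LEN 24) can be written as a packed struct. The struct and field names here are invented for illustration; the patch itself works with raw byte offsets.]

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct mesh_config_ie {		/* hypothetical mirror of the IE body */
	uint8_t version;	/* offset 0 */
	uint8_t pp_id[4];	/* 1: path selection protocol */
	uint8_t pm_id[4];	/* 5: path selection metric */
	uint8_t cc_id[4];	/* 9: congestion control mode */
	uint8_t sp_id[4];	/* 13: synchronization protocol (new) */
	uint8_t auth_id[4];	/* 17: authentication protocol (new) */
	uint8_t formation_info;	/* 21: mesh formation info (new) */
	uint8_t capability;	/* 22: CAPAB_FORWARDING, CAPAB_ACCEPT_PLINKS */
	uint8_t reserved;	/* 23: trailing 0x00 written by the patch */
} __attribute__((packed));

int main(void)
{
	/* must agree with IEEE80211_MESH_CONFIG_LEN and CAPAB_OFFSET */
	assert(sizeof(struct mesh_config_ie) == 24);
	assert(offsetof(struct mesh_config_ie, capability) == 22);
	return 0;
}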
Thanks to Sam Leffler and Rui Paulo for identifying this. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 +- net/mac80211/ieee80211_i.h | 4 ++++ net/mac80211/mesh.c | 49 ++++++++++++++++++++++++++++++++-------------- 3 files changed, 39 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 21556a2d9e7e..52e15e079c61 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -115,7 +115,7 @@ #define IEEE80211_MAX_SSID_LEN 32 #define IEEE80211_MAX_MESH_ID_LEN 32 -#define IEEE80211_MESH_CONFIG_LEN 19 +#define IEEE80211_MESH_CONFIG_LEN 24 #define IEEE80211_QOS_CTL_LEN 2 #define IEEE80211_QOS_CTL_TID_MASK 0x000F diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 93e618a980d1..455cc7ade9f5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -367,6 +367,10 @@ struct ieee80211_if_mesh { u8 mesh_pm_id[4]; /* Congestion Control Mode Identifier */ u8 mesh_cc_id[4]; + /* Synchronization Protocol Identifier */ + u8 mesh_sp_id[4]; + /* Authentication Protocol Identifier */ + u8 mesh_auth_id[4]; /* Local mesh Destination Sequence Number */ u32 dsn; /* Last used PREQ ID */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 3185e18c8214..f7364e56f1ee 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -18,8 +18,11 @@ #define PP_OFFSET 1 /* Path Selection Protocol */ #define PM_OFFSET 5 /* Path Selection Metric */ #define CC_OFFSET 9 /* Congestion Control Mode */ -#define CAPAB_OFFSET 17 -#define ACCEPT_PLINKS 0x80 +#define SP_OFFSET 13 /* Synchronization Protocol */ +#define AUTH_OFFSET 17 /* Authentication Protocol */ +#define CAPAB_OFFSET 22 +#define CAPAB_ACCEPT_PLINKS 0x80 +#define CAPAB_FORWARDING 0x10 #define TMR_RUNNING_HK 0 #define TMR_RUNNING_MP 1 @@ -84,7 +87,9 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && memcmp(ifmsh->mesh_pp_id, ie->mesh_config + PP_OFFSET, 4) == 0 && memcmp(ifmsh->mesh_pm_id, ie->mesh_config + PM_OFFSET, 4) == 0 && - memcmp(ifmsh->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0) + memcmp(ifmsh->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0 && + memcmp(ifmsh->mesh_sp_id, ie->mesh_config + SP_OFFSET, 4) == 0 && + memcmp(ifmsh->mesh_auth_id, ie->mesh_config + AUTH_OFFSET, 4) == 0) return true; return false; @@ -97,7 +102,7 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat */ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { - return (*(ie->mesh_config + CAPAB_OFFSET) & ACCEPT_PLINKS) != 0; + return (*(ie->mesh_config + CAPAB_OFFSET) & CAPAB_ACCEPT_PLINKS) != 0; } /** @@ -123,11 +128,18 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) void mesh_ids_set_default(struct ieee80211_if_mesh *sta) { - u8 def_id[4] = {0x00, 0x0F, 0xAC, 0xff}; - - memcpy(sta->mesh_pp_id, def_id, 4); - memcpy(sta->mesh_pm_id, def_id, 4); - memcpy(sta->mesh_cc_id, def_id, 4); + u8 oui[3] = {0x00, 0x0F, 0xAC}; + + memcpy(sta->mesh_pp_id, oui, sizeof(oui)); + memcpy(sta->mesh_pm_id, oui, sizeof(oui)); + memcpy(sta->mesh_cc_id, oui, sizeof(oui)); + memcpy(sta->mesh_sp_id, oui, sizeof(oui)); + memcpy(sta->mesh_auth_id, oui, sizeof(oui)); + sta->mesh_pp_id[sizeof(oui)] = 0; + sta->mesh_pm_id[sizeof(oui)] = 0; + sta->mesh_cc_id[sizeof(oui)] = 0xff; + sta->mesh_sp_id[sizeof(oui)] = 0xff; + sta->mesh_auth_id[sizeof(oui)] = 0x0; } int 
mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -245,7 +257,7 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) if (sdata->u.mesh.mesh_id_len) memcpy(pos, sdata->u.mesh.mesh_id, sdata->u.mesh.mesh_id_len); - pos = skb_put(skb, 21); + pos = skb_put(skb, 2 + IEEE80211_MESH_CONFIG_LEN); *pos++ = WLAN_EID_MESH_CONFIG; *pos++ = IEEE80211_MESH_CONFIG_LEN; /* Version */ @@ -263,15 +275,22 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) memcpy(pos, sdata->u.mesh.mesh_cc_id, 4); pos += 4; - /* Channel precedence: - * Not running simple channel unification protocol - */ - memset(pos, 0x00, 4); + /* Synchronization protocol identifier */ + memcpy(pos, sdata->u.mesh.mesh_sp_id, 4); pos += 4; + /* Authentication Protocol identifier */ + memcpy(pos, sdata->u.mesh.mesh_auth_id, 4); + pos += 4; + + /* Mesh Formation Info */ + memset(pos, 0x00, 1); + pos += 1; + /* Mesh capability */ sdata->u.mesh.accepting_plinks = mesh_plink_availables(sdata); - *pos++ = sdata->u.mesh.accepting_plinks ? ACCEPT_PLINKS : 0x00; + *pos = CAPAB_FORWARDING; + *pos++ |= sdata->u.mesh.accepting_plinks ? CAPAB_ACCEPT_PLINKS : 0x00; *pos++ = 0x00; return; -- cgit v1.2.3 From 06e4da268c0e8f3b8408403d65e47d2885a78ff2 Mon Sep 17 00:00:00 2001 From: Gábor Stefanik Date: Wed, 26 Aug 2009 20:51:26 +0200 Subject: ssb: Implement PMU LDO control and use it in b43 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the "PMU LDO set voltage" and "PMU LDO PA ref enable" functions, and use them during LP-PHY baseband init in b43. Signed-off-by: Gábor Stefanik Signed-off-by: John W. Linville --- drivers/net/wireless/b43/phy_lp.c | 10 +--- drivers/ssb/driver_chipcommon_pmu.c | 94 +++++++++++++++++++++++++++++++ include/linux/ssb/ssb_driver_chipcommon.h | 10 ++++ 3 files changed, 107 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/b43/phy_lp.c b/drivers/net/wireless/b43/phy_lp.c index 1a57d3390e92..80f245c04042 100644 --- a/drivers/net/wireless/b43/phy_lp.c +++ b/drivers/net/wireless/b43/phy_lp.c @@ -234,19 +234,15 @@ static void lpphy_baseband_rev0_1_init(struct b43_wldev *dev) if ((bus->sprom.boardflags_lo & B43_BFL_FEM) && ((b43_current_band(dev->wl) == IEEE80211_BAND_5GHZ) || (bus->sprom.boardflags_hi & B43_BFH_PAREF))) { - /* TODO: - * Set the LDO voltage to 0x0028 - FIXME: What is this? 
- * Call sb_pmu_set_ldo_voltage with 4 and the LDO voltage - * as arguments - * Call sb_pmu_paref_ldo_enable with argument TRUE - */ + ssb_pmu_set_ldo_voltage(&bus->chipco, LDO_PAREF, 0x28); + ssb_pmu_set_ldo_paref(&bus->chipco, true); if (dev->phy.rev == 0) { b43_phy_maskset(dev, B43_LPPHY_LP_RF_SIGNAL_LUT, 0xFFCF, 0x0010); } b43_lptab_write(dev, B43_LPTAB16(11, 7), 60); } else { - //TODO: Call ssb_pmu_paref_ldo_enable with argument FALSE + ssb_pmu_set_ldo_paref(&bus->chipco, false); b43_phy_maskset(dev, B43_LPPHY_LP_RF_SIGNAL_LUT, 0xFFCF, 0x0020); b43_lptab_write(dev, B43_LPTAB16(11, 7), 100); diff --git a/drivers/ssb/driver_chipcommon_pmu.c b/drivers/ssb/driver_chipcommon_pmu.c index 4aaddeec55a2..64abd11f6fbb 100644 --- a/drivers/ssb/driver_chipcommon_pmu.c +++ b/drivers/ssb/driver_chipcommon_pmu.c @@ -28,6 +28,21 @@ static void ssb_chipco_pll_write(struct ssb_chipcommon *cc, chipco_write32(cc, SSB_CHIPCO_PLLCTL_DATA, value); } +static void ssb_chipco_regctl_maskset(struct ssb_chipcommon *cc, + u32 offset, u32 mask, u32 set) +{ + u32 value; + + chipco_read32(cc, SSB_CHIPCO_REGCTL_ADDR); + chipco_write32(cc, SSB_CHIPCO_REGCTL_ADDR, offset); + chipco_read32(cc, SSB_CHIPCO_REGCTL_ADDR); + value = chipco_read32(cc, SSB_CHIPCO_REGCTL_DATA); + value &= mask; + value |= set; + chipco_write32(cc, SSB_CHIPCO_REGCTL_DATA, value); + chipco_read32(cc, SSB_CHIPCO_REGCTL_DATA); +} + struct pmu0_plltab_entry { u16 freq; /* Crystal frequency in kHz.*/ u8 xf; /* Crystal frequency value for PMU control */ @@ -506,3 +521,82 @@ void ssb_pmu_init(struct ssb_chipcommon *cc) ssb_pmu_pll_init(cc); ssb_pmu_resources_init(cc); } + +void ssb_pmu_set_ldo_voltage(struct ssb_chipcommon *cc, + enum ssb_pmu_ldo_volt_id id, u32 voltage) +{ + struct ssb_bus *bus = cc->dev->bus; + u32 addr, shift, mask; + + switch (bus->chip_id) { + case 0x4328: + case 0x5354: + switch (id) { + case LDO_VOLT1: + addr = 2; + shift = 25; + mask = 0xF; + break; + case LDO_VOLT2: + addr = 3; + shift = 1; + mask = 0xF; + break; + case LDO_VOLT3: + addr = 3; + shift = 9; + mask = 0xF; + break; + case LDO_PAREF: + addr = 3; + shift = 17; + mask = 0x3F; + break; + default: + SSB_WARN_ON(1); + return; + } + break; + case 0x4312: + if (SSB_WARN_ON(id != LDO_PAREF)) + return; + addr = 0; + shift = 21; + mask = 0x3F; + break; + default: + return; + } + + ssb_chipco_regctl_maskset(cc, addr, ~(mask << shift), + (voltage & mask) << shift); +} + +void ssb_pmu_set_ldo_paref(struct ssb_chipcommon *cc, bool on) +{ + struct ssb_bus *bus = cc->dev->bus; + int ldo; + + switch (bus->chip_id) { + case 0x4312: + ldo = SSB_PMURES_4312_PA_REF_LDO; + break; + case 0x4328: + ldo = SSB_PMURES_4328_PA_REF_LDO; + break; + case 0x5354: + ldo = SSB_PMURES_5354_PA_REF_LDO; + break; + default: + return; + } + + if (on) + chipco_set32(cc, SSB_CHIPCO_PMU_MINRES_MSK, 1 << ldo); + else + chipco_mask32(cc, SSB_CHIPCO_PMU_MINRES_MSK, ~(1 << ldo)); + chipco_read32(cc, SSB_CHIPCO_PMU_MINRES_MSK); //SPEC FIXME found via mmiotrace - dummy read? 
+} + +EXPORT_SYMBOL(ssb_pmu_set_ldo_voltage); +EXPORT_SYMBOL(ssb_pmu_set_ldo_paref); diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h index d3b1d18922f2..4e27acf0a92f 100644 --- a/include/linux/ssb/ssb_driver_chipcommon.h +++ b/include/linux/ssb/ssb_driver_chipcommon.h @@ -629,5 +629,15 @@ extern int ssb_chipco_serial_init(struct ssb_chipcommon *cc, /* PMU support */ extern void ssb_pmu_init(struct ssb_chipcommon *cc); +enum ssb_pmu_ldo_volt_id { + LDO_PAREF = 0, + LDO_VOLT1, + LDO_VOLT2, + LDO_VOLT3, +}; + +void ssb_pmu_set_ldo_voltage(struct ssb_chipcommon *cc, + enum ssb_pmu_ldo_volt_id id, u32 voltage); +void ssb_pmu_set_ldo_paref(struct ssb_chipcommon *cc, bool on); #endif /* LINUX_SSB_CHIPCO_H_ */ -- cgit v1.2.3 From e55a5999ffcf72dc4d43d73618957964cb87065a Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 28 Jul 2009 17:41:53 +0800 Subject: ACPI: Handle CONFIG_ACPI=n better from linux/acpi.h linux/acpi.h is the top level header for interfacing with the ACPI sub-system, so acpi_disabled should be up there instead of down in asm/acpi.h -- particularly since asm/acpi.h doesn't exist for all architectures. Same story for acpi_table_parse(), which is a top-level API to Linux/ACPI. This is necessary for building some code that used to always depend on CONFIG_ACPI=y, but will soon also need to build with CONFIG_ACPI=n. Signed-off-by: Feng Tang Signed-off-by: Len Brown --- arch/x86/include/asm/acpi.h | 1 - include/linux/acpi.h | 11 ++++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 20d1465a2ab0..4518dc500903 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -144,7 +144,6 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) #else /* !CONFIG_ACPI */ -#define acpi_disabled 1 #define acpi_lapic 0 #define acpi_ioapic 0 static inline void acpi_noirq_set(void) { } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 34321cfffeab..3fce811bf9ac 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -292,7 +292,10 @@ void __init acpi_s4_no_nvs(void); extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); extern void acpi_early_init(void); -#else /* CONFIG_ACPI */ +#else /* !CONFIG_ACPI */ + +#define acpi_disabled 1 + static inline void acpi_early_init(void) { } static inline int early_acpi_boot_init(void) @@ -331,5 +334,11 @@ static inline int acpi_check_mem_region(resource_size_t start, return 0; } +struct acpi_table_header; +static inline int acpi_table_parse(char *id, + int (*handler)(struct acpi_table_header *)) +{ + return -1; +} #endif /* !CONFIG_ACPI */ #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From 117a9ac777f8034d4675b821172d2ff71f6ec47a Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 14 Aug 2009 15:10:24 -0400 Subject: SFI: create linux/sfi.h include/linux/sfi.h defines everything that customers of SFI need to know in order to use the SFI support in the kernel. The primary API is sfi_table_parse(), where a driver or another part of the kernel can supply a handler to parse the named table. sfi.h also includes the currently defined table signatures and table formats.
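[editor's note: a sketch of how a kernel consumer of this header would typically look, assuming a table of sfi_cpu_table_entry records wrapped in the sfi_table_simple layout defined below; the handler name and the pr_info text are invented for illustration.]

static int __init sfi_parse_cpus(struct sfi_table_header *table)
{
	struct sfi_table_simple *sb = (struct sfi_table_simple *)table;
	struct sfi_cpu_table_entry *pentry;
	int i, num;

	num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
	pentry = (struct sfi_cpu_table_entry *)sb->pentry;
	for (i = 0; i < num; i++, pentry++)
		pr_info("SFI: CPU apic id 0x%08x\n", pentry->apic_id);
	return 0;
}

/* then, during platform setup: */
sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);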
Signed-off-by: Feng Tang Signed-off-by: Len Brown --- include/linux/sfi.h | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 include/linux/sfi.h (limited to 'include/linux') diff --git a/include/linux/sfi.h b/include/linux/sfi.h new file mode 100644 index 000000000000..9a6f7607174e --- /dev/null +++ b/include/linux/sfi.h @@ -0,0 +1,206 @@ +/* sfi.h Simple Firmware Interface */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef _LINUX_SFI_H +#define _LINUX_SFI_H + +/* Table signatures reserved by the SFI specification */ +#define SFI_SIG_SYST "SYST" +#define SFI_SIG_FREQ "FREQ" +#define SFI_SIG_IDLE "IDLE" +#define SFI_SIG_CPUS "CPUS" +#define SFI_SIG_MTMR "MTMR" +#define SFI_SIG_MRTC "MRTC" +#define SFI_SIG_MMAP "MMAP" +#define SFI_SIG_APIC "APIC" +#define SFI_SIG_XSDT "XSDT" +#define SFI_SIG_WAKE "WAKE" +#define SFI_SIG_SPIB "SPIB" +#define SFI_SIG_I2CB "I2CB" +#define SFI_SIG_GPEM "GPEM" + +#define SFI_SIGNATURE_SIZE 4 +#define SFI_OEM_ID_SIZE 6 +#define SFI_OEM_TABLE_ID_SIZE 8 + +#define SFI_SYST_SEARCH_BEGIN 0x000E0000 +#define SFI_SYST_SEARCH_END 0x000FFFFF + +#define SFI_GET_NUM_ENTRIES(ptable, entry_type) \ + ((ptable->header.len - sizeof(struct sfi_table_header)) / \ + (sizeof(entry_type))) +/* + * Table structures must be byte-packed to match the SFI specification, + * as they are provided by the BIOS. + */ +struct sfi_table_header { + char sig[SFI_SIGNATURE_SIZE]; + u32 len; + u8 rev; + u8 csum; + char oem_id[SFI_OEM_ID_SIZE]; + char oem_table_id[SFI_OEM_TABLE_ID_SIZE]; +} __packed; + +struct sfi_table_simple { + struct sfi_table_header header; + u64 pentry[1]; +} __packed; + +/* Comply with UEFI spec 2.1 */ +struct sfi_mem_entry { + u32 type; + u64 phys_start; + u64 virt_start; + u64 pages; + u64 attrib; +} __packed; + +struct sfi_cpu_table_entry { + u32 apic_id; +} __packed; + +struct sfi_cstate_table_entry { + u32 hint; /* MWAIT hint */ + u32 latency; /* latency in ms */ +} __packed; + +struct sfi_apic_table_entry { + u64 phys_addr; /* phy base addr for APIC reg */ +} __packed; + +struct sfi_freq_table_entry { + u32 freq_mhz; /* in MHZ */ + u32 latency; /* transition latency in ms */ + u32 ctrl_val; /* value to write to PERF_CTL */ +} __packed; + +struct sfi_wake_table_entry { + u64 phys_addr; /* pointer to where the wake vector locates */ +} __packed; + +struct sfi_timer_table_entry { + u64 phys_addr; /* phy base addr for the timer */ + u32 freq_hz; /* in HZ */ + u32 irq; +} __packed; + +struct sfi_rtc_table_entry { + u64 phys_addr; /* phy base addr for the RTC */ + u32 irq; +} __packed; + +struct sfi_spi_table_entry { + u16 host_num; /* attached to host 0, 1...*/ + u16 cs; /* chip select */ + u16 irq_info; + char name[16]; + u8 dev_info[10]; +} __packed; + +struct sfi_i2c_table_entry { + u16 host_num; + u16 addr; /* slave addr */ + u16 irq_info; + char name[16]; + u8 dev_info[10]; +} __packed; + +struct sfi_gpe_table_entry { + u16 logical_id; /* logical id */ + u16 phys_id; /* physical GPE id */ +} __packed; + + +typedef int (*sfi_table_handler) (struct sfi_table_header *table); + +#ifdef CONFIG_SFI +extern void __init sfi_init(void); +extern int __init sfi_platform_init(void); +extern void __init sfi_init_late(void); +extern int sfi_table_parse(char *signature, char *oem_id, char *oem_table_id, + sfi_table_handler handler); + +extern int sfi_disabled; +static inline void disable_sfi(void) +{ + sfi_disabled = 1; +} + +#else /* !CONFIG_SFI */ + +static inline void sfi_init(void) +{ +} + +static inline void sfi_init_late(void) +{ +} + +#define sfi_disabled 0 + +static inline int sfi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + sfi_table_handler handler) +{ + return -1; +} + +#endif /* !CONFIG_SFI */ + +#endif /*_LINUX_SFI_H*/ -- cgit v1.2.3 From 13e82d023c4c3f13ab1e665cbb917a7ebba8935c Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 14 Aug 2009 15:17:53 -0400 Subject: SFI: add capability to parse ACPI tables Extend SFI to access standard ACPI tables. (eg. 
the PCI MCFG) using sfi_acpi_table_parse(). Note that this is _not_ a hybrid ACPI + SFI mode. The platform boots in either ACPI mode or SFI mode. SFI runs only with acpi_disabled=1, which can be set at build-time via CONFIG_ACPI=n, or at boot time by the failure to find ACPI platform support. So this extension simply allows SFI-platforms to re-use existing standard table formats that happen to be defined to live in ACPI envelopes. Signed-off-by: Feng Tang Signed-off-by: Len Brown --- drivers/sfi/sfi_acpi.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sfi_acpi.h | 93 +++++++++++++++++++++++++ 2 files changed, 268 insertions(+) create mode 100644 drivers/sfi/sfi_acpi.c create mode 100644 include/linux/sfi_acpi.h (limited to 'include/linux') diff --git a/drivers/sfi/sfi_acpi.c b/drivers/sfi/sfi_acpi.c new file mode 100644 index 000000000000..34aba30eb84b --- /dev/null +++ b/drivers/sfi/sfi_acpi.c @@ -0,0 +1,175 @@ +/* sfi_acpi.c Simple Firmware Interface - ACPI extensions */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#define KMSG_COMPONENT "SFI" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include + +#include +#include "sfi_core.h" + +/* + * SFI can access ACPI-defined tables via an optional ACPI XSDT. + * + * This allows re-use, and avoids re-definition, of standard tables. + * For example, the "MCFG" table is defined by PCI, reserved by ACPI, + * and is expected to be present many SFI-only systems. + */ + +static struct acpi_table_xsdt *xsdt_va __read_mostly; + +#define XSDT_GET_NUM_ENTRIES(ptable, entry_type) \ + ((ptable->header.length - sizeof(struct acpi_table_header)) / \ + (sizeof(entry_type))) + +static inline struct sfi_table_header *acpi_to_sfi_th( + struct acpi_table_header *th) +{ + return (struct sfi_table_header *)th; +} + +static inline struct acpi_table_header *sfi_to_acpi_th( + struct sfi_table_header *th) +{ + return (struct acpi_table_header *)th; +} + +/* + * sfi_acpi_parse_xsdt() + * + * Parse the ACPI XSDT for later access by sfi_acpi_table_parse(). + */ +static int __init sfi_acpi_parse_xsdt(struct sfi_table_header *th) +{ + struct sfi_table_key key = SFI_ANY_KEY; + int tbl_cnt, i; + void *ret; + + xsdt_va = (struct acpi_table_xsdt *)th; + tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64); + for (i = 0; i < tbl_cnt; i++) { + ret = sfi_check_table(xsdt_va->table_offset_entry[i], &key); + if (IS_ERR(ret)) { + disable_sfi(); + return -1; + } + } + + return 0; +} + +int __init sfi_acpi_init(void) +{ + struct sfi_table_key xsdt_key = { .sig = SFI_SIG_XSDT }; + + sfi_table_parse(SFI_SIG_XSDT, NULL, NULL, sfi_acpi_parse_xsdt); + + /* Only call the get_table to keep the table mapped */ + xsdt_va = (struct acpi_table_xsdt *)sfi_get_table(&xsdt_key); + return 0; +} + +static struct acpi_table_header *sfi_acpi_get_table(struct sfi_table_key *key) +{ + u32 tbl_cnt, i; + void *ret; + + tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64); + for (i = 0; i < tbl_cnt; i++) { + ret = sfi_check_table(xsdt_va->table_offset_entry[i], key); + if (!IS_ERR(ret) && ret) + return sfi_to_acpi_th(ret); + } + + return NULL; +} + +static void sfi_acpi_put_table(struct acpi_table_header *table) +{ + sfi_put_table(acpi_to_sfi_th(table)); +} + +/* + * sfi_acpi_table_parse() + * + * Find specified table in XSDT, run handler on it and return its return value + */ +int sfi_acpi_table_parse(char *signature, char *oem_id, char *oem_table_id, + int(*handler)(struct acpi_table_header *)) +{ + struct acpi_table_header *table = NULL; + struct sfi_table_key key; + int ret = 0; + + if (sfi_disabled) + return -1; + + key.sig = signature; + key.oem_id = oem_id; + key.oem_table_id = oem_table_id; + + table = sfi_acpi_get_table(&key); + if (!table) + return -EINVAL; + + ret = handler(table); + sfi_acpi_put_table(table); + return ret; +} diff --git a/include/linux/sfi_acpi.h b/include/linux/sfi_acpi.h new file mode 100644 index 000000000000..c4a5a8cd4469 --- /dev/null +++ b/include/linux/sfi_acpi.h @@ -0,0 +1,93 @@ +/* sfi.h Simple Firmware Interface */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef _LINUX_SFI_ACPI_H +#define _LINUX_SFI_ACPI_H + +#ifdef CONFIG_SFI +#include /* struct acpi_table_header */ + +extern int sfi_acpi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + int (*handler)(struct acpi_table_header *)); + +static inline int acpi_sfi_table_parse(char *signature, + int (*handler)(struct acpi_table_header *)) +{ + if (!acpi_table_parse(signature, handler)) + return 0; + + return sfi_acpi_table_parse(signature, NULL, NULL, handler); +} +#else /* !CONFIG_SFI */ + +static inline int sfi_acpi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + int (*handler)(struct acpi_table_header *)) +{ + return -1; +} + +static inline int acpi_sfi_table_parse(char *signature, + int (*handler)(struct acpi_table_header *)) +{ + return acpi_table_parse(signature, handler); +} +#endif /* !CONFIG_SFI */ + +#endif /*_LINUX_SFI_ACPI_H*/ -- cgit v1.2.3 From dd5d19bafd90d33043a4a14b2e2d98612caa293c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Aug 2009 14:58:16 -0700 Subject: rcu: Create rcutree plugins to handle hotplug CPU for multi-level trees When offlining CPUs from a multi-level tree, there is the possibility of offlining the last CPU from a given node when there are preempted RCU read-side critical sections that started life on one of the CPUs on that node. 
In this case, the corresponding tasks will be enqueued via the task_struct's rcu_node_entry list_head onto one of the rcu_node's blocked_tasks[] lists. These tasks need to be moved somewhere else so that they will prevent the current grace period from ending. That somewhere is the root rcu_node. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <20090827215816.GA30472@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 2 +- include/linux/sched.h | 4 +-- kernel/rcutree.c | 2 ++ kernel/rcutree_plugin.h | 69 +++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 69 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 79d4baee31b6..9e7f2e8fc66e 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -98,7 +98,7 @@ extern struct group_info init_groups; #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ - .rcu_blocked_cpu = -1, \ + .rcu_blocked_node = NULL, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), #else #define INIT_TASK_RCU_PREEMPT(tsk) diff --git a/include/linux/sched.h b/include/linux/sched.h index bfca26d63b13..3fe03151a8e6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1208,7 +1208,7 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; - int rcu_blocked_cpu; + void *rcu_blocked_node; struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ @@ -1735,7 +1735,7 @@ static inline void rcu_copy_process(struct task_struct *p) { p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special = 0; - p->rcu_blocked_cpu = -1; + p->rcu_blocked_node = NULL; INIT_LIST_HEAD(&p->rcu_node_entry); } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index fee6316a8673..d903e2f2b840 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -81,6 +81,7 @@ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); extern long rcu_batches_completed_sched(void); +static struct rcu_node *rcu_get_root(struct rcu_state *rsp); static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, unsigned long flags); static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags); @@ -876,6 +877,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) spin_unlock(&rnp->lock); /* irqs remain disabled. */ break; } + rcu_preempt_offline_tasks(rsp, rnp); mask = rnp->grpmask; spin_unlock(&rnp->lock); /* irqs remain disabled. */ rnp = rnp->parent; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 201334cdc200..04343bee646d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -92,7 +92,7 @@ static void rcu_preempt_qs(int cpu) rnp = rdp->mynode; spin_lock(&rnp->lock); t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; - t->rcu_blocked_cpu = cpu; + t->rcu_blocked_node = (void *)rnp; /* * If this CPU has already checked in, then this task @@ -170,12 +170,21 @@ static void rcu_read_unlock_special(struct task_struct *t) if (special & RCU_READ_UNLOCK_BLOCKED) { t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; - /* Remove this task from the list it blocked on. 
*/ - rnp = rcu_preempt_state.rda[t->rcu_blocked_cpu]->mynode; - spin_lock(&rnp->lock); + /* + * Remove this task from the list it blocked on. The + * task can migrate while we acquire the lock, but at + * most one time. So at most two passes through loop. + */ + for (;;) { + rnp = (struct rcu_node *)t->rcu_blocked_node; + spin_lock(&rnp->lock); + if (rnp == (struct rcu_node *)t->rcu_blocked_node) + break; + spin_unlock(&rnp->lock); + } empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); list_del_init(&t->rcu_node_entry); - t->rcu_blocked_cpu = -1; + t->rcu_blocked_node = NULL; /* * If this was the last task on the current list, and if @@ -261,6 +270,47 @@ static int rcu_preempted_readers(struct rcu_node *rnp) #ifdef CONFIG_HOTPLUG_CPU +/* + * Handle tasklist migration for case in which all CPUs covered by the + * specified rcu_node have gone offline. Move them up to the root + * rcu_node. The reason for not just moving them to the immediate + * parent is to remove the need for rcu_read_unlock_special() to + * make more than two attempts to acquire the target rcu_node's lock. + * + * The caller must hold rnp->lock with irqs disabled. + */ +static void rcu_preempt_offline_tasks(struct rcu_state *rsp, + struct rcu_node *rnp) +{ + int i; + struct list_head *lp; + struct list_head *lp_root; + struct rcu_node *rnp_root = rcu_get_root(rsp); + struct task_struct *tp; + + if (rnp == rnp_root) + return; /* Shouldn't happen: at least one CPU online. */ + + /* + * Move tasks up to root rcu_node. Rely on the fact that the + * root rcu_node can be at most one ahead of the rest of the + * rcu_nodes in terms of gp_num value. This fact allows us to + * move the blocked_tasks[] array directly, element by element. + */ + for (i = 0; i < 2; i++) { + lp = &rnp->blocked_tasks[i]; + lp_root = &rnp_root->blocked_tasks[i]; + while (!list_empty(lp)) { + tp = list_entry(lp->next, typeof(*tp), rcu_node_entry); + spin_lock(&rnp_root->lock); /* irqs already disabled */ + list_del(&tp->rcu_node_entry); + tp->rcu_blocked_node = rnp_root; + list_add(&tp->rcu_node_entry, lp_root); + spin_unlock(&rnp_root->lock); /* irqs remain disabled */ + } + } +} + /* * Do CPU-offline processing for preemptable RCU. */ @@ -409,6 +459,15 @@ static int rcu_preempted_readers(struct rcu_node *rnp) #ifdef CONFIG_HOTPLUG_CPU +/* + * Because preemptable RCU does not exist, it never needs to migrate + * tasks that were blocked within RCU read-side critical sections. + */ +static void rcu_preempt_offline_tasks(struct rcu_state *rsp, + struct rcu_node *rnp) +{ +} + /* * Because preemptable RCU does not exist, it never needs CPU-offline * processing. -- cgit v1.2.3 From 868489660dabc0c28087cca3dbc1adbbc398c6fe Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Aug 2009 15:00:12 -0700 Subject: rcu: Changes from reviews: avoid casts, fix/add warnings, improve comments Changes suggested by review comments from Josh Triplett and Mathieu Desnoyers. Signed-off-by: Paul E. 
McKenney Acked-by: Josh Triplett Acked-by: Mathieu Desnoyers Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <20090827220012.GA30525@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 +++- kernel/rcutree.c | 13 ++++++------- kernel/rcutree.h | 2 ++ kernel/rcutree_plugin.h | 10 ++++++---- 4 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3fe03151a8e6..855fd0d3f174 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1163,6 +1163,8 @@ struct sched_rt_entity { #endif }; +struct rcu_node; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1208,7 +1210,7 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; - void *rcu_blocked_node; + struct rcu_node *rcu_blocked_node; struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d903e2f2b840..71bc79791cd9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -229,7 +229,6 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) #endif /* #ifdef CONFIG_SMP */ #ifdef CONFIG_NO_HZ -static DEFINE_RATELIMIT_STATE(rcu_rs, 10 * HZ, 5); /** * rcu_enter_nohz - inform RCU that current CPU is entering nohz @@ -249,7 +248,7 @@ void rcu_enter_nohz(void) rdtp = &__get_cpu_var(rcu_dynticks); rdtp->dynticks++; rdtp->dynticks_nesting--; - WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); + WARN_ON_ONCE(rdtp->dynticks & 0x1); local_irq_restore(flags); } @@ -268,7 +267,7 @@ void rcu_exit_nohz(void) rdtp = &__get_cpu_var(rcu_dynticks); rdtp->dynticks++; rdtp->dynticks_nesting++; - WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs); + WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); local_irq_restore(flags); smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ } @@ -287,7 +286,7 @@ void rcu_nmi_enter(void) if (rdtp->dynticks & 0x1) return; rdtp->dynticks_nmi++; - WARN_ON_RATELIMIT(!(rdtp->dynticks_nmi & 0x1), &rcu_rs); + WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1)); smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ } @@ -306,7 +305,7 @@ void rcu_nmi_exit(void) return; smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ rdtp->dynticks_nmi++; - WARN_ON_RATELIMIT(rdtp->dynticks_nmi & 0x1, &rcu_rs); + WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1); } /** @@ -322,7 +321,7 @@ void rcu_irq_enter(void) if (rdtp->dynticks_nesting++) return; rdtp->dynticks++; - WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs); + WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ } @@ -341,7 +340,7 @@ void rcu_irq_exit(void) return; smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ rdtp->dynticks++; - WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); + WARN_ON_ONCE(rdtp->dynticks & 0x1); /* If the interrupt queued a callback, get out of dyntick mode. */ if (__get_cpu_var(rcu_sched_data).nxtlist || diff --git a/kernel/rcutree.h b/kernel/rcutree.h index ca560364d8cd..bf8a6f9f134d 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -81,6 +81,8 @@ struct rcu_dynticks { struct rcu_node { spinlock_t lock; long gpnum; /* Current grace period for this node. 
*/ + /* This will either be equal to or one */ + /* behind the root rcu_node's gpnum. */ unsigned long qsmask; /* CPUs or groups that need to switch in */ /* order for current grace period to proceed.*/ unsigned long qsmaskinit; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 04343bee646d..47789369ea59 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -92,7 +92,7 @@ static void rcu_preempt_qs(int cpu) rnp = rdp->mynode; spin_lock(&rnp->lock); t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; - t->rcu_blocked_node = (void *)rnp; + t->rcu_blocked_node = rnp; /* * If this CPU has already checked in, then this task @@ -176,9 +176,9 @@ static void rcu_read_unlock_special(struct task_struct *t) * most one time. So at most two passes through loop. */ for (;;) { - rnp = (struct rcu_node *)t->rcu_blocked_node; + rnp = t->rcu_blocked_node; spin_lock(&rnp->lock); - if (rnp == (struct rcu_node *)t->rcu_blocked_node) + if (rnp == t->rcu_blocked_node) break; spin_unlock(&rnp->lock); } @@ -288,8 +288,10 @@ static void rcu_preempt_offline_tasks(struct rcu_state *rsp, struct rcu_node *rnp_root = rcu_get_root(rsp); struct task_struct *tp; - if (rnp == rnp_root) + if (rnp == rnp_root) { + WARN_ONCE(1, "Last CPU thought to be offlined?"); return; /* Shouldn't happen: at least one CPU online. */ + } /* * Move tasks up to root rcu_node. Rely on the fact that the -- cgit v1.2.3 From 5bfb5b51382f4bd01d9ced11503264d2cc74fe41 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 26 Aug 2009 16:20:48 -0700 Subject: irq: Add irq_node() primitive ... to return irq_desc node info without #ifdefs at the callsites. Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Jesse Barnes LKML-Reference: <4A95C350.8060308@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/irqnr.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index ec87b212ff7d..7bf89bc8cbca 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -41,6 +41,12 @@ extern struct irq_desc *irq_to_desc(unsigned int irq); ; \ else +#ifdef CONFIG_SMP +#define irq_node(irq) (irq_to_desc(irq)->node) +#else +#define irq_node(irq) 0 +#endif + #endif /* CONFIG_GENERIC_HARDIRQS */ #define for_each_irq_nr(irq) \ -- cgit v1.2.3 From 2befdcea96fcd9a13e94373c66ea1dd7365d2a74 Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Fri, 28 Aug 2009 12:28:45 +0000 Subject: tg3: Add new 5785 10/100 only device ID This patch adds a new device ID for those 5785 devices that will only use 10/100 phys. Signed-off-by: Matt Carlson Signed-off-by: David S. 
Miller --- drivers/net/tg3.c | 3 ++- drivers/net/tg3.h | 2 ++ include/linux/pci_ids.h | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index ab3159ef4c56..d43b30b80f1e 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -219,7 +219,8 @@ static struct pci_device_id tg3_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5761E)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5761S)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5761SE)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5785)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5785_G)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5785_F)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57780)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57760)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57790)}, diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 60b12ab79334..1c9495df67ce 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -44,6 +44,8 @@ #define TG3PCI_DEVICE_TIGON3_57760 0x1690 #define TG3PCI_DEVICE_TIGON3_57790 0x1694 #define TG3PCI_DEVICE_TIGON3_57788 0x1691 +#define TG3PCI_DEVICE_TIGON3_5785_G 0x1699 /* GPHY */ +#define TG3PCI_DEVICE_TIGON3_5785_F 0x16a0 /* 10/100 only */ /* 0x04 --> 0x64 unused */ #define TG3PCI_MSI_DATA 0x00000064 /* 0x66 --> 0x68 unused */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index fdc3110c90c0..85492546d33d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2064,7 +2064,6 @@ #define PCI_DEVICE_ID_TIGON3_5787M 0x1693 #define PCI_DEVICE_ID_TIGON3_5782 0x1696 #define PCI_DEVICE_ID_TIGON3_5784 0x1698 -#define PCI_DEVICE_ID_TIGON3_5785 0x1699 #define PCI_DEVICE_ID_TIGON3_5786 0x169a #define PCI_DEVICE_ID_TIGON3_5787 0x169b #define PCI_DEVICE_ID_TIGON3_5788 0x169c -- cgit v1.2.3 From acdfcd04d9df7d084ff752f82afad6ed4ad5f363 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Fri, 28 Aug 2009 14:28:54 +0300 Subject: SLUB: fix ARCH_KMALLOC_MINALIGN cases 64 and 256 If the minalign is 64 bytes, then the 96 byte cache should not be created because it would conflict with the 128 byte cache. If the minalign is 256 bytes, patching the size_index table should not result in a buffer overrun. The calculation "(i - 1) / 8" used to access size_index[] is moved to a separate function as suggested by Christoph Lameter. 
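As a rough user-space sketch of the mapping involved (table contents elided; the 256-byte minalign is just an example, not any particular architecture), the guard added to the patching loop prevents a walk past the end of the table:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static signed char size_index[24];	/* one slot per 8 bytes, sizes 1..192 */

static inline int size_index_elem(size_t bytes)
{
	return (bytes - 1) / 8;
}

int main(void)
{
	size_t kmalloc_min_size = 256;	/* example ARCH_KMALLOC_MINALIGN */
	size_t i;

	for (i = 8; i < kmalloc_min_size; i += 8) {
		int elem = size_index_elem(i);

		if (elem >= (int)ARRAY_SIZE(size_index))
			break;	/* without this check: writes past slot 23 */
		size_index[elem] = 8;	/* stand-in for KMALLOC_SHIFT_LOW */
	}
	printf("stopped at %zu bytes; the table has %zu slots\n",
	       i, ARRAY_SIZE(size_index));
	return 0;
}

With a minalign of 64 or less the loop ends before the table does, which is why the overrun only shows up with large minimum alignments.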
Acked-by: Christoph Lameter Signed-off-by: Aaro Koskinen Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 6 ++---- mm/slub.c | 30 ++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 4dcbc2c71491..aa5d4a69d461 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -152,12 +152,10 @@ static __always_inline int kmalloc_index(size_t size) if (size <= KMALLOC_MIN_SIZE) return KMALLOC_SHIFT_LOW; -#if KMALLOC_MIN_SIZE <= 64 - if (size > 64 && size <= 96) + if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96) return 1; - if (size > 128 && size <= 192) + if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192) return 2; -#endif if (size <= 8) return 3; if (size <= 16) return 4; if (size <= 32) return 5; diff --git a/mm/slub.c b/mm/slub.c index e16c9fb1f48b..be493bd63c31 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2825,6 +2825,11 @@ static s8 size_index[24] = { 2 /* 192 */ }; +static inline int size_index_elem(size_t bytes) +{ + return (bytes - 1) / 8; +} + static struct kmem_cache *get_slab(size_t size, gfp_t flags) { int index; @@ -2833,7 +2838,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) if (!size) return ZERO_SIZE_PTR; - index = size_index[(size - 1) / 8]; + index = size_index[size_index_elem(size)]; } else index = fls(size - 1); @@ -3188,10 +3193,12 @@ void __init kmem_cache_init(void) slab_state = PARTIAL; /* Caches that are not of the two-to-the-power-of size */ - if (KMALLOC_MIN_SIZE <= 64) { + if (KMALLOC_MIN_SIZE <= 32) { create_kmalloc_cache(&kmalloc_caches[1], "kmalloc-96", 96, GFP_NOWAIT); caches++; + } + if (KMALLOC_MIN_SIZE <= 64) { create_kmalloc_cache(&kmalloc_caches[2], "kmalloc-192", 192, GFP_NOWAIT); caches++; @@ -3218,17 +3225,28 @@ void __init kmem_cache_init(void) BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); - for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) - size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; + for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { + int elem = size_index_elem(i); + if (elem >= ARRAY_SIZE(size_index)) + break; + size_index[elem] = KMALLOC_SHIFT_LOW; + } - if (KMALLOC_MIN_SIZE == 128) { + if (KMALLOC_MIN_SIZE == 64) { + /* + * The 96 byte size cache is not used if the alignment + * is 64 byte. + */ + for (i = 64 + 8; i <= 96; i += 8) + size_index[size_index_elem(i)] = 7; + } else if (KMALLOC_MIN_SIZE == 128) { /* * The 192 byte sized cache is not used if the alignment * is 128 byte. Redirect kmalloc to use the 256 byte cache * instead. */ for (i = 128 + 8; i <= 192; i += 8) - size_index[(i - 1) / 8] = 8; + size_index[size_index_elem(i)] = 8; } slab_state = UP; -- cgit v1.2.3 From e500011ffa191d662ac64d4ada6a5187b3180e16 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 30 Aug 2009 13:19:12 -0700 Subject: timers: Drop a function prototype Drop prototype for non-existent next_timer_interrupt() function. 
Signed-off-by: Randy Dunlap Cc: akpm LKML-Reference: <4A9ADEC0.70306@oracle.com> Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index be62ec2ebea5..a2d1eb6cb3f0 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -173,11 +173,6 @@ extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); */ #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) -/* - * Return when the next timer-wheel timeout occurs (in absolute jiffies), - * locks the timer base: - */ -extern unsigned long next_timer_interrupt(void); /* * Return when the next timer-wheel timeout occurs (in absolute jiffies), * locks the timer base and does the comparison against the given -- cgit v1.2.3 From 7b3d3e4fc685a7d7e0b4c207ce24dfbab5689eb0 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Sat, 29 Aug 2009 20:21:21 +0000 Subject: netdevice: Consolidate to use existing macros where available. The patch compiled, and 32 simultaneous netperf tests ran fine. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- include/linux/netdevice.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9192cdf5bd2c..60d3aac49ed4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1257,7 +1257,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) { #ifdef CONFIG_NETPOLL_TRAP if (netpoll_trap()) { - clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state); + netif_tx_start_queue(dev_queue); return; } #endif @@ -1363,7 +1363,8 @@ static inline int netif_running(const struct net_device *dev) static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); - clear_bit(__QUEUE_STATE_XOFF, &txq->state); + + netif_tx_start_queue(txq); } /** @@ -1380,7 +1381,7 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) if (netpoll_trap()) return; #endif - set_bit(__QUEUE_STATE_XOFF, &txq->state); + netif_tx_stop_queue(txq); } /** @@ -1394,7 +1395,8 @@ static inline int __netif_subqueue_stopped(const struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); - return test_bit(__QUEUE_STATE_XOFF, &txq->state); + + return netif_tx_queue_stopped(txq); } static inline int netif_subqueue_stopped(const struct net_device *dev, @@ -1746,8 +1748,7 @@ static inline void netif_tx_unlock(struct net_device *dev) * force a schedule. */ clear_bit(__QUEUE_STATE_FROZEN, &txq->state); - if (!test_bit(__QUEUE_STATE_XOFF, &txq->state)) - __netif_schedule(txq->qdisc); + netif_schedule_queue(txq); } spin_unlock(&dev->tx_global_lock); } -- cgit v1.2.3 From 69d0ee7377eef808e34ba5542b554ec97244b871 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:36 +0200 Subject: locking: Move spinlock function bodies to header file Move spinlock function bodies to the header file by creating a static inline version of each variant, and use the inline version in the out-of-line code. This shouldn't make any difference except that the spinlock code can now be used to generate inlined spinlock code.
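The shape of the change, as a toy sketch with made-up names (not the kernel API; the compare-and-swap builtin is GCC/Clang-specific), is that each exported out-of-line function shrinks to a wrapper around a static inline that carries the real body:

/* header: the shared body, available for inlining at call sites */
static inline int __toy_trylock(int *lock)
{
	return __sync_bool_compare_and_swap(lock, 0, 1);
}

/* .c file: the exported out-of-line variant is now a thin wrapper */
int _toy_trylock(int *lock)
{
	return __toy_trylock(lock);
}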
Signed-off-by: Heiko Carstens Acked-by: Arnd Bergmann Acked-by: Peter Zijlstra Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124417.859022429@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 18 +-- include/linux/spinlock_api_smp.h | 263 +++++++++++++++++++++++++++++++++++++++ kernel/spinlock.c | 174 +++++--------------------- 3 files changed, 300 insertions(+), 155 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 4be57ab03478..da76a06556bd 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -143,15 +143,6 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } */ #define spin_unlock_wait(lock) __raw_spin_unlock_wait(&(lock)->raw_lock) -/* - * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: - */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -# include -#else -# include -#endif - #ifdef CONFIG_DEBUG_SPINLOCK extern void _raw_spin_lock(spinlock_t *lock); #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) @@ -380,4 +371,13 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); */ #define spin_can_lock(lock) (!spin_is_locked(lock)) +/* + * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: + */ +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +# include +#else +# include +#endif + #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index d79845d034b5..6b108f5fb149 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,4 +60,267 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +static inline int __spin_trylock(spinlock_t *lock) +{ + preempt_disable(); + if (_raw_spin_trylock(lock)) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +static inline int __read_trylock(rwlock_t *lock) +{ + preempt_disable(); + if (_raw_read_trylock(lock)) { + rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +static inline int __write_trylock(rwlock_t *lock) +{ + preempt_disable(); + if (_raw_write_trylock(lock)) { + rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +/* + * If lockdep is enabled then we use the non-preemption spin-ops + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) + +static inline void __read_lock(rwlock_t *lock) +{ + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline unsigned long __spin_lock_irqsave(spinlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + /* + * On lockdep we dont want the hand-coded irq-enable of + * _raw_spin_lock_flags() code, because lockdep assumes + * that interrupts are not re-enabled during lock-acquire: + */ +#ifdef CONFIG_LOCKDEP + 
LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +#else + _raw_spin_lock_flags(lock, &flags); +#endif + return flags; +} + +static inline void __spin_lock_irq(spinlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline void __spin_lock_bh(spinlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline unsigned long __read_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock, + _raw_read_lock_flags, &flags); + return flags; +} + +static inline void __read_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline void __read_lock_bh(rwlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline unsigned long __write_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock, + _raw_write_lock_flags, &flags); + return flags; +} + +static inline void __write_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +static inline void __write_lock_bh(rwlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +static inline void __spin_lock(spinlock_t *lock) +{ + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline void __write_lock(rwlock_t *lock) +{ + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +#endif /* CONFIG_PREEMPT */ + +static inline void __spin_unlock(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + preempt_enable(); +} + +static inline void __write_unlock(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + preempt_enable(); +} + +static inline void __read_unlock(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + preempt_enable(); +} + +static inline void __spin_unlock_irqrestore(spinlock_t *lock, + unsigned long flags) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __spin_unlock_irq(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __spin_unlock_bh(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline void 
__read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __read_unlock_irq(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __read_unlock_bh(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline void __write_unlock_irqrestore(rwlock_t *lock, + unsigned long flags) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __write_unlock_irq(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __write_unlock_bh(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline int __spin_trylock_bh(spinlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + if (_raw_spin_trylock(lock)) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + return 0; +} + #endif /* __LINUX_SPINLOCK_API_SMP_H */ diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 7932653c4ebd..2c000f5c070b 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -23,40 +23,19 @@ int __lockfunc _spin_trylock(spinlock_t *lock) { - preempt_disable(); - if (_raw_spin_trylock(lock)) { - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable(); - return 0; + return __spin_trylock(lock); } EXPORT_SYMBOL(_spin_trylock); int __lockfunc _read_trylock(rwlock_t *lock) { - preempt_disable(); - if (_raw_read_trylock(lock)) { - rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable(); - return 0; + return __read_trylock(lock); } EXPORT_SYMBOL(_read_trylock); int __lockfunc _write_trylock(rwlock_t *lock) { - preempt_disable(); - if (_raw_write_trylock(lock)) { - rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable(); - return 0; + return __write_trylock(lock); } EXPORT_SYMBOL(_write_trylock); @@ -69,129 +48,74 @@ EXPORT_SYMBOL(_write_trylock); void __lockfunc _read_lock(rwlock_t *lock) { - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + __read_lock(lock); } EXPORT_SYMBOL(_read_lock); unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) { - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - /* - * On lockdep we dont want the hand-coded irq-enable of - * _raw_spin_lock_flags() code, because lockdep assumes - * that interrupts are not re-enabled during lock-acquire: - */ -#ifdef CONFIG_LOCKDEP - LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); -#else - _raw_spin_lock_flags(lock, &flags); -#endif - return flags; + return __spin_lock_irqsave(lock); } EXPORT_SYMBOL(_spin_lock_irqsave); void __lockfunc _spin_lock_irq(spinlock_t *lock) { - local_irq_disable(); - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); 
- LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + __spin_lock_irq(lock); } EXPORT_SYMBOL(_spin_lock_irq); void __lockfunc _spin_lock_bh(spinlock_t *lock) { - local_bh_disable(); - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + __spin_lock_bh(lock); } EXPORT_SYMBOL(_spin_lock_bh); unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) { - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock, - _raw_read_lock_flags, &flags); - return flags; + return __read_lock_irqsave(lock); } EXPORT_SYMBOL(_read_lock_irqsave); void __lockfunc _read_lock_irq(rwlock_t *lock) { - local_irq_disable(); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + __read_lock_irq(lock); } EXPORT_SYMBOL(_read_lock_irq); void __lockfunc _read_lock_bh(rwlock_t *lock) { - local_bh_disable(); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + __read_lock_bh(lock); } EXPORT_SYMBOL(_read_lock_bh); unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) { - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock, - _raw_write_lock_flags, &flags); - return flags; + return __write_lock_irqsave(lock); } EXPORT_SYMBOL(_write_lock_irqsave); void __lockfunc _write_lock_irq(rwlock_t *lock) { - local_irq_disable(); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + __write_lock_irq(lock); } EXPORT_SYMBOL(_write_lock_irq); void __lockfunc _write_lock_bh(rwlock_t *lock) { - local_bh_disable(); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + __write_lock_bh(lock); } EXPORT_SYMBOL(_write_lock_bh); void __lockfunc _spin_lock(spinlock_t *lock) { - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + __spin_lock(lock); } - EXPORT_SYMBOL(_spin_lock); void __lockfunc _write_lock(rwlock_t *lock) { - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + __write_lock(lock); } - EXPORT_SYMBOL(_write_lock); #else /* CONFIG_PREEMPT: */ @@ -320,121 +244,79 @@ EXPORT_SYMBOL(_spin_lock_nest_lock); void __lockfunc _spin_unlock(spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - preempt_enable(); + __spin_unlock(lock); } EXPORT_SYMBOL(_spin_unlock); void __lockfunc _write_unlock(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - preempt_enable(); + __write_unlock(lock); } EXPORT_SYMBOL(_write_unlock); void __lockfunc _read_unlock(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - preempt_enable(); + __read_unlock(lock); } EXPORT_SYMBOL(_read_unlock); void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - local_irq_restore(flags); - preempt_enable(); + __spin_unlock_irqrestore(lock, flags); } 
EXPORT_SYMBOL(_spin_unlock_irqrestore); void __lockfunc _spin_unlock_irq(spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - local_irq_enable(); - preempt_enable(); + __spin_unlock_irq(lock); } EXPORT_SYMBOL(_spin_unlock_irq); void __lockfunc _spin_unlock_bh(spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + __spin_unlock_bh(lock); } EXPORT_SYMBOL(_spin_unlock_bh); void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - local_irq_restore(flags); - preempt_enable(); + __read_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_read_unlock_irqrestore); void __lockfunc _read_unlock_irq(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - local_irq_enable(); - preempt_enable(); + __read_unlock_irq(lock); } EXPORT_SYMBOL(_read_unlock_irq); void __lockfunc _read_unlock_bh(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + __read_unlock_bh(lock); } EXPORT_SYMBOL(_read_unlock_bh); void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - local_irq_restore(flags); - preempt_enable(); + __write_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_write_unlock_irqrestore); void __lockfunc _write_unlock_irq(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - local_irq_enable(); - preempt_enable(); + __write_unlock_irq(lock); } EXPORT_SYMBOL(_write_unlock_irq); void __lockfunc _write_unlock_bh(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + __write_unlock_bh(lock); } EXPORT_SYMBOL(_write_unlock_bh); int __lockfunc _spin_trylock_bh(spinlock_t *lock) { - local_bh_disable(); - preempt_disable(); - if (_raw_spin_trylock(lock)) { - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); - return 0; + return __spin_trylock_bh(lock); } EXPORT_SYMBOL(_spin_trylock_bh); -- cgit v1.2.3 From 892a7c67c12da63fa4b51728bbe5b982356a090a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:37 +0200 Subject: locking: Allow arch-inlined spinlocks This allows an architecture to specify, per lock variant, whether the locking code should be kept out-of-line or inlined. If an architecture wants out-of-line locking code, no change is needed. To force inlining of e.g. spin_lock() the line: #define __always_inline__spin_lock needs to be added to arch/<...>/include/asm/spinlock.h If CONFIG_DEBUG_SPINLOCK or CONFIG_GENERIC_LOCKBREAK are defined, the per-architecture defines are (partly) ignored and out-of-line spinlock code will still be generated.
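Condensed from the hunks below, the mechanism for a hypothetical architecture "foo" looks like this:

/* arch/foo/include/asm/spinlock.h -- opt in, one define per variant */
#define __always_inline__spin_lock

/* include/linux/spinlock_api_smp.h -- map the API name to the inline body */
#ifdef __always_inline__spin_lock
#define _spin_lock(lock) __spin_lock(lock)
#endif

/* kernel/spinlock.c -- emit the out-of-line copy only when not inlined */
#ifndef _spin_lock
void __lockfunc _spin_lock(spinlock_t *lock)
{
	__spin_lock(lock);
}
EXPORT_SYMBOL(_spin_lock);
#endif

An architecture that defines none of these macros keeps the current out-of-line behaviour unchanged.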
Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124418.375299024@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/spinlock_api_smp.h | 119 +++++++++++++++++++++++++++++++++++++++ kernel/spinlock.c | 56 ++++++++++++++++++ 2 files changed, 175 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 6b108f5fb149..1a411e3fab95 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,6 +60,125 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +#ifndef CONFIG_DEBUG_SPINLOCK +#ifndef CONFIG_GENERIC_LOCKBREAK + +#ifdef __always_inline__spin_lock +#define _spin_lock(lock) __spin_lock(lock) +#endif + +#ifdef __always_inline__read_lock +#define _read_lock(lock) __read_lock(lock) +#endif + +#ifdef __always_inline__write_lock +#define _write_lock(lock) __write_lock(lock) +#endif + +#ifdef __always_inline__spin_lock_bh +#define _spin_lock_bh(lock) __spin_lock_bh(lock) +#endif + +#ifdef __always_inline__read_lock_bh +#define _read_lock_bh(lock) __read_lock_bh(lock) +#endif + +#ifdef __always_inline__write_lock_bh +#define _write_lock_bh(lock) __write_lock_bh(lock) +#endif + +#ifdef __always_inline__spin_lock_irq +#define _spin_lock_irq(lock) __spin_lock_irq(lock) +#endif + +#ifdef __always_inline__read_lock_irq +#define _read_lock_irq(lock) __read_lock_irq(lock) +#endif + +#ifdef __always_inline__write_lock_irq +#define _write_lock_irq(lock) __write_lock_irq(lock) +#endif + +#ifdef __always_inline__spin_lock_irqsave +#define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock) +#endif + +#ifdef __always_inline__read_lock_irqsave +#define _read_lock_irqsave(lock) __read_lock_irqsave(lock) +#endif + +#ifdef __always_inline__write_lock_irqsave +#define _write_lock_irqsave(lock) __write_lock_irqsave(lock) +#endif + +#endif /* !CONFIG_GENERIC_LOCKBREAK */ + +#ifdef __always_inline__spin_trylock +#define _spin_trylock(lock) __spin_trylock(lock) +#endif + +#ifdef __always_inline__read_trylock +#define _read_trylock(lock) __read_trylock(lock) +#endif + +#ifdef __always_inline__write_trylock +#define _write_trylock(lock) __write_trylock(lock) +#endif + +#ifdef __always_inline__spin_trylock_bh +#define _spin_trylock_bh(lock) __spin_trylock_bh(lock) +#endif + +#ifdef __always_inline__spin_unlock +#define _spin_unlock(lock) __spin_unlock(lock) +#endif + +#ifdef __always_inline__read_unlock +#define _read_unlock(lock) __read_unlock(lock) +#endif + +#ifdef __always_inline__write_unlock +#define _write_unlock(lock) __write_unlock(lock) +#endif + +#ifdef __always_inline__spin_unlock_bh +#define _spin_unlock_bh(lock) __spin_unlock_bh(lock) +#endif + +#ifdef __always_inline__read_unlock_bh +#define _read_unlock_bh(lock) __read_unlock_bh(lock) +#endif + +#ifdef __always_inline__write_unlock_bh +#define _write_unlock_bh(lock) __write_unlock_bh(lock) +#endif + +#ifdef __always_inline__spin_unlock_irq +#define _spin_unlock_irq(lock) __spin_unlock_irq(lock) +#endif + +#ifdef __always_inline__read_unlock_irq +#define _read_unlock_irq(lock) __read_unlock_irq(lock) +#endif + +#ifdef __always_inline__write_unlock_irq 
+#define _write_unlock_irq(lock) __write_unlock_irq(lock) +#endif + +#ifdef __always_inline__spin_unlock_irqrestore +#define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags) +#endif + +#ifdef __always_inline__read_unlock_irqrestore +#define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags) +#endif + +#ifdef __always_inline__write_unlock_irqrestore +#define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags) +#endif + +#endif /* CONFIG_DEBUG_SPINLOCK */ + static inline int __spin_trylock(spinlock_t *lock) { preempt_disable(); diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 2c000f5c070b..5ddab730cb2f 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -21,23 +21,29 @@ #include #include +#ifndef _spin_trylock int __lockfunc _spin_trylock(spinlock_t *lock) { return __spin_trylock(lock); } EXPORT_SYMBOL(_spin_trylock); +#endif +#ifndef _read_trylock int __lockfunc _read_trylock(rwlock_t *lock) { return __read_trylock(lock); } EXPORT_SYMBOL(_read_trylock); +#endif +#ifndef _write_trylock int __lockfunc _write_trylock(rwlock_t *lock) { return __write_trylock(lock); } EXPORT_SYMBOL(_write_trylock); +#endif /* * If lockdep is enabled then we use the non-preemption spin-ops @@ -46,77 +52,101 @@ EXPORT_SYMBOL(_write_trylock); */ #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) +#ifndef _read_lock void __lockfunc _read_lock(rwlock_t *lock) { __read_lock(lock); } EXPORT_SYMBOL(_read_lock); +#endif +#ifndef _spin_lock_irqsave unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) { return __spin_lock_irqsave(lock); } EXPORT_SYMBOL(_spin_lock_irqsave); +#endif +#ifndef _spin_lock_irq void __lockfunc _spin_lock_irq(spinlock_t *lock) { __spin_lock_irq(lock); } EXPORT_SYMBOL(_spin_lock_irq); +#endif +#ifndef _spin_lock_bh void __lockfunc _spin_lock_bh(spinlock_t *lock) { __spin_lock_bh(lock); } EXPORT_SYMBOL(_spin_lock_bh); +#endif +#ifndef _read_lock_irqsave unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) { return __read_lock_irqsave(lock); } EXPORT_SYMBOL(_read_lock_irqsave); +#endif +#ifndef _read_lock_irq void __lockfunc _read_lock_irq(rwlock_t *lock) { __read_lock_irq(lock); } EXPORT_SYMBOL(_read_lock_irq); +#endif +#ifndef _read_lock_bh void __lockfunc _read_lock_bh(rwlock_t *lock) { __read_lock_bh(lock); } EXPORT_SYMBOL(_read_lock_bh); +#endif +#ifndef _write_lock_irqsave unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) { return __write_lock_irqsave(lock); } EXPORT_SYMBOL(_write_lock_irqsave); +#endif +#ifndef _write_lock_irq void __lockfunc _write_lock_irq(rwlock_t *lock) { __write_lock_irq(lock); } EXPORT_SYMBOL(_write_lock_irq); +#endif +#ifndef _write_lock_bh void __lockfunc _write_lock_bh(rwlock_t *lock) { __write_lock_bh(lock); } EXPORT_SYMBOL(_write_lock_bh); +#endif +#ifndef _spin_lock void __lockfunc _spin_lock(spinlock_t *lock) { __spin_lock(lock); } EXPORT_SYMBOL(_spin_lock); +#endif +#ifndef _write_lock void __lockfunc _write_lock(rwlock_t *lock) { __write_lock(lock); } EXPORT_SYMBOL(_write_lock); +#endif #else /* CONFIG_PREEMPT: */ @@ -242,83 +272,109 @@ EXPORT_SYMBOL(_spin_lock_nest_lock); #endif +#ifndef _spin_unlock void __lockfunc _spin_unlock(spinlock_t *lock) { __spin_unlock(lock); } EXPORT_SYMBOL(_spin_unlock); +#endif +#ifndef _write_unlock void __lockfunc _write_unlock(rwlock_t *lock) { __write_unlock(lock); } EXPORT_SYMBOL(_write_unlock); +#endif +#ifndef _read_unlock void __lockfunc _read_unlock(rwlock_t *lock) { 
__read_unlock(lock); } EXPORT_SYMBOL(_read_unlock); +#endif +#ifndef _spin_unlock_irqrestore void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { __spin_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_spin_unlock_irqrestore); +#endif +#ifndef _spin_unlock_irq void __lockfunc _spin_unlock_irq(spinlock_t *lock) { __spin_unlock_irq(lock); } EXPORT_SYMBOL(_spin_unlock_irq); +#endif +#ifndef _spin_unlock_bh void __lockfunc _spin_unlock_bh(spinlock_t *lock) { __spin_unlock_bh(lock); } EXPORT_SYMBOL(_spin_unlock_bh); +#endif +#ifndef _read_unlock_irqrestore void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __read_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_read_unlock_irqrestore); +#endif +#ifndef _read_unlock_irq void __lockfunc _read_unlock_irq(rwlock_t *lock) { __read_unlock_irq(lock); } EXPORT_SYMBOL(_read_unlock_irq); +#endif +#ifndef _read_unlock_bh void __lockfunc _read_unlock_bh(rwlock_t *lock) { __read_unlock_bh(lock); } EXPORT_SYMBOL(_read_unlock_bh); +#endif +#ifndef _write_unlock_irqrestore void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __write_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_write_unlock_irqrestore); +#endif +#ifndef _write_unlock_irq void __lockfunc _write_unlock_irq(rwlock_t *lock) { __write_unlock_irq(lock); } EXPORT_SYMBOL(_write_unlock_irq); +#endif +#ifndef _write_unlock_bh void __lockfunc _write_unlock_bh(rwlock_t *lock) { __write_unlock_bh(lock); } EXPORT_SYMBOL(_write_unlock_bh); +#endif +#ifndef _spin_trylock_bh int __lockfunc _spin_trylock_bh(spinlock_t *lock) { return __spin_trylock_bh(lock); } EXPORT_SYMBOL(_spin_trylock_bh); +#endif notrace int in_lock_functions(unsigned long addr) { -- cgit v1.2.3 From bb7bed082500179519c7caf0678ba3bed9752658 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:38 +0200 Subject: locking: Simplify spinlock inlining For !DEBUG_SPINLOCK && !PREEMPT && SMP the spin_unlock() functions were always inlined by using special defines which would call the __raw* functions. The out-of-line variants for these functions would be generated anyway. Use the new per unlock/locking variant mechanism to force inlining of the unlock functions like before. This is not a functional change, we just get rid of one additional way to force inlining. 
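As a sketch of the resulting expansion for the !DEBUG_SPINLOCK && !PREEMPT SMP case (per the hunks below):

/*
 * spin_unlock(lock)           -- plain macro now, no special cases left
 *   -> _spin_unlock(lock)     -- mapped to the inline variant because
 *     -> __spin_unlock(lock)     __always_inline__spin_unlock is defined
 */
static inline void __spin_unlock(spinlock_t *lock)
{
	spin_release(&lock->dep_map, 1, _RET_IP_);
	_raw_spin_unlock(lock);
	preempt_enable();
}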
Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124418.848735034@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 46 ++++++---------------------------------- include/linux/spinlock_api_smp.h | 12 +++++++++++ 2 files changed, 18 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index da76a06556bd..f0ca7a7a1757 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -259,50 +259,16 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } #define spin_lock_irq(lock) _spin_lock_irq(lock) #define spin_lock_bh(lock) _spin_lock_bh(lock) - #define read_lock_irq(lock) _read_lock_irq(lock) #define read_lock_bh(lock) _read_lock_bh(lock) - #define write_lock_irq(lock) _write_lock_irq(lock) #define write_lock_bh(lock) _write_lock_bh(lock) - -/* - * We inline the unlock functions in the nondebug case: - */ -#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \ - !defined(CONFIG_SMP) -# define spin_unlock(lock) _spin_unlock(lock) -# define read_unlock(lock) _read_unlock(lock) -# define write_unlock(lock) _write_unlock(lock) -# define spin_unlock_irq(lock) _spin_unlock_irq(lock) -# define read_unlock_irq(lock) _read_unlock_irq(lock) -# define write_unlock_irq(lock) _write_unlock_irq(lock) -#else -# define spin_unlock(lock) \ - do {__raw_spin_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define read_unlock(lock) \ - do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define write_unlock(lock) \ - do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define spin_unlock_irq(lock) \ -do { \ - __raw_spin_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -# define read_unlock_irq(lock) \ -do { \ - __raw_read_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -# define write_unlock_irq(lock) \ -do { \ - __raw_write_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -#endif +#define spin_unlock(lock) _spin_unlock(lock) +#define read_unlock(lock) _read_unlock(lock) +#define write_unlock(lock) _write_unlock(lock) +#define spin_unlock_irq(lock) _spin_unlock_irq(lock) +#define read_unlock_irq(lock) _read_unlock_irq(lock) +#define write_unlock_irq(lock) _write_unlock_irq(lock) #define spin_unlock_irqrestore(lock, flags) \ do { \ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 1a411e3fab95..7a7e18fc2415 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,6 +60,18 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +/* + * We inline the unlock functions in the nondebug case: + */ +#if !defined(CONFIG_DEBUG_SPINLOCK) && !defined(CONFIG_PREEMPT) +#define __always_inline__spin_unlock +#define __always_inline__read_unlock +#define __always_inline__write_unlock +#define __always_inline__spin_unlock_irq +#define __always_inline__read_unlock_irq +#define __always_inline__write_unlock_irq +#endif + #ifndef CONFIG_DEBUG_SPINLOCK #ifndef CONFIG_GENERIC_LOCKBREAK -- cgit 
v1.2.3 From 2b980dbd77d229eb60588802162c9659726b11f4 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 28 Aug 2009 18:12:43 -0400 Subject: lsm: Add hooks to the TUN driver The TUN driver lacks any LSM hooks which makes it difficult for LSM modules, such as SELinux, to enforce access controls on network traffic generated by TUN users; this is particularly problematic for virtualization apps such as QEMU and KVM. This patch adds three new LSM hooks designed to control the creation and attachment of TUN devices, the hooks are: * security_tun_dev_create() Provides access control for the creation of new TUN devices * security_tun_dev_post_create() Provides the ability to create the necessary socket LSM state for newly created TUN devices * security_tun_dev_attach() Provides access control for attaching to existing, persistent TUN devices and the ability to update the TUN device's socket LSM state as necessary Signed-off-by: Paul Moore Acked-by: Eric Paris Acked-by: Serge Hallyn Acked-by: David S. Miller Signed-off-by: James Morris --- drivers/net/tun.c | 22 +++++++++++++++------- include/linux/security.h | 31 +++++++++++++++++++++++++++++++ security/capability.c | 19 +++++++++++++++++++ security/security.c | 18 ++++++++++++++++++ 4 files changed, 83 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 42b6c6319bc2..87214a257d2a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -130,17 +130,10 @@ static inline struct tun_sock *tun_sk(struct sock *sk) static int tun_attach(struct tun_struct *tun, struct file *file) { struct tun_file *tfile = file->private_data; - const struct cred *cred = current_cred(); int err; ASSERT_RTNL(); - /* Check permissions */ - if (((tun->owner != -1 && cred->euid != tun->owner) || - (tun->group != -1 && !in_egroup_p(tun->group))) && - !capable(CAP_NET_ADMIN)) - return -EPERM; - netif_tx_lock_bh(tun->dev); err = -EINVAL; @@ -926,6 +919,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) dev = __dev_get_by_name(net, ifr->ifr_name); if (dev) { + const struct cred *cred = current_cred(); + if (ifr->ifr_flags & IFF_TUN_EXCL) return -EBUSY; if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) @@ -935,6 +930,14 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else return -EINVAL; + if (((tun->owner != -1 && cred->euid != tun->owner) || + (tun->group != -1 && !in_egroup_p(tun->group))) && + !capable(CAP_NET_ADMIN)) + return -EPERM; + err = security_tun_dev_attach(tun->sk); + if (err < 0) + return err; + err = tun_attach(tun, file); if (err < 0) return err; @@ -947,6 +950,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (!capable(CAP_NET_ADMIN)) return -EPERM; + err = security_tun_dev_create(); + if (err < 0) + return err; /* Set dev type */ if (ifr->ifr_flags & IFF_TUN) { @@ -989,6 +995,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->sk = sk; container_of(sk, struct tun_sock, sk)->tun = tun; + security_tun_dev_post_create(sk); + tun_net_init(dev); if (strchr(dev->name, '%')) { diff --git a/include/linux/security.h b/include/linux/security.h index a16d6b7c4ebe..40ba39ea68ce 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -998,6 +998,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Sets the connection's peersid to the secmark on skb. 
* @req_classify_flow: * Sets the flow's sid to the openreq sid. + * @tun_dev_create: + * Check permissions prior to creating a new TUN device. + * @tun_dev_post_create: + * This hook allows a module to update or allocate a per-socket security + * structure. + * @sk contains the newly created sock structure. + * @tun_dev_attach: + * Check permissions prior to attaching to a persistent TUN device. This + * hook can also be used by the module to update any security state + * associated with the TUN device's sock structure. + * @sk contains the existing sock structure. * * Security hooks for XFRM operations. * @@ -1597,6 +1608,9 @@ struct security_operations { void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); + int (*tun_dev_create)(void); + void (*tun_dev_post_create)(struct sock *sk); + int (*tun_dev_attach)(struct sock *sk); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2586,6 +2600,9 @@ void security_inet_csk_clone(struct sock *newsk, const struct request_sock *req); void security_inet_conn_established(struct sock *sk, struct sk_buff *skb); +int security_tun_dev_create(void); +void security_tun_dev_post_create(struct sock *sk); +int security_tun_dev_attach(struct sock *sk); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket *sock, @@ -2736,6 +2753,20 @@ static inline void security_inet_conn_established(struct sock *sk, struct sk_buff *skb) { } + +static inline int security_tun_dev_create(void) +{ + return 0; +} + +static inline void security_tun_dev_post_create(struct sock *sk) +{ +} + +static inline int security_tun_dev_attach(struct sock *sk) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/capability.c b/security/capability.c index 1b943f54b2ea..06400cf07757 100644 --- a/security/capability.c +++ b/security/capability.c @@ -706,10 +706,26 @@ static void cap_inet_conn_established(struct sock *sk, struct sk_buff *skb) { } + + static void cap_req_classify_flow(const struct request_sock *req, struct flowi *fl) { } + +static int cap_tun_dev_create(void) +{ + return 0; +} + +static void cap_tun_dev_post_create(struct sock *sk) +{ +} + +static int cap_tun_dev_attach(struct sock *sk) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1026,6 +1042,9 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, inet_csk_clone); set_to_cap_if_null(ops, inet_conn_established); set_to_cap_if_null(ops, req_classify_flow); + set_to_cap_if_null(ops, tun_dev_create); + set_to_cap_if_null(ops, tun_dev_post_create); + set_to_cap_if_null(ops, tun_dev_attach); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_cap_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/security.c b/security/security.c index 0e993f42ce3d..f88eaf6b14cc 100644 --- a/security/security.c +++ b/security/security.c @@ -1117,6 +1117,24 @@ void security_inet_conn_established(struct sock *sk, security_ops->inet_conn_established(sk, skb); } +int security_tun_dev_create(void) +{ + return security_ops->tun_dev_create(); +} +EXPORT_SYMBOL(security_tun_dev_create); + +void security_tun_dev_post_create(struct sock *sk) +{ + return security_ops->tun_dev_post_create(sk); +} +EXPORT_SYMBOL(security_tun_dev_post_create); + +int 
security_tun_dev_attach(struct sock *sk) +{ + return security_ops->tun_dev_attach(sk); +} +EXPORT_SYMBOL(security_tun_dev_attach); + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM -- cgit v1.2.3 From dc1f8bf68b311b1537cb65893430b6796118498a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 31 Aug 2009 19:50:40 +0000 Subject: netdev: change transmit to limited range type The transmit function should only return one of three possible values, some drivers got confused and returned errno's or other values. This changes the definition so that this can be caught at compile time. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 60d3aac49ed4..376a2e1ac00d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -79,17 +79,19 @@ struct wireless_dev; #define net_xmit_eval(e) ((e) == NET_XMIT_CN? 0 : (e)) #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) +/* Driver transmit return codes */ +enum netdev_tx { + NETDEV_TX_OK = 0, /* driver took care of packet */ + NETDEV_TX_BUSY, /* driver tx path was busy*/ + NETDEV_TX_LOCKED = -1, /* driver tx lock was already taken */ +}; +typedef enum netdev_tx netdev_tx_t; + #endif #define MAX_ADDR_LEN 32 /* Largest hardware address length */ -/* Driver transmit return codes */ -#define NETDEV_TX_OK 0 /* driver took care of packet */ -#define NETDEV_TX_BUSY 1 /* driver tx path was busy*/ -#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */ - #ifdef __KERNEL__ - /* * Compute the worst case header length according to the protocols * used. @@ -507,9 +509,11 @@ struct netdev_queue { * This function is called when network device transistions to the down * state. * - * int (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev); + * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, + * struct net_device *dev); * Called when a packet needs to be transmitted. - * Must return NETDEV_TX_OK , NETDEV_TX_BUSY, or NETDEV_TX_LOCKED, + * Must return NETDEV_TX_OK , NETDEV_TX_BUSY. + * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); @@ -580,7 +584,7 @@ struct net_device_ops { void (*ndo_uninit)(struct net_device *dev); int (*ndo_open)(struct net_device *dev); int (*ndo_stop)(struct net_device *dev); - int (*ndo_start_xmit) (struct sk_buff *skb, + netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, struct net_device *dev); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3 From 25a79c41ce0ce88a4288adf278e9b0e00f228383 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 31 Aug 2009 19:50:45 +0000 Subject: usbnet: convert to netdev_tx_t Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/usb/catc.c | 3 ++- drivers/net/usb/cdc-phonet.c | 6 +++--- drivers/net/usb/hso.c | 3 ++- drivers/net/usb/kaweth.c | 3 ++- drivers/net/usb/pegasus.c | 3 ++- drivers/net/usb/rtl8150.c | 3 ++- drivers/net/usb/usbnet.c | 8 ++++---- drivers/usb/gadget/u_ether.c | 3 ++- include/linux/usb/usbnet.h | 3 ++- 9 files changed, 21 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 7abdc4abbe07..0ffc0c6d03be 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -411,7 +411,8 @@ static void catc_tx_done(struct urb *urb) spin_unlock_irqrestore(&catc->tx_lock, flags); } -static int catc_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t catc_start_xmit(struct sk_buff *skb, + struct net_device *netdev) { struct catc *catc = netdev_priv(netdev); unsigned long flags; diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index 792af72da8ac..0ca5916ca8df 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -55,7 +55,7 @@ static void rx_complete(struct urb *req); /* * Network device callbacks */ -static int usbpn_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t usbpn_xmit(struct sk_buff *skb, struct net_device *dev) { struct usbpn_dev *pnd = netdev_priv(dev); struct urb *req = NULL; @@ -82,12 +82,12 @@ static int usbpn_xmit(struct sk_buff *skb, struct net_device *dev) if (pnd->tx_queue >= dev->tx_queue_len) netif_stop_queue(dev); spin_unlock_irqrestore(&pnd->tx_lock, flags); - return 0; + return NETDEV_TX_OK; drop: dev_kfree_skb(skb); dev->stats.tx_dropped++; - return 0; + return NETDEV_TX_OK; } static void tx_complete(struct urb *req) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index ffe410635735..123f9b84dd29 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -771,7 +771,8 @@ static void write_bulk_callback(struct urb *urb) } /* called by kernel when we need to transmit a packet */ -static int hso_net_start_xmit(struct sk_buff *skb, struct net_device *net) +static netdev_tx_t hso_net_start_xmit(struct sk_buff *skb, + struct net_device *net) { struct hso_net *odev = netdev_priv(net); int result; diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 200fe3d525ca..7f397365b437 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -803,7 +803,8 @@ static void kaweth_usb_transmit_complete(struct urb *urb) /**************************************************************** * kaweth_start_xmit ****************************************************************/ -static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) +static netdev_tx_t kaweth_start_xmit(struct sk_buff *skb, + struct net_device *net) { struct kaweth_device *kaweth = netdev_priv(net); __le16 *private_header; diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 69d2df95ac86..7b935b846424 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -876,7 +876,8 @@ static void pegasus_tx_timeout(struct net_device *net) pegasus->stats.tx_errors++; } -static int pegasus_start_xmit(struct sk_buff *skb, struct net_device *net) +static netdev_tx_t pegasus_start_xmit(struct sk_buff *skb, + struct net_device *net) { pegasus_t *pegasus = netdev_priv(net); int count = ((skb->len + 2) & 0x3f) ? 
skb->len + 2 : skb->len + 3; diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index bac8b77fb25e..d9f84f22fbc7 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -727,7 +727,8 @@ static void rtl8150_set_multicast(struct net_device *netdev) netif_wake_queue(netdev); } -static int rtl8150_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, + struct net_device *netdev) { rtl8150_t *dev = netdev_priv(netdev); int count, res; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 7d471fca2743..d166e3385c64 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1007,15 +1007,16 @@ EXPORT_SYMBOL_GPL(usbnet_tx_timeout); /*-------------------------------------------------------------------------*/ -int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net) +netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, + struct net_device *net) { struct usbnet *dev = netdev_priv(net); int length; - int retval = NET_XMIT_SUCCESS; struct urb *urb = NULL; struct skb_data *entry; struct driver_info *info = dev->driver_info; unsigned long flags; + int retval; // some devices want funky USB-level framing, for // win32 driver (usually) and/or hardware quirks @@ -1079,7 +1080,6 @@ int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net) if (netif_msg_tx_err (dev)) devdbg (dev, "drop, code %d", retval); drop: - retval = NET_XMIT_SUCCESS; dev->net->stats.tx_dropped++; if (skb) dev_kfree_skb_any (skb); @@ -1088,7 +1088,7 @@ drop: devdbg (dev, "> tx, len %d, type 0x%x", length, skb->protocol); } - return retval; + return NETDEV_TX_OK; } EXPORT_SYMBOL_GPL(usbnet_start_xmit); diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index aac69b591aeb..dc3ebd1e68ca 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -465,7 +465,8 @@ static inline int is_promisc(u16 cdc_filter) return cdc_filter & USB_CDC_PACKET_TYPE_PROMISCUOUS; } -static int eth_start_xmit(struct sk_buff *skb, struct net_device *net) +static netdev_tx_t eth_start_xmit(struct sk_buff *skb, + struct net_device *net) { struct eth_dev *dev = netdev_priv(net); int length = skb->len; diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 09514252d84e..bb69e256cd16 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -182,7 +182,8 @@ struct skb_data { /* skb->cb is one of these */ extern int usbnet_open (struct net_device *net); extern int usbnet_stop (struct net_device *net); -extern int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net); +extern netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, + struct net_device *net); extern void usbnet_tx_timeout (struct net_device *net); extern int usbnet_change_mtu (struct net_device *net, int new_mtu); -- cgit v1.2.3 From 4c5d502d8b2db8947c44dc44bdc67dbe55cce2b9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 31 Aug 2009 19:50:48 +0000 Subject: hdlc: convert to netdev_tx_t Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/char/pcmcia/synclink_cs.c | 7 +++---- drivers/char/synclink.c | 7 +++---- drivers/char/synclink_gt.c | 7 +++---- drivers/char/synclinkmp.c | 7 +++---- include/linux/hdlc.h | 8 ++++---- 5 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 77b364889224..caf6e4d19469 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -4005,10 +4005,9 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, * * skb socket buffer containing HDLC frame * dev pointer to network device structure - * - * returns 0 if success, otherwise error code */ -static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hdlcdev_xmit(struct sk_buff *skb, + struct net_device *dev) { MGSLPC_INFO *info = dev_to_port(dev); unsigned long flags; @@ -4043,7 +4042,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) } spin_unlock_irqrestore(&info->lock,flags); - return 0; + return NETDEV_TX_OK; } /** diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index 813552f14884..4846b73ef28d 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -7697,10 +7697,9 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, * * skb socket buffer containing HDLC frame * dev pointer to network device structure - * - * returns 0 if success, otherwise error code */ -static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hdlcdev_xmit(struct sk_buff *skb, + struct net_device *dev) { struct mgsl_struct *info = dev_to_port(dev); unsigned long flags; @@ -7731,7 +7730,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) usc_start_transmitter(info); spin_unlock_irqrestore(&info->irq_spinlock,flags); - return 0; + return NETDEV_TX_OK; } /** diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 91f20a92fddf..8678f0c8699d 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -1497,10 +1497,9 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, * * skb socket buffer containing HDLC frame * dev pointer to network device structure - * - * returns 0 if success, otherwise error code */ -static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hdlcdev_xmit(struct sk_buff *skb, + struct net_device *dev) { struct slgt_info *info = dev_to_port(dev); unsigned long flags; @@ -1529,7 +1528,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) update_tx_timer(info); spin_unlock_irqrestore(&info->lock,flags); - return 0; + return NETDEV_TX_OK; } /** diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 8d4a2a8a0a70..2b18adc4ee19 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -1608,10 +1608,9 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, * * skb socket buffer containing HDLC frame * dev pointer to network device structure - * - * returns 0 if success, otherwise error code */ -static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hdlcdev_xmit(struct sk_buff *skb, + struct net_device *dev) { SLMP_INFO *info = dev_to_port(dev); unsigned long flags; @@ -1642,7 +1641,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) tx_start(info); spin_unlock_irqrestore(&info->lock,flags); - return 0; + 
return NETDEV_TX_OK; } /** diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index 6a6e701f1631..ee275c8b3df1 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -38,7 +38,7 @@ struct hdlc_proto { int (*ioctl)(struct net_device *dev, struct ifreq *ifr); __be16 (*type_trans)(struct sk_buff *skb, struct net_device *dev); int (*netif_rx)(struct sk_buff *skb); - int (*xmit)(struct sk_buff *skb, struct net_device *dev); + netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev); struct module *module; struct hdlc_proto *next; /* next protocol in the list */ }; @@ -51,7 +51,7 @@ typedef struct hdlc_device { unsigned short encoding, unsigned short parity); /* hardware driver must handle this instead of dev->hard_start_xmit */ - int (*xmit)(struct sk_buff *skb, struct net_device *dev); + netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev); /* Things below are for HDLC layer internal use only */ const struct hdlc_proto *proto; @@ -60,7 +60,7 @@ typedef struct hdlc_device { spinlock_t state_lock; void *state; void *priv; -}hdlc_device; +} hdlc_device; @@ -106,7 +106,7 @@ void hdlc_close(struct net_device *dev); /* May be used by hardware driver */ int hdlc_change_mtu(struct net_device *dev, int new_mtu); /* Must be pointed to by hw driver's dev->netdev_ops->ndo_start_xmit */ -int hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev); int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto, size_t size); -- cgit v1.2.3 From 61357325f377889a1daffa14962d705dc814dd0e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 31 Aug 2009 19:50:58 +0000 Subject: netdev: convert bulk of drivers to netdev_tx_t In a couple of cases collapse some extra code like: int retval = NETDEV_TX_OK; ... return retval; into return NETDEV_TX_OK; Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/ieee802154/fakehard.c | 3 ++- drivers/net/8139cp.c | 3 ++- drivers/net/8139too.c | 7 ++++--- drivers/net/82596.c | 5 ++--- drivers/net/8390.c | 2 +- drivers/net/8390.h | 4 ++-- drivers/net/8390p.c | 2 +- drivers/net/a2065.c | 3 ++- drivers/net/acenic.c | 3 ++- drivers/net/acenic.h | 3 ++- drivers/net/amd8111e.c | 3 ++- drivers/net/arcnet/arcnet.c | 3 ++- drivers/net/ariadne.c | 6 ++++-- drivers/net/at1700.c | 6 ++++-- drivers/net/atl1c/atl1c_main.c | 3 ++- drivers/net/atl1e/atl1e_main.c | 3 ++- drivers/net/atlx/atl1.c | 3 ++- drivers/net/atlx/atl2.c | 3 ++- drivers/net/atp.c | 6 ++++-- drivers/net/au1000_eth.c | 2 +- drivers/net/b44.c | 2 +- drivers/net/benet/be_main.c | 3 ++- drivers/net/bnx2.c | 2 +- drivers/net/bnx2x_main.c | 2 +- drivers/net/can/sja1000/sja1000.c | 3 ++- drivers/net/cassini.c | 2 +- drivers/net/chelsio/sge.c | 2 +- drivers/net/chelsio/sge.h | 2 +- drivers/net/cs89x0.c | 4 ++-- drivers/net/cxgb3/adapter.h | 2 +- drivers/net/cxgb3/sge.c | 2 +- drivers/net/defxx.c | 10 ++++------ drivers/net/depca.c | 6 ++++-- drivers/net/dl2k.c | 4 ++-- drivers/net/dnet.c | 2 +- drivers/net/eepro.c | 6 ++++-- drivers/net/eexpress.c | 5 +++-- drivers/net/enc28j60.c | 3 ++- drivers/net/enic/enic_main.c | 3 ++- drivers/net/epic100.c | 5 +++-- drivers/net/eth16i.c | 4 ++-- drivers/net/ethoc.c | 2 +- drivers/net/ewrk3.c | 4 ++-- drivers/net/fealnx.c | 4 ++-- drivers/net/forcedeth.c | 5 +++-- drivers/net/hamachi.c | 6 ++++-- drivers/net/hp100.c | 13 ++++++++----- drivers/net/ibmlana.c | 2 +- drivers/net/ibmveth.c | 3 ++- drivers/net/ipg.c | 3 ++- drivers/net/jme.c | 2 +- drivers/net/ks8842.c | 3 ++- drivers/net/ks8851.c | 5 +++-- drivers/net/lance.c | 6 ++++-- drivers/net/lib8390.c | 3 ++- drivers/net/loopback.c | 3 ++- drivers/net/lp486e.c | 4 ++-- drivers/net/mlx4/en_tx.c | 2 +- drivers/net/mlx4/mlx4_en.h | 2 +- drivers/net/myri10ge/myri10ge.c | 11 +++++++---- drivers/net/natsemi.c | 4 ++-- drivers/net/netxen/netxen_nic_main.c | 5 +++-- drivers/net/ni52.c | 5 +++-- drivers/net/ni65.c | 6 ++++-- drivers/net/niu.c | 3 ++- drivers/net/ns83820.c | 3 ++- drivers/net/pcnet32.c | 6 ++++-- drivers/net/qla3xxx.c | 3 ++- drivers/net/qlge/qlge_main.c | 2 +- drivers/net/r6040.c | 10 +++++----- drivers/net/r8169.c | 12 ++++++------ drivers/net/rrunner.c | 3 ++- drivers/net/rrunner.h | 3 ++- drivers/net/s2io.c | 2 +- drivers/net/sb1000.c | 5 +++-- drivers/net/sc92031.c | 3 ++- drivers/net/seeq8005.c | 6 ++++-- drivers/net/sfc/efx.h | 5 +++-- drivers/net/sfc/selftest.c | 3 ++- drivers/net/sfc/tx.c | 18 ++++++++---------- drivers/net/sfc/tx.h | 3 ++- drivers/net/sis190.c | 3 ++- drivers/net/sis900.c | 5 +++-- drivers/net/skfp/skfddi.c | 6 ++++-- drivers/net/skge.c | 3 ++- drivers/net/sky2.c | 3 ++- drivers/net/smc9194.c | 6 ++++-- drivers/net/smsc9420.c | 3 ++- drivers/net/starfire.c | 4 ++-- drivers/net/sundance.c | 4 ++-- drivers/net/sungem.c | 3 ++- drivers/net/sunhme.c | 3 ++- drivers/net/tehuti.c | 3 ++- drivers/net/tg3.c | 9 ++++++--- drivers/net/tlan.c | 4 ++-- drivers/net/typhoon.c | 2 +- drivers/net/via-rhine.c | 6 ++++-- drivers/net/via-velocity.c | 3 ++- drivers/net/vxge/vxge-main.c | 2 +- drivers/net/yellowfin.c | 6 ++++-- drivers/net/znet.c | 5 +++-- include/linux/arcdevice.h | 3 ++- 102 files changed, 253 insertions(+), 173 deletions(-) (limited to 'include/linux') diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c index c1c9697f9fde..96a2959ce877 100644 --- a/drivers/ieee802154/fakehard.c +++ b/drivers/ieee802154/fakehard.c @@ -257,7 +257,8 @@ static 
int ieee802154_fake_close(struct net_device *dev) return 0; } -static int ieee802154_fake_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ieee802154_fake_xmit(struct sk_buff *skb, + struct net_device *dev) { skb->iif = dev->ifindex; skb->dev = dev; diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 4a8995aaeca3..462d9f59c53a 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -736,7 +736,8 @@ static void cp_tx (struct cp_private *cp) netif_wake_queue(cp->dev); } -static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t cp_start_xmit (struct sk_buff *skb, + struct net_device *dev) { struct cp_private *cp = netdev_priv(dev); unsigned entry; diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index b39ec98345ea..4a3628755026 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -628,8 +628,8 @@ static void mdio_write (struct net_device *dev, int phy_id, int location, static void rtl8139_start_thread(struct rtl8139_private *tp); static void rtl8139_tx_timeout (struct net_device *dev); static void rtl8139_init_ring (struct net_device *dev); -static int rtl8139_start_xmit (struct sk_buff *skb, - struct net_device *dev); +static netdev_tx_t rtl8139_start_xmit (struct sk_buff *skb, + struct net_device *dev); #ifdef CONFIG_NET_POLL_CONTROLLER static void rtl8139_poll_controller(struct net_device *dev); #endif @@ -1687,7 +1687,8 @@ static void rtl8139_tx_timeout (struct net_device *dev) } } -static int rtl8139_start_xmit (struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t rtl8139_start_xmit (struct sk_buff *skb, + struct net_device *dev) { struct rtl8139_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; diff --git a/drivers/net/82596.c b/drivers/net/82596.c index 996cc9102215..ea6b139b812c 100644 --- a/drivers/net/82596.c +++ b/drivers/net/82596.c @@ -356,7 +356,7 @@ static char init_setup[] = 0x7f /* *multi IA */ }; static int i596_open(struct net_device *dev); -static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); @@ -1054,8 +1054,7 @@ static void i596_tx_timeout (struct net_device *dev) netif_wake_queue (dev); } - -static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct i596_private *lp = dev->ml_priv; struct tx_cmd *tx_cmd; diff --git a/drivers/net/8390.c b/drivers/net/8390.c index 21153dea8ebe..7c7518be1756 100644 --- a/drivers/net/8390.c +++ b/drivers/net/8390.c @@ -17,7 +17,7 @@ int ei_close(struct net_device *dev) } EXPORT_SYMBOL(ei_close); -int ei_start_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t ei_start_xmit(struct sk_buff *skb, struct net_device *dev) { return __ei_start_xmit(skb, dev); } diff --git a/drivers/net/8390.h b/drivers/net/8390.h index 3c61d6d2748a..3d9e8fb4fbee 100644 --- a/drivers/net/8390.h +++ b/drivers/net/8390.h @@ -40,7 +40,7 @@ extern int ei_open(struct net_device *dev); extern int ei_close(struct net_device *dev); extern irqreturn_t ei_interrupt(int irq, void *dev_id); extern void ei_tx_timeout(struct net_device *dev); -extern int ei_start_xmit(struct sk_buff *skb, struct net_device *dev); +extern netdev_tx_t ei_start_xmit(struct sk_buff 
*skb, struct net_device *dev); extern void ei_set_multicast_list(struct net_device *dev); extern struct net_device_stats *ei_get_stats(struct net_device *dev); @@ -58,7 +58,7 @@ extern int eip_open(struct net_device *dev); extern int eip_close(struct net_device *dev); extern irqreturn_t eip_interrupt(int irq, void *dev_id); extern void eip_tx_timeout(struct net_device *dev); -extern int eip_start_xmit(struct sk_buff *skb, struct net_device *dev); +extern netdev_tx_t eip_start_xmit(struct sk_buff *skb, struct net_device *dev); extern void eip_set_multicast_list(struct net_device *dev); extern struct net_device_stats *eip_get_stats(struct net_device *dev); diff --git a/drivers/net/8390p.c b/drivers/net/8390p.c index d225c291fd93..a2a64ea0b691 100644 --- a/drivers/net/8390p.c +++ b/drivers/net/8390p.c @@ -22,7 +22,7 @@ int eip_close(struct net_device *dev) } EXPORT_SYMBOL(eip_close); -int eip_start_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t eip_start_xmit(struct sk_buff *skb, struct net_device *dev) { return __ei_start_xmit(skb, dev); } diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c index 174a81187a95..b7ec0368d7e8 100644 --- a/drivers/net/a2065.c +++ b/drivers/net/a2065.c @@ -547,7 +547,8 @@ static void lance_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int lance_start_xmit (struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t lance_start_xmit (struct sk_buff *skb, + struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 08419ee10290..5f0b05c2d71f 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -2464,7 +2464,8 @@ ace_load_tx_bd(struct ace_private *ap, struct tx_desc *desc, u64 addr, } -static int ace_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ace_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; diff --git a/drivers/net/acenic.h b/drivers/net/acenic.h index c987c9b5a137..17079b927ffa 100644 --- a/drivers/net/acenic.h +++ b/drivers/net/acenic.h @@ -775,7 +775,8 @@ static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs); static irqreturn_t ace_interrupt(int irq, void *dev_id); static int ace_load_firmware(struct net_device *dev); static int ace_open(struct net_device *dev); -static int ace_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ace_start_xmit(struct sk_buff *skb, + struct net_device *dev); static int ace_close(struct net_device *dev); static void ace_tasklet(unsigned long dev); static void ace_dump_trace(struct ace_private *ap); diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c index 61ac671f97bf..98b5f462c092 100644 --- a/drivers/net/amd8111e.c +++ b/drivers/net/amd8111e.c @@ -1300,7 +1300,8 @@ static int amd8111e_tx_queue_avail(struct amd8111e_priv* lp ) This function will queue the transmit packets to the descriptors and will trigger the send operation. It also initializes the transmit descriptors with buffer physical address, byte count, ownership to hardware etc. 
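All of the hunks in this commit reduce to the same shape. For orientation, here is a minimal sketch of the converted transmit entry point; the foo_* names are hypothetical stand-ins for illustration, not code from any driver in this series:

        #include <linux/netdevice.h>
        #include <linux/skbuff.h>

        /* Hypothetical driver helpers, assumed for illustration only. */
        static bool foo_tx_ring_has_room(struct net_device *dev);
        static void foo_queue_for_dma(struct net_device *dev, struct sk_buff *skb);

        static netdev_tx_t foo_start_xmit(struct sk_buff *skb,
                                          struct net_device *dev)
        {
                if (!foo_tx_ring_has_room(dev)) {
                        /* No descriptors: stop the queue, hand the skb back. */
                        netif_stop_queue(dev);
                        return NETDEV_TX_BUSY;
                }

                foo_queue_for_dma(dev, skb);    /* skb is consumed from here on */
                return NETDEV_TX_OK;
        }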
*/ -static int amd8111e_start_xmit(struct sk_buff *skb, struct net_device * dev) +static netdev_tx_t amd8111e_start_xmit(struct sk_buff *skb, + struct net_device * dev) { struct amd8111e_priv *lp = netdev_priv(dev); int tx_index; diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 7d227cdab9f8..75a572560909 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -591,7 +591,8 @@ static int arcnet_rebuild_header(struct sk_buff *skb) /* Called by the kernel in order to transmit a packet. */ -int arcnet_send_packet(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t arcnet_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct arcnet_local *lp = netdev_priv(dev); struct archdr *pkt; diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c index 47d976cc3d7d..c35af3e106b1 100644 --- a/drivers/net/ariadne.c +++ b/drivers/net/ariadne.c @@ -115,7 +115,8 @@ struct lancedata { static int ariadne_open(struct net_device *dev); static void ariadne_init_ring(struct net_device *dev); -static int ariadne_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ariadne_start_xmit(struct sk_buff *skb, + struct net_device *dev); static void ariadne_tx_timeout(struct net_device *dev); static int ariadne_rx(struct net_device *dev); static void ariadne_reset(struct net_device *dev); @@ -589,7 +590,8 @@ static void ariadne_tx_timeout(struct net_device *dev) } -static int ariadne_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ariadne_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ariadne_private *priv = netdev_priv(dev); volatile struct Am79C960 *lance = (struct Am79C960*)dev->base_addr; diff --git a/drivers/net/at1700.c b/drivers/net/at1700.c index 5349c58d1fae..544d5af6950e 100644 --- a/drivers/net/at1700.c +++ b/drivers/net/at1700.c @@ -159,7 +159,8 @@ struct net_local { static int at1700_probe1(struct net_device *dev, int ioaddr); static int read_eeprom(long ioaddr, int location); static int net_open(struct net_device *dev); -static int net_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t net_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t net_interrupt(int irq, void *dev_id); static void net_rx(struct net_device *dev); static int net_close(struct net_device *dev); @@ -595,7 +596,8 @@ static void net_tx_timeout (struct net_device *dev) } -static int net_send_packet (struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t net_send_packet (struct sk_buff *skb, + struct net_device *dev) { struct net_local *lp = netdev_priv(dev); int ioaddr = dev->base_addr; diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index 1d601ce7d5b2..bf7cc83e9836 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -2055,7 +2055,8 @@ static void atl1c_tx_queue(struct atl1c_adapter *adapter, struct sk_buff *skb, AT_WRITE_REG(&adapter->hw, REG_MB_PRIO_PROD_IDX, prod_data); } -static int atl1c_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { struct atl1c_adapter *adapter = netdev_priv(netdev); unsigned long flags; diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index 4570749e3d3b..bca127e65f95 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -1839,7 +1839,8 @@ static void atl1e_tx_queue(struct atl1e_adapter *adapter, 
u16 count, AT_WRITE_REG(&adapter->hw, REG_MB_TPD_PROD_IDX, tx_ring->next_to_use); } -static int atl1e_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { struct atl1e_adapter *adapter = netdev_priv(netdev); unsigned long flags; diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 8bca12f71390..00569dc1313c 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -2349,7 +2349,8 @@ static void atl1_tx_queue(struct atl1_adapter *adapter, u16 count, atomic_set(&tpd_ring->next_to_use, next_to_use); } -static int atl1_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { struct atl1_adapter *adapter = netdev_priv(netdev); struct atl1_tpd_ring *tpd_ring = &adapter->tpd_ring; diff --git a/drivers/net/atlx/atl2.c b/drivers/net/atlx/atl2.c index 204db961029e..d0bcb572d51e 100644 --- a/drivers/net/atlx/atl2.c +++ b/drivers/net/atlx/atl2.c @@ -821,7 +821,8 @@ static inline int TxdFreeBytes(struct atl2_adapter *adapter) (int) (txd_read_ptr - adapter->txd_write_ptr - 1); } -static int atl2_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t atl2_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { struct atl2_adapter *adapter = netdev_priv(netdev); struct tx_pkt_header *txph; diff --git a/drivers/net/atp.c b/drivers/net/atp.c index 4beacc9c909f..9043294fe617 100644 --- a/drivers/net/atp.c +++ b/drivers/net/atp.c @@ -199,7 +199,8 @@ static int net_open(struct net_device *dev); static void hardware_init(struct net_device *dev); static void write_packet(long ioaddr, int length, unsigned char *packet, int pad, int mode); static void trigger_send(long ioaddr, int length); -static int atp_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t atp_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t atp_interrupt(int irq, void *dev_id); static void net_rx(struct net_device *dev); static void read_block(long ioaddr, int length, unsigned char *buffer, int data_mode); @@ -552,7 +553,8 @@ static void tx_timeout(struct net_device *dev) dev->stats.tx_errors++; } -static int atp_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t atp_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct net_local *lp = netdev_priv(dev); long ioaddr = dev->base_addr; diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c index 2aab1ebc6cd1..407fd45f56a1 100644 --- a/drivers/net/au1000_eth.c +++ b/drivers/net/au1000_eth.c @@ -937,7 +937,7 @@ static int au1000_close(struct net_device *dev) /* * Au1000 transmit routine. 
*/ -static int au1000_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t au1000_tx(struct sk_buff *skb, struct net_device *dev) { struct au1000_private *aup = netdev_priv(dev); struct net_device_stats *ps = &dev->stats; diff --git a/drivers/net/b44.c b/drivers/net/b44.c index 07c92f34d8d8..bee510177a3f 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -946,7 +946,7 @@ static void b44_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int b44_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t b44_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct b44 *bp = netdev_priv(dev); int rc = NETDEV_TX_OK; diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 7b9efee890f8..e19fe1dcd144 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -427,7 +427,8 @@ static int make_tx_wrbs(struct be_adapter *adapter, return copied; } -static int be_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t be_xmit(struct sk_buff *skb, + struct net_device *netdev) { struct be_adapter *adapter = netdev_priv(netdev); struct be_tx_obj *tx_obj = &adapter->tx_obj; diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index c4e85f694272..fcaf3bc8277e 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -6283,7 +6283,7 @@ bnx2_vlan_rx_register(struct net_device *dev, struct vlan_group *vlgrp) * bnx2_tx_int() runs without netif_tx_lock unless it needs to call * netif_wake_queue(). */ -static int +static netdev_tx_t bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnx2 *bp = netdev_priv(dev); diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index b318d16a4d91..e2e50267cc64 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -10936,7 +10936,7 @@ exit_lbl: * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call * netif_wake_queue() */ -static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnx2x *bp = netdev_priv(dev); struct bnx2x_fastpath *fp, *fp_stat; diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index b3004de1e5e4..9ce3ddac4e9b 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -238,7 +238,8 @@ static void chipset_init(struct net_device *dev) * xx xx xx xx ff ll 00 11 22 33 44 55 66 77 * [ can-id ] [flags] [len] [can data (up to 8 bytes] */ -static int sja1000_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t sja1000_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct sja1000_priv *priv = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index 299a33b914f8..7517dc1da650 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -2918,7 +2918,7 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, return 0; } -static int cas_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t cas_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct cas *cp = netdev_priv(dev); diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index 3711d64e45ef..8c658cf6f62f 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1776,7 +1776,7 @@ static inline int eth_hdr_len(const void *data) /* * Adds the CPL header to the sk_buff and passes it to 
t1_sge_tx. */ -int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct adapter *adapter = dev->ml_priv; struct sge *sge = adapter->sge; diff --git a/drivers/net/chelsio/sge.h b/drivers/net/chelsio/sge.h index 8c9405123992..00cc37fc1f6f 100644 --- a/drivers/net/chelsio/sge.h +++ b/drivers/net/chelsio/sge.h @@ -78,7 +78,7 @@ void t1_sge_destroy(struct sge *); irqreturn_t t1_interrupt(int irq, void *cookie); int t1_poll(struct napi_struct *, int); -int t1_start_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev); void t1_set_vlan_accel(struct adapter *adapter, int on_off); void t1_sge_start(struct sge *); void t1_sge_stop(struct sge *); diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c index 839cac5fd8a5..0c54219960e2 100644 --- a/drivers/net/cs89x0.c +++ b/drivers/net/cs89x0.c @@ -246,7 +246,7 @@ struct net_local { static int cs89x0_probe1(struct net_device *dev, int ioaddr, int modular); static int net_open(struct net_device *dev); -static int net_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t net_send_packet(struct sk_buff *skb, struct net_device *dev); static irqreturn_t net_interrupt(int irq, void *dev_id); static void set_multicast_list(struct net_device *dev); static void net_timeout(struct net_device *dev); @@ -1518,7 +1518,7 @@ static void net_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int net_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t net_send_packet(struct sk_buff *skb,struct net_device *dev) { struct net_local *lp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h index 74723f2e7431..2b1aea6aa558 100644 --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h @@ -309,7 +309,7 @@ void t3_stop_sge_timers(struct adapter *adap); void t3_free_sge_resources(struct adapter *adap); void t3_sge_err_intr_handler(struct adapter *adapter); irq_handler_t t3_intr_handler(struct adapter *adap, int polling); -int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev); int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb); void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p); int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 29c79eb43beb..f86612857a73 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -1216,7 +1216,7 @@ static inline void t3_stop_tx_queue(struct netdev_queue *txq, * * Add a packet to an SGE Tx queue. Runs with softirqs disabled. 
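Header prototypes such as chelsio's sge.h and cxgb3's adapter.h above must change in the same patch as the definitions, because these functions end up installed as .ndo_start_xmit, which carries the typed signature; an xmit still returning plain int would no longer match the ops slot without a compiler warning. A sketch of the wiring, with a hypothetical foo_xmit:

        #include <linux/netdevice.h>
        #include <linux/skbuff.h>

        static netdev_tx_t foo_xmit(struct sk_buff *skb, struct net_device *dev);

        static const struct net_device_ops foo_netdev_ops = {
                /* An int-returning foo_xmit would now trigger an
                 * incompatible-pointer-type warning on this line. */
                .ndo_start_xmit = foo_xmit,
        };

        static void foo_setup(struct net_device *dev)
        {
                dev->netdev_ops = &foo_netdev_ops;
        }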
*/ -int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) { int qidx; unsigned int ndesc, pidx, credits, gen, compl; diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c index b2e0a8fc21d7..6a6ea038d7a3 100644 --- a/drivers/net/defxx.c +++ b/drivers/net/defxx.c @@ -300,7 +300,8 @@ static int dfx_rcv_init(DFX_board_t *bp, int get_buffers); static void dfx_rcv_queue_process(DFX_board_t *bp); static void dfx_rcv_flush(DFX_board_t *bp); -static int dfx_xmt_queue_pkt(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t dfx_xmt_queue_pkt(struct sk_buff *skb, + struct net_device *dev); static int dfx_xmt_done(DFX_board_t *bp); static void dfx_xmt_flush(DFX_board_t *bp); @@ -3188,11 +3189,8 @@ static void dfx_rcv_queue_process( * None */ -static int dfx_xmt_queue_pkt( - struct sk_buff *skb, - struct net_device *dev - ) - +static netdev_tx_t dfx_xmt_queue_pkt(struct sk_buff *skb, + struct net_device *dev) { DFX_board_t *bp = netdev_priv(dev); u8 prod; /* local transmit producer index */ diff --git a/drivers/net/depca.c b/drivers/net/depca.c index adb997c5bb5d..9686c1fa28f1 100644 --- a/drivers/net/depca.c +++ b/drivers/net/depca.c @@ -516,7 +516,8 @@ struct depca_private { ** Public Functions */ static int depca_open(struct net_device *dev); -static int depca_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t depca_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t depca_interrupt(int irq, void *dev_id); static int depca_close(struct net_device *dev); static int depca_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); @@ -928,7 +929,8 @@ static void depca_tx_timeout(struct net_device *dev) /* ** Writes a socket buffer to TX descriptor ring and starts transmission */ -static int depca_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t depca_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct depca_private *lp = netdev_priv(dev); u_long ioaddr = dev->base_addr; diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c index 4b6a219fecea..7fa7a907f134 100644 --- a/drivers/net/dl2k.c +++ b/drivers/net/dl2k.c @@ -59,7 +59,7 @@ static int rio_open (struct net_device *dev); static void rio_timer (unsigned long data); static void rio_tx_timeout (struct net_device *dev); static void alloc_list (struct net_device *dev); -static int start_xmit (struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev); static irqreturn_t rio_interrupt (int irq, void *dev_instance); static void rio_free_tx (struct net_device *dev, int irq); static void tx_error (struct net_device *dev, int tx_status); @@ -600,7 +600,7 @@ alloc_list (struct net_device *dev) return; } -static int +static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); diff --git a/drivers/net/dnet.c b/drivers/net/dnet.c index 2818d5de3940..234685213f1a 100644 --- a/drivers/net/dnet.c +++ b/drivers/net/dnet.c @@ -541,7 +541,7 @@ static inline void dnet_print_skb(struct sk_buff *skb) #define dnet_print_skb(skb) do {} while (0) #endif -static int dnet_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t dnet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct dnet *bp = netdev_priv(dev); diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c index 53317a83857a..1e934160062c 100644 --- a/drivers/net/eepro.c +++ 
b/drivers/net/eepro.c @@ -309,7 +309,8 @@ struct eepro_local { static int eepro_probe1(struct net_device *dev, int autoprobe); static int eepro_open(struct net_device *dev); -static int eepro_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t eepro_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t eepro_interrupt(int irq, void *dev_id); static void eepro_rx(struct net_device *dev); static void eepro_transmit_interrupt(struct net_device *dev); @@ -1133,7 +1134,8 @@ static void eepro_tx_timeout (struct net_device *dev) } -static int eepro_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t eepro_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct eepro_local *lp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index d1b6368faacd..592de8f1668a 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c @@ -246,7 +246,8 @@ static char mca_irqmap[] = { 12, 9, 3, 4, 5, 10, 11, 15 }; static int eexp_open(struct net_device *dev); static int eexp_close(struct net_device *dev); static void eexp_timeout(struct net_device *dev); -static int eexp_xmit(struct sk_buff *buf, struct net_device *dev); +static netdev_tx_t eexp_xmit(struct sk_buff *buf, + struct net_device *dev); static irqreturn_t eexp_irq(int irq, void *dev_addr); static void eexp_set_multicast(struct net_device *dev); @@ -650,7 +651,7 @@ static void eexp_timeout(struct net_device *dev) * Called to transmit a packet, or to allow us to right ourselves * if the kernel thinks we've died. */ -static int eexp_xmit(struct sk_buff *buf, struct net_device *dev) +static netdev_tx_t eexp_xmit(struct sk_buff *buf, struct net_device *dev) { short length = buf->len; #ifdef CONFIG_SMP diff --git a/drivers/net/enc28j60.c b/drivers/net/enc28j60.c index 372d6c6a4e7f..117fc6c12e34 100644 --- a/drivers/net/enc28j60.c +++ b/drivers/net/enc28j60.c @@ -1276,7 +1276,8 @@ static void enc28j60_hw_tx(struct enc28j60_net *priv) locked_reg_bfset(priv, ECON1, ECON1_TXRTS); } -static int enc28j60_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t enc28j60_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct enc28j60_net *priv = netdev_priv(dev); diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index 8005b602f776..49912eb2a338 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c @@ -622,7 +622,8 @@ static inline void enic_queue_wq_skb(struct enic *enic, } /* netif_tx_lock held, process context with BHs disabled, or BH */ -static int enic_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb, + struct net_device *netdev) { struct enic *enic = netdev_priv(netdev); struct vnic_wq *wq = &enic->wq[0]; diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c index d668ff2af6e3..641a10d2e843 100644 --- a/drivers/net/epic100.c +++ b/drivers/net/epic100.c @@ -298,7 +298,8 @@ static void epic_restart(struct net_device *dev); static void epic_timer(unsigned long data); static void epic_tx_timeout(struct net_device *dev); static void epic_init_ring(struct net_device *dev); -static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t epic_start_xmit(struct sk_buff *skb, + struct net_device *dev); static int epic_rx(struct net_device *dev, int budget); static int epic_poll(struct napi_struct *napi, int budget); static irqreturn_t 
epic_interrupt(int irq, void *dev_instance); @@ -961,7 +962,7 @@ static void epic_init_ring(struct net_device *dev) return; } -static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t epic_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct epic_private *ep = netdev_priv(dev); int entry, free_count; diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c index 97d5205edc8f..71bfeec33a0b 100644 --- a/drivers/net/eth16i.c +++ b/drivers/net/eth16i.c @@ -405,7 +405,7 @@ static int eth16i_read_eeprom_word(int ioaddr); static void eth16i_eeprom_cmd(int ioaddr, unsigned char command); static int eth16i_open(struct net_device *dev); static int eth16i_close(struct net_device *dev); -static int eth16i_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t eth16i_tx(struct sk_buff *skb, struct net_device *dev); static void eth16i_rx(struct net_device *dev); static void eth16i_timeout(struct net_device *dev); static irqreturn_t eth16i_interrupt(int irq, void *dev_id); @@ -1053,7 +1053,7 @@ static void eth16i_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int eth16i_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t eth16i_tx(struct sk_buff *skb, struct net_device *dev) { struct eth16i_local *lp = netdev_priv(dev); int ioaddr = dev->base_addr; diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c index 4dbe5f173273..b871aefed9c6 100644 --- a/drivers/net/ethoc.c +++ b/drivers/net/ethoc.c @@ -802,7 +802,7 @@ static struct net_device_stats *ethoc_stats(struct net_device *dev) return &priv->stats; } -static int ethoc_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ethoc_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ethoc *priv = netdev_priv(dev); struct ethoc_bd bd; diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c index 9c51bc813ad3..b2a5ec8f3721 100644 --- a/drivers/net/ewrk3.c +++ b/drivers/net/ewrk3.c @@ -298,7 +298,7 @@ struct ewrk3_private { ** Public Functions */ static int ewrk3_open(struct net_device *dev); -static int ewrk3_queue_pkt(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ewrk3_queue_pkt(struct sk_buff *skb, struct net_device *dev); static irqreturn_t ewrk3_interrupt(int irq, void *dev_id); static int ewrk3_close(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -764,7 +764,7 @@ static void ewrk3_timeout(struct net_device *dev) /* ** Writes a socket buffer to the free page queue */ -static int ewrk3_queue_pkt (struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ewrk3_queue_pkt(struct sk_buff *skb, struct net_device *dev) { struct ewrk3_private *lp = netdev_priv(dev); u_long iobase = dev->base_addr; diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c index f66da84a9398..18d5fbb9673e 100644 --- a/drivers/net/fealnx.c +++ b/drivers/net/fealnx.c @@ -433,7 +433,7 @@ static void netdev_timer(unsigned long data); static void reset_timer(unsigned long data); static void fealnx_tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); -static int start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); static int netdev_rx(struct net_device *dev); static void set_rx_mode(struct net_device *dev); @@ -1305,7 +1305,7 @@ static void init_ring(struct net_device *dev) } -static int start_tx(struct sk_buff *skb, struct net_device 
*dev) +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 3b4e0766c7b2..0a1c2bb27d4d 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -2137,7 +2137,7 @@ static void nv_gear_backoff_reseed(struct net_device *dev) * nv_start_xmit: dev->hard_start_xmit function * Called with netif_tx_lock held. */ -static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u32 tx_flags = 0; @@ -2257,7 +2257,8 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static int nv_start_xmit_optimized(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, + struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u32 tx_flags = 0; diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index 635341d6a028..1d5064a09aca 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -557,7 +557,8 @@ static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void hamachi_timer(unsigned long data); static void hamachi_tx_timeout(struct net_device *dev); static void hamachi_init_ring(struct net_device *dev); -static int hamachi_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t hamachi_interrupt(int irq, void *dev_instance); static int hamachi_rx(struct net_device *dev); static inline int hamachi_tx(struct net_device *dev); @@ -1263,7 +1264,8 @@ do { \ } while (0) #endif -static int hamachi_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct hamachi_private *hmp = netdev_priv(dev); unsigned entry; diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c index d1b63387e9bc..a9a1a99f02dd 100644 --- a/drivers/net/hp100.c +++ b/drivers/net/hp100.c @@ -240,9 +240,10 @@ static int hp100_probe1(struct net_device *dev, int ioaddr, u_char bus, static int hp100_open(struct net_device *dev); static int hp100_close(struct net_device *dev); -static int hp100_start_xmit(struct sk_buff *skb, struct net_device *dev); -static int hp100_start_xmit_bm(struct sk_buff *skb, - struct net_device *dev); +static netdev_tx_t hp100_start_xmit(struct sk_buff *skb, + struct net_device *dev); +static netdev_tx_t hp100_start_xmit_bm(struct sk_buff *skb, + struct net_device *dev); static void hp100_rx(struct net_device *dev); static struct net_device_stats *hp100_get_stats(struct net_device *dev); static void hp100_misc_interrupt(struct net_device *dev); @@ -1483,7 +1484,8 @@ static int hp100_check_lan(struct net_device *dev) */ /* tx function for busmaster mode */ -static int hp100_start_xmit_bm(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hp100_start_xmit_bm(struct sk_buff *skb, + struct net_device *dev) { unsigned long flags; int i, ok_flag; @@ -1635,7 +1637,8 @@ static void hp100_clean_txring(struct net_device *dev) } /* tx function for slave modes */ -static int hp100_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hp100_start_xmit(struct sk_buff *skb, + struct net_device *dev) { unsigned long flags; int i, ok_flag; diff --git a/drivers/net/ibmlana.c 
b/drivers/net/ibmlana.c index 448098d3b39b..090a6d3af112 100644 --- a/drivers/net/ibmlana.c +++ b/drivers/net/ibmlana.c @@ -812,7 +812,7 @@ static int ibmlana_close(struct net_device *dev) /* transmit a block. */ -static int ibmlana_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ibmlana_tx(struct sk_buff *skb, struct net_device *dev) { ibmlana_priv *priv = netdev_priv(dev); int tmplen, addr; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 76b295a18185..5862282ab2fe 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -887,7 +887,8 @@ static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) #define page_offset(v) ((unsigned long)(v) & ((1 << 12) - 1)) -static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, + struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); union ibmveth_buf_desc desc; diff --git a/drivers/net/ipg.c b/drivers/net/ipg.c index 43019461b776..382c5532e6c5 100644 --- a/drivers/net/ipg.c +++ b/drivers/net/ipg.c @@ -1858,7 +1858,8 @@ static int ipg_nic_stop(struct net_device *dev) return 0; } -static int ipg_nic_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ipg_nic_hard_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ipg_nic_private *sp = netdev_priv(dev); void __iomem *ioaddr = sp->ioaddr; diff --git a/drivers/net/jme.c b/drivers/net/jme.c index e7068c7cd627..1d2a32544ed2 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -1931,7 +1931,7 @@ jme_stop_queue_if_full(struct jme_adapter *jme) * This function is already protected by netif_tx_lock() */ -static int +static netdev_tx_t jme_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct jme_adapter *jme = netdev_priv(netdev); diff --git a/drivers/net/ks8842.c b/drivers/net/ks8842.c index 39b0aea2aab3..6e74aa9eea44 100644 --- a/drivers/net/ks8842.c +++ b/drivers/net/ks8842.c @@ -551,7 +551,8 @@ static int ks8842_close(struct net_device *netdev) return 0; } -static int ks8842_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t ks8842_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { int ret; struct ks8842_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ks8851.c b/drivers/net/ks8851.c index 9a1dea60c1c4..547ac7c7479c 100644 --- a/drivers/net/ks8851.c +++ b/drivers/net/ks8851.c @@ -868,11 +868,12 @@ static int ks8851_net_stop(struct net_device *dev) * and secondly so we can round up more than one packet to transmit which * means we can try and avoid generating too many transmit done interrupts. 
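Where a driver accumulates its status in a local before a single return, the series retypes the local rather than leaving it int, as in the ks8851 hunk just below and in sfc's efx_enqueue_skb and selftest further down. With the local declared netdev_tx_t, accidentally storing an errno or a NET_XMIT_* queueing code in it stands out. A sketch of that variant, hypothetical names again:

        #include <linux/netdevice.h>
        #include <linux/skbuff.h>

        static bool foo_reserve_tx_space(struct net_device *dev, unsigned int len);
        static void foo_submit(struct net_device *dev, struct sk_buff *skb);

        static netdev_tx_t foo_start_xmit(struct sk_buff *skb,
                                          struct net_device *dev)
        {
                netdev_tx_t ret = NETDEV_TX_OK; /* was: int ret = NETDEV_TX_OK; */

                if (!foo_reserve_tx_space(dev, skb->len)) {
                        netif_stop_queue(dev);
                        /* NETDEV_TX_BUSY returns the skb to the core for
                         * requeueing; it must not be freed on this path. */
                        ret = NETDEV_TX_BUSY;
                } else {
                        foo_submit(dev, skb);
                }
                return ret;
        }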
*/ -static int ks8851_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); unsigned needed = calc_txlen(skb->len); - int ret = NETDEV_TX_OK; + netdev_tx_t ret = NETDEV_TX_OK; if (netif_msg_tx_queued(ks)) ks_dbg(ks, "%s: skb %p, %d@%p\n", __func__, diff --git a/drivers/net/lance.c b/drivers/net/lance.c index 30fd4f5f1d5a..dcda30338b65 100644 --- a/drivers/net/lance.c +++ b/drivers/net/lance.c @@ -300,7 +300,8 @@ static unsigned char lance_need_isa_bounce_buffers = 1; static int lance_open(struct net_device *dev); static void lance_init_ring(struct net_device *dev, gfp_t mode); -static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, + struct net_device *dev); static int lance_rx(struct net_device *dev); static irqreturn_t lance_interrupt(int irq, void *dev_id); static int lance_close(struct net_device *dev); @@ -949,7 +950,8 @@ static void lance_tx_timeout (struct net_device *dev) } -static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct lance_private *lp = dev->ml_priv; int ioaddr = dev->base_addr; diff --git a/drivers/net/lib8390.c b/drivers/net/lib8390.c index d6be36000c5c..256119882b1e 100644 --- a/drivers/net/lib8390.c +++ b/drivers/net/lib8390.c @@ -299,7 +299,8 @@ static void __ei_tx_timeout(struct net_device *dev) * Sends a packet to an 8390 network device. */ -static int __ei_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t __ei_start_xmit(struct sk_buff *skb, + struct net_device *dev) { unsigned long e8390_base = dev->base_addr; struct ei_device *ei_local = (struct ei_device *) netdev_priv(dev); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 51bbce72bede..1bc654a73c47 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -69,7 +69,8 @@ struct pcpu_lstats { * The higher levels take care of making this non-reentrant (it's * called with bh's disabled). 
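The other half of the skb-ownership rule shows up in drop paths: once the driver decides to drop, it owns the skb, so it frees it itself and still returns NETDEV_TX_OK, which means "consumed", not "transmitted". The usbnet hunk at the top of this series, which deletes the retval = NET_XMIT_SUCCESS assignment in its drop: label, is exactly this. A hypothetical condensed version:

        #include <linux/netdevice.h>
        #include <linux/skbuff.h>

        static bool foo_link_ok(struct net_device *dev);
        static void foo_submit(struct net_device *dev, struct sk_buff *skb);

        static netdev_tx_t foo_xmit(struct sk_buff *skb, struct net_device *dev)
        {
                if (!foo_link_ok(dev)) {
                        /* Dropping: the skb is ours to free, and the core is
                         * told OK so it does not retry this packet. */
                        dev->stats.tx_dropped++;
                        dev_kfree_skb_any(skb);
                        return NETDEV_TX_OK;
                }

                foo_submit(dev, skb);
                return NETDEV_TX_OK;
        }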
*/ -static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t loopback_xmit(struct sk_buff *skb, + struct net_device *dev) { struct pcpu_lstats *pcpu_lstats, *lb_stats; int len; diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c index c292bad411ee..cc3ed9cf28be 100644 --- a/drivers/net/lp486e.c +++ b/drivers/net/lp486e.c @@ -377,7 +377,7 @@ static char init_setup[14] = { }; static int i596_open(struct net_device *dev); -static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); @@ -863,7 +863,7 @@ static int i596_open(struct net_device *dev) return 0; /* Always succeed */ } -static int i596_start_xmit (struct sk_buff *skb, struct net_device *dev) { +static netdev_tx_t i596_start_xmit (struct sk_buff *skb, struct net_device *dev) { struct tx_cmd *tx_cmd; short length; diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index 0ecc1e1b013e..d3d6e991065b 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c @@ -588,7 +588,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb) return skb_tx_hash(dev, skb); } -int mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index 4513fb4960dc..4376147b0ea0 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -518,7 +518,7 @@ int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); void mlx4_en_poll_tx_cq(unsigned long data); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb); -int mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, u32 size, u16 stride); diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 75deef35b1e0..6930c87f362e 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -360,7 +360,8 @@ MODULE_PARM_DESC(myri10ge_dca, "Enable DCA if possible"); #define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8) static void myri10ge_set_multicast_list(struct net_device *dev); -static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, + struct net_device *dev); static inline void put_be32(__be32 val, __be32 __iomem * p) { @@ -2656,7 +2657,8 @@ myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src, * it and try again. 
*/ -static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t myri10ge_xmit(struct sk_buff *skb, + struct net_device *dev) { struct myri10ge_priv *mgp = netdev_priv(dev); struct myri10ge_slice_state *ss; @@ -2947,12 +2949,13 @@ drop: } -static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, + struct net_device *dev) { struct sk_buff *segs, *curr; struct myri10ge_priv *mgp = netdev_priv(dev); struct myri10ge_slice_state *ss; - int status; + netdev_tx_t status; segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6); if (IS_ERR(segs)) diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c index 32db12a27342..bd41351e4e26 100644 --- a/drivers/net/natsemi.c +++ b/drivers/net/natsemi.c @@ -621,7 +621,7 @@ static void drain_ring(struct net_device *dev); static void free_ring(struct net_device *dev); static void reinit_ring(struct net_device *dev); static void init_registers(struct net_device *dev); -static int start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); static void netdev_error(struct net_device *dev, int intr_status); static int natsemi_poll(struct napi_struct *napi, int budget); @@ -2079,7 +2079,7 @@ static void reinit_ring(struct net_device *dev) reinit_rx(dev); } -static int start_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); void __iomem * ioaddr = ns_ioaddr(dev); diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index fab51d16f5fb..f824a392bf56 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -63,7 +63,8 @@ static int __devinit netxen_nic_probe(struct pci_dev *pdev, static void __devexit netxen_nic_remove(struct pci_dev *pdev); static int netxen_nic_open(struct net_device *netdev); static int netxen_nic_close(struct net_device *netdev); -static int netxen_nic_xmit_frame(struct sk_buff *, struct net_device *); +static netdev_tx_t netxen_nic_xmit_frame(struct sk_buff *, + struct net_device *); static void netxen_tx_timeout(struct net_device *netdev); static void netxen_reset_task(struct work_struct *work); static void netxen_watchdog(unsigned long); @@ -1599,7 +1600,7 @@ netxen_clear_cmddesc(u64 *desc) desc[2] = 0ULL; } -static int +static netdev_tx_t netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct netxen_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c index a0ac5d4f27d3..bd0ac690d12c 100644 --- a/drivers/net/ni52.c +++ b/drivers/net/ni52.c @@ -170,7 +170,7 @@ static int ni52_probe1(struct net_device *dev, int ioaddr); static irqreturn_t ni52_interrupt(int irq, void *dev_id); static int ni52_open(struct net_device *dev); static int ni52_close(struct net_device *dev); -static int ni52_send_packet(struct sk_buff *, struct net_device *); +static netdev_tx_t ni52_send_packet(struct sk_buff *, struct net_device *); static struct net_device_stats *ni52_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); static void ni52_timeout(struct net_device *dev); @@ -1173,7 +1173,8 @@ static void ni52_timeout(struct net_device *dev) * send frame */ -static int ni52_send_packet(struct sk_buff *skb, struct net_device *dev) +static 
netdev_tx_t ni52_send_packet(struct sk_buff *skb, + struct net_device *dev) { int len, i; #ifndef NO_NOPCOMMANDS diff --git a/drivers/net/ni65.c b/drivers/net/ni65.c index 81a061785898..752c2e4d9cf4 100644 --- a/drivers/net/ni65.c +++ b/drivers/net/ni65.c @@ -252,7 +252,8 @@ static void ni65_xmit_intr(struct net_device *dev,int); static int ni65_open(struct net_device *dev); static int ni65_lance_reinit(struct net_device *dev); static void ni65_init_lance(struct priv *p,unsigned char*,int,int); -static int ni65_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ni65_send_packet(struct sk_buff *skb, + struct net_device *dev); static void ni65_timeout(struct net_device *dev); static int ni65_close(struct net_device *dev); static int ni65_alloc_buffer(struct net_device *dev); @@ -1157,7 +1158,8 @@ static void ni65_timeout(struct net_device *dev) * Send a packet */ -static int ni65_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ni65_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct priv *p = dev->ml_priv; diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 3ada7ea2abca..119fd4e04141 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -6657,7 +6657,8 @@ static u64 niu_compute_tx_flags(struct sk_buff *skb, struct ethhdr *ehdr, return ret; } -static int niu_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t niu_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct niu *np = netdev_priv(dev); unsigned long align, headroom; diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 1576ac07216e..c594e1946476 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -1077,7 +1077,8 @@ static void ns83820_cleanup_tx(struct ns83820 *dev) * while trying to track down a bug in either the zero copy code or * the tx fifo (hence the MAX_FRAG_LEN). */ -static int ns83820_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t ns83820_hard_start_xmit(struct sk_buff *skb, + struct net_device *ndev) { struct ns83820 *dev = PRIV(ndev); u32 free_idx, cmdsts, extsts; diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 6859442f1862..6d28b18e7e28 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -303,7 +303,8 @@ static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *); static int pcnet32_probe1(unsigned long, int, struct pci_dev *); static int pcnet32_open(struct net_device *); static int pcnet32_init_ring(struct net_device *); -static int pcnet32_start_xmit(struct sk_buff *, struct net_device *); +static netdev_tx_t pcnet32_start_xmit(struct sk_buff *, + struct net_device *); static void pcnet32_tx_timeout(struct net_device *dev); static irqreturn_t pcnet32_interrupt(int, void *); static int pcnet32_close(struct net_device *); @@ -2481,7 +2482,8 @@ static void pcnet32_tx_timeout(struct net_device *dev) spin_unlock_irqrestore(&lp->lock, flags); } -static int pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t pcnet32_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct pcnet32_private *lp = netdev_priv(dev); unsigned long ioaddr = dev->base_addr; diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index 3e4b67aaa6ea..4c610511eb40 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -2572,7 +2572,8 @@ map_error: * The IOCB is always the top of the chain followed by one or more * OALs (when necessary). 
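The commit message's "collapse some extra code" shows up concretely in the r6040 and r8169 hunks below, where a write-only status variable disappears. The refactor, reduced to a sketch with hypothetical foo_* helpers:

        #include <linux/netdevice.h>
        #include <linux/skbuff.h>

        static bool foo_ring_full(struct net_device *dev);
        static void foo_queue_skb(struct net_device *dev, struct sk_buff *skb);

        /* Before: ret only ever holds the value about to be returned. */
        static int foo_xmit_before(struct sk_buff *skb, struct net_device *dev)
        {
                int ret = NETDEV_TX_OK;

                if (foo_ring_full(dev)) {
                        netif_stop_queue(dev);
                        ret = NETDEV_TX_BUSY;
                        return ret;
                }
                foo_queue_skb(dev, skb);
                return ret;
        }

        /* After: same behaviour, a typed return at each exit. */
        static netdev_tx_t foo_xmit_after(struct sk_buff *skb, struct net_device *dev)
        {
                if (foo_ring_full(dev)) {
                        netif_stop_queue(dev);
                        return NETDEV_TX_BUSY;
                }
                foo_queue_skb(dev, skb);
                return NETDEV_TX_OK;
        }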
*/ -static int ql3xxx_send(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t ql3xxx_send(struct sk_buff *skb, + struct net_device *ndev) { struct ql3_adapter *qdev = (struct ql3_adapter *)netdev_priv(ndev); struct ql3xxx_port_registers __iomem *port_regs = qdev->mem_map_registers; diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c index 8dd266befdc7..220529257828 100644 --- a/drivers/net/qlge/qlge_main.c +++ b/drivers/net/qlge/qlge_main.c @@ -2103,7 +2103,7 @@ static void ql_hw_csum_setup(struct sk_buff *skb, iph->daddr, len, iph->protocol, 0); } -static int qlge_send(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev) { struct tx_ring_desc *tx_ring_desc; struct ob_mac_iocb_req *mac_iocb_ptr; diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c index 8068a07eb2b3..7dfcb58b0eb4 100644 --- a/drivers/net/r6040.c +++ b/drivers/net/r6040.c @@ -883,13 +883,13 @@ static int r6040_open(struct net_device *dev) return 0; } -static int r6040_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t r6040_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct r6040_private *lp = netdev_priv(dev); struct r6040_descriptor *descptr; void __iomem *ioaddr = lp->base; unsigned long flags; - int ret = NETDEV_TX_OK; /* Critical Section */ spin_lock_irqsave(&lp->lock, flags); @@ -899,8 +899,7 @@ static int r6040_start_xmit(struct sk_buff *skb, struct net_device *dev) spin_unlock_irqrestore(&lp->lock, flags); netif_stop_queue(dev); printk(KERN_ERR DRV_NAME ": no tx descriptor\n"); - ret = NETDEV_TX_BUSY; - return ret; + return NETDEV_TX_BUSY; } /* Statistic Counter */ @@ -928,7 +927,8 @@ static int r6040_start_xmit(struct sk_buff *skb, struct net_device *dev) dev->trans_start = jiffies; spin_unlock_irqrestore(&lp->lock, flags); - return ret; + + return NETDEV_TX_OK; } static void r6040_multicast_list(struct net_device *dev) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 8cd85309c675..ec0092affd5d 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -507,7 +507,8 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(RTL8169_VERSION); static int rtl8169_open(struct net_device *dev); -static int rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance); static int rtl8169_init_ring(struct net_device *dev); static void rtl_hw_start(struct net_device *dev); @@ -3357,7 +3358,8 @@ static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev) return 0; } -static int rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); unsigned int frags, entry = tp->cur_tx % NUM_TX_DESC; @@ -3366,7 +3368,6 @@ static int rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev) dma_addr_t mapping; u32 status, len; u32 opts1; - int ret = NETDEV_TX_OK; if (unlikely(TX_BUFFS_AVAIL(tp) < skb_shinfo(skb)->nr_frags)) { if (netif_msg_drv(tp)) { @@ -3418,13 +3419,12 @@ static int rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev) } out: - return ret; + return NETDEV_TX_OK; err_stop: netif_stop_queue(dev); - ret = NETDEV_TX_BUSY; dev->stats.tx_dropped++; - goto out; + return NETDEV_TX_BUSY; } static void rtl8169_pcierr_interrupt(struct net_device *dev) diff 
--git a/drivers/net/rrunner.c b/drivers/net/rrunner.c index d95534655911..20a71749154a 100644 --- a/drivers/net/rrunner.c +++ b/drivers/net/rrunner.c @@ -1401,7 +1401,8 @@ static int rr_close(struct net_device *dev) } -static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t rr_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct rr_private *rrpriv = netdev_priv(dev); struct rr_regs __iomem *regs = rrpriv->regs; diff --git a/drivers/net/rrunner.h b/drivers/net/rrunner.h index 6173f11218df..28169043ae49 100644 --- a/drivers/net/rrunner.h +++ b/drivers/net/rrunner.h @@ -831,7 +831,8 @@ static int rr_init1(struct net_device *dev); static irqreturn_t rr_interrupt(int irq, void *dev_id); static int rr_open(struct net_device *dev); -static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t rr_start_xmit(struct sk_buff *skb, + struct net_device *dev); static int rr_close(struct net_device *dev); static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static unsigned int rr_read_eeprom(struct rr_private *rrpriv, diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 3138df5773ee..ddccf5fa56b6 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -4072,7 +4072,7 @@ static int s2io_close(struct net_device *dev) * 0 on success & 1 on failure. */ -static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) { struct s2io_nic *sp = netdev_priv(dev); u16 frg_cnt, frg_len, i, queue, queue_len, put_off, get_off; diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index 6a81aec645d9..ee366c5a8fa3 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -82,7 +82,8 @@ struct sb1000_private { extern int sb1000_probe(struct net_device *dev); static int sb1000_open(struct net_device *dev); static int sb1000_dev_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd); -static int sb1000_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t sb1000_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t sb1000_interrupt(int irq, void *dev_id); static int sb1000_close(struct net_device *dev); @@ -1080,7 +1081,7 @@ static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } /* transmit function: do nothing since SB1000 can't send anything out */ -static int +static netdev_tx_t sb1000_start_xmit(struct sk_buff *skb, struct net_device *dev) { printk(KERN_WARNING "%s: trying to transmit!!!\n", dev->name); diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c index e3156c97bb58..8d6030022d14 100644 --- a/drivers/net/sc92031.c +++ b/drivers/net/sc92031.c @@ -941,7 +941,8 @@ static struct net_device_stats *sc92031_get_stats(struct net_device *dev) return &dev->stats; } -static int sc92031_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t sc92031_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct sc92031_priv *priv = netdev_priv(dev); void __iomem *port_base = priv->port_base; diff --git a/drivers/net/seeq8005.c b/drivers/net/seeq8005.c index 7cc8bb814137..39246d457ac2 100644 --- a/drivers/net/seeq8005.c +++ b/drivers/net/seeq8005.c @@ -81,7 +81,8 @@ struct net_local { static int seeq8005_probe1(struct net_device *dev, int ioaddr); static int seeq8005_open(struct net_device *dev); static void seeq8005_timeout(struct net_device *dev); -static int seeq8005_send_packet(struct sk_buff *skb, struct net_device *dev); 
+static netdev_tx_t seeq8005_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t seeq8005_interrupt(int irq, void *dev_id); static void seeq8005_rx(struct net_device *dev); static int seeq8005_close(struct net_device *dev); @@ -394,7 +395,8 @@ static void seeq8005_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int seeq8005_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t seeq8005_send_packet(struct sk_buff *skb, + struct net_device *dev) { short length = skb->len; unsigned char *buf; diff --git a/drivers/net/sfc/efx.h b/drivers/net/sfc/efx.h index da157aa74b83..aecaf62f4929 100644 --- a/drivers/net/sfc/efx.h +++ b/drivers/net/sfc/efx.h @@ -20,8 +20,9 @@ #define FALCON_B_P_DEVID 0x0710 /* TX */ -extern int efx_xmit(struct efx_nic *efx, - struct efx_tx_queue *tx_queue, struct sk_buff *skb); +extern netdev_tx_t efx_xmit(struct efx_nic *efx, + struct efx_tx_queue *tx_queue, + struct sk_buff *skb); extern void efx_stop_queue(struct efx_nic *efx); extern void efx_wake_queue(struct efx_nic *efx); diff --git a/drivers/net/sfc/selftest.c b/drivers/net/sfc/selftest.c index b67ccca3fc1a..817c7efc11e0 100644 --- a/drivers/net/sfc/selftest.c +++ b/drivers/net/sfc/selftest.c @@ -400,7 +400,8 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue) struct efx_loopback_state *state = efx->loopback_selftest; struct efx_loopback_payload *payload; struct sk_buff *skb; - int i, rc; + int i; + netdev_tx_t rc; /* Transmit N copies of buffer */ for (i = 0; i < state->packet_count; i++) { diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c index 14a14788566c..489c4de31447 100644 --- a/drivers/net/sfc/tx.c +++ b/drivers/net/sfc/tx.c @@ -138,8 +138,8 @@ static void efx_tsoh_free(struct efx_tx_queue *tx_queue, * Returns NETDEV_TX_OK or NETDEV_TX_BUSY * You must hold netif_tx_lock() to call this function. */ -static int efx_enqueue_skb(struct efx_tx_queue *tx_queue, - struct sk_buff *skb) +static netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, + struct sk_buff *skb) { struct efx_nic *efx = tx_queue->efx; struct pci_dev *pci_dev = efx->pci_dev; @@ -152,7 +152,7 @@ static int efx_enqueue_skb(struct efx_tx_queue *tx_queue, unsigned int dma_len; bool unmap_single; int q_space, i = 0; - int rc = NETDEV_TX_OK; + netdev_tx_t rc = NETDEV_TX_OK; EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); @@ -353,14 +353,11 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, * * Context: netif_tx_lock held */ -inline int efx_xmit(struct efx_nic *efx, - struct efx_tx_queue *tx_queue, struct sk_buff *skb) +inline netdev_tx_t efx_xmit(struct efx_nic *efx, + struct efx_tx_queue *tx_queue, struct sk_buff *skb) { - int rc; - /* Map fragments for DMA and add to TX queue */ - rc = efx_enqueue_skb(tx_queue, skb); - return rc; + return efx_enqueue_skb(tx_queue, skb); } /* Initiate a packet transmission. We use one channel per CPU @@ -372,7 +369,8 @@ inline int efx_xmit(struct efx_nic *efx, * Note that returning anything other than NETDEV_TX_OK will cause the * OS to free the skb. 
*/ -int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev) +netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, + struct net_device *net_dev) { struct efx_nic *efx = netdev_priv(net_dev); struct efx_tx_queue *tx_queue; diff --git a/drivers/net/sfc/tx.h b/drivers/net/sfc/tx.h index 5e1cc234e42f..e3678962a5b4 100644 --- a/drivers/net/sfc/tx.h +++ b/drivers/net/sfc/tx.h @@ -18,7 +18,8 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue); void efx_init_tx_queue(struct efx_tx_queue *tx_queue); void efx_fini_tx_queue(struct efx_tx_queue *tx_queue); -int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev); +netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, + struct net_device *net_dev); void efx_release_tx_buffers(struct efx_tx_queue *tx_queue); #endif /* EFX_TX_H */ diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 1f040e8a000b..7cc9898f4e00 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -1168,7 +1168,8 @@ static int sis190_close(struct net_device *dev) return 0; } -static int sis190_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t sis190_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct sis190_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index 61ceeaaf104d..d8827323507a 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -214,7 +214,8 @@ static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_p static void sis900_tx_timeout(struct net_device *net_dev); static void sis900_init_tx_ring(struct net_device *net_dev); static void sis900_init_rx_ring(struct net_device *net_dev); -static int sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev); +static netdev_tx_t sis900_start_xmit(struct sk_buff *skb, + struct net_device *net_dev); static int sis900_rx(struct net_device *net_dev); static void sis900_finish_xmit (struct net_device *net_dev); static irqreturn_t sis900_interrupt(int irq, void *dev_instance); @@ -1571,7 +1572,7 @@ static void sis900_tx_timeout(struct net_device *net_dev) * tell upper layer if the buffer is full */ -static int +static netdev_tx_t sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev) { struct sis900_private *sis_priv = netdev_priv(net_dev); diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index 888a14a045ef..38a508b4aad9 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -107,7 +107,8 @@ static void skfp_ctl_set_multicast_list(struct net_device *dev); static void skfp_ctl_set_multicast_list_wo_lock(struct net_device *dev); static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr); static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -static int skfp_send_pkt(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t skfp_send_pkt(struct sk_buff *skb, + struct net_device *dev); static void send_queued_packets(struct s_smc *smc); static void CheckSourceAddress(unsigned char *frame, unsigned char *hw_addr); static void ResetAdapter(struct s_smc *smc); @@ -1056,7 +1057,8 @@ static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * Side Effects: * None */ -static int skfp_send_pkt(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t skfp_send_pkt(struct sk_buff *skb, + struct net_device *dev) { struct s_smc *smc = netdev_priv(dev); skfddi_priv *bp = &smc->os; diff --git a/drivers/net/skge.c b/drivers/net/skge.c 
index 543af2044f40..1a1e68549f5c 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -2746,7 +2746,8 @@ static inline int skge_avail(const struct skge_ring *ring) + (ring->to_clean - ring->to_use) - 1; } -static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t skge_xmit_frame(struct sk_buff *skb, + struct net_device *dev) { struct skge_port *skge = netdev_priv(dev); struct skge_hw *hw = skge->hw; diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index dd630cf18b4d..c7c0a5b7b53a 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1574,7 +1574,8 @@ static void sky2_tx_unmap(struct pci_dev *pdev, * the number of ring elements will probably be less than the number * of list elements used. */ -static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb, + struct net_device *dev) { struct sky2_port *sky2 = netdev_priv(dev); struct sky2_hw *hw = sky2->hw; diff --git a/drivers/net/smc9194.c b/drivers/net/smc9194.c index 0a1b6f401087..934a12012829 100644 --- a/drivers/net/smc9194.c +++ b/drivers/net/smc9194.c @@ -299,7 +299,8 @@ static void smc_hardware_send_packet( struct net_device * dev ); . to store the packet, I call this routine, which either sends it . now, or generates an interrupt when the card is ready for the . packet */ -static int smc_wait_to_send_packet( struct sk_buff * skb, struct net_device *dev ); +static netdev_tx_t smc_wait_to_send_packet( struct sk_buff * skb, + struct net_device *dev ); /* this does a soft reset on the device */ static void smc_reset( int ioaddr ); @@ -487,7 +488,8 @@ static void smc_setmulticast( int ioaddr, int count, struct dev_mc_list * addrs . o (NO): Enable interrupts and let the interrupt handler deal with it. . o (YES):Send it now. 
*/ -static int smc_wait_to_send_packet( struct sk_buff * skb, struct net_device * dev ) +static netdev_tx_t smc_wait_to_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct smc_local *lp = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c index 60abdb1081ad..514311d67b36 100644 --- a/drivers/net/smsc9420.c +++ b/drivers/net/smsc9420.c @@ -968,7 +968,8 @@ static void smsc9420_complete_tx(struct net_device *dev) } } -static int smsc9420_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t smsc9420_hard_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct smsc9420_pdata *pd = netdev_priv(dev); dma_addr_t mapping; diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c index 5e7645ee8ab0..a36e2b51e88c 100644 --- a/drivers/net/starfire.c +++ b/drivers/net/starfire.c @@ -595,7 +595,7 @@ static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); static void tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); -static int start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); static void netdev_error(struct net_device *dev, int intr_status); static int __netdev_rx(struct net_device *dev, int *quota); @@ -1223,7 +1223,7 @@ static void init_ring(struct net_device *dev) } -static int start_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); unsigned int entry; diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c index d09be481bcc4..e13685a570f4 100644 --- a/drivers/net/sundance.c +++ b/drivers/net/sundance.c @@ -415,7 +415,7 @@ static void check_duplex(struct net_device *dev); static void netdev_timer(unsigned long data); static void tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); -static int start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static int reset_tx (struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); static void rx_poll(unsigned long data); @@ -1053,7 +1053,7 @@ static void tx_poll (unsigned long data) return; } -static int +static netdev_tx_t start_tx (struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index d2dfe0ab5106..e0dfdd246470 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -1015,7 +1015,8 @@ static __inline__ int gem_intme(int entry) return 0; } -static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t gem_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct gem *gp = netdev_priv(dev); int entry; diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index 008bd59fc64b..37d721bbdb35 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -2252,7 +2252,8 @@ static void happy_meal_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t happy_meal_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct happy_meal *hp = netdev_priv(dev); int entry; diff --git a/drivers/net/tehuti.c 
b/drivers/net/tehuti.c index 3c2679cd196b..918d4c9e49b3 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1622,7 +1622,8 @@ static inline int bdx_tx_space(struct bdx_priv *priv) * the driver. Note: the driver must NOT put the skb in its DMA ring. * o NETDEV_TX_LOCKED Locking failed, please retry quickly. */ -static int bdx_tx_transmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb, + struct net_device *ndev) { struct bdx_priv *priv = netdev_priv(ndev); struct txd_fifo *f = &priv->txd_fifo0; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index a7d14aae258a..9d5c1786c664 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -5135,7 +5135,8 @@ static void tg3_set_txd(struct tg3_napi *tnapi, int entry, /* hard_start_xmit for devices that don't have any bugs and * support TG3_FLG2_HW_TSO_2 only. */ -static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); u32 len, entry, base_flags, mss; @@ -5251,7 +5252,8 @@ out_unlock: return NETDEV_TX_OK; } -static int tg3_start_xmit_dma_bug(struct sk_buff *, struct net_device *); +static netdev_tx_t tg3_start_xmit_dma_bug(struct sk_buff *, + struct net_device *); /* Use GSO to workaround a rare TSO bug that may be triggered when the * TSO header is greater than 80 bytes. @@ -5290,7 +5292,8 @@ tg3_tso_bug_end: /* hard_start_xmit for devices that have the 4G bug and/or 40-bit bug and * support TG3_FLG2_HW_TSO_1 or firmware TSO only. */ -static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t tg3_start_xmit_dma_bug(struct sk_buff *skb, + struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); u32 len, entry, base_flags, mss; diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c index 70c9ec45d8fb..49e273bceed6 100644 --- a/drivers/net/tlan.c +++ b/drivers/net/tlan.c @@ -289,7 +289,7 @@ static void TLan_EisaProbe( void ); static void TLan_Eisa_Cleanup( void ); static int TLan_Init( struct net_device * ); static int TLan_Open( struct net_device *dev ); -static int TLan_StartTx( struct sk_buff *, struct net_device *); +static netdev_tx_t TLan_StartTx( struct sk_buff *, struct net_device *); static irqreturn_t TLan_HandleInterrupt( int, void *); static int TLan_Close( struct net_device *); static struct net_device_stats *TLan_GetStats( struct net_device *); @@ -1083,7 +1083,7 @@ static void TLan_tx_timeout_work(struct work_struct *work) * **************************************************************/ -static int TLan_StartTx( struct sk_buff *skb, struct net_device *dev ) +static netdev_tx_t TLan_StartTx( struct sk_buff *skb, struct net_device *dev ) { TLanPrivateInfo *priv = netdev_priv(dev); dma_addr_t tail_list_phys; diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index 2c26b4577e8a..d6d345229fe9 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -762,7 +762,7 @@ typhoon_tso_fill(struct sk_buff *skb, struct transmit_ring *txRing, tcpd->status = 0; } -static int +static netdev_tx_t typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) { struct typhoon *tp = netdev_priv(dev); diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c index 46eb618bbc90..081402cb05fd 100644 --- a/drivers/net/via-rhine.c +++ b/drivers/net/via-rhine.c @@ -408,7 +408,8 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int 
phy_id, int location, int value); static int rhine_open(struct net_device *dev); static void rhine_tx_timeout(struct net_device *dev); -static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t rhine_start_tx(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t rhine_interrupt(int irq, void *dev_instance); static void rhine_tx(struct net_device *dev); static int rhine_rx(struct net_device *dev, int limit); @@ -1213,7 +1214,8 @@ static void rhine_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t rhine_start_tx(struct sk_buff *skb, + struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 47be41a39d35..e56cf6b548d6 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -2465,7 +2465,8 @@ static int velocity_close(struct net_device *dev) * Called by the networ layer to request a packet is queued to * the velocity. Returns zero on success. */ -static int velocity_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t velocity_xmit(struct sk_buff *skb, + struct net_device *dev) { struct velocity_info *vptr = netdev_priv(dev); int qnum = 0; diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c index 094d15548a2b..41dccba50c46 100644 --- a/drivers/net/vxge/vxge-main.c +++ b/drivers/net/vxge/vxge-main.c @@ -812,7 +812,7 @@ static int vxge_learn_mac(struct vxgedev *vdev, u8 *mac_header) * NOTE: when device cant queue the pkt, just the trans_start variable will * not be upadted. */ -static int +static netdev_tx_t vxge_xmit(struct sk_buff *skb, struct net_device *dev) { struct vxge_fifo *fifo = NULL; diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c index 76764237cde6..9509477f61f4 100644 --- a/drivers/net/yellowfin.c +++ b/drivers/net/yellowfin.c @@ -347,7 +347,8 @@ static int yellowfin_open(struct net_device *dev); static void yellowfin_timer(unsigned long data); static void yellowfin_tx_timeout(struct net_device *dev); static void yellowfin_init_ring(struct net_device *dev); -static int yellowfin_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t yellowfin_interrupt(int irq, void *dev_instance); static int yellowfin_rx(struct net_device *dev); static void yellowfin_error(struct net_device *dev, int intr_status); @@ -808,7 +809,8 @@ static void yellowfin_init_ring(struct net_device *dev) return; } -static int yellowfin_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct yellowfin_private *yp = netdev_priv(dev); unsigned entry; diff --git a/drivers/net/znet.c b/drivers/net/znet.c index 7f9e14131a5c..a0384b6f09b6 100644 --- a/drivers/net/znet.c +++ b/drivers/net/znet.c @@ -156,7 +156,8 @@ struct netidblk { }; static int znet_open(struct net_device *dev); -static int znet_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t znet_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t znet_interrupt(int irq, void *dev_id); static void znet_rx(struct net_device *dev); static int znet_close(struct net_device *dev); @@ -534,7 +535,7 @@ static void znet_tx_timeout (struct net_device *dev) netif_wake_queue (dev); } 
-static int znet_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t znet_send_packet(struct sk_buff *skb, struct net_device *dev) { int ioaddr = dev->base_addr; struct znet_private *znet = netdev_priv(dev); diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index cd4bcb6989ce..7d650a0e3d8f 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -337,7 +337,8 @@ struct net_device *alloc_arcdev(const char *name); int arcnet_open(struct net_device *dev); int arcnet_close(struct net_device *dev); -int arcnet_send_packet(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t arcnet_send_packet(struct sk_buff *skb, + struct net_device *dev); void arcnet_timeout(struct net_device *dev); #endif /* __KERNEL__ */ -- cgit v1.2.3 From cb45439977d3602b91dd0aca2d94fa3b32aebba6 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Mon, 31 Aug 2009 12:31:36 +0000 Subject: net: Add ndo_fcoe_enable/ndo_fcoe_disable to net_device_ops Add ndo_fcoe_enable/_disable to net_device_ops so the corresponding HW can initialize itself for FCoE traffic or clean up after FCoE traffic is done. This is expected to be called by the kernel FCoE stack upon receiving a request for creating an FCoE instance on the corresponding netdev interface. When implemented by the actual HW, the HW driver checks the op code to perform the corresponding initialization or clean up for FCoE. The initialization normally includes allocating extra queues for FCoE, setting corresponding HW registers for FCoE, indicating FCoE offload features via netdev, etc. The clean-up would include releasing the resources allocated for FCoE. Signed-off-by: Yi Zou Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 376a2e1ac00d..121cbad0aae5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -627,6 +627,8 @@ struct net_device_ops { void (*ndo_poll_controller)(struct net_device *dev); #endif #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) + int (*ndo_fcoe_enable)(struct net_device *dev); + int (*ndo_fcoe_disable)(struct net_device *dev); int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid, struct scatterlist *sgl, -- cgit v1.2.3 From 0f6f290259896afdca30e1ff4a28aff8edd79a14 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Mon, 31 Aug 2009 12:32:34 +0000 Subject: dcbnl: Add support for setapp/getapp commands to dcbnl This patch adds dcbnl command definitions to support setapp/getapp functionality from the IEEE 802.1Qaz Data Center Bridging Capability Exchange protocol (DCBX) specification. Section 3.3 defines the application protocol and its 802.1p user priority in DCBX, which is implemented here as a pair of setapp/getapp commands in the kernel dcbnl for setting and retrieving the user priority for a given application protocol. The protocol is identified by the combination of an id and an idtype. Currently, when idtype is 0, the corresponding id gives the ether type of this protocol, e.g., for FCoE, it will be 0x8906; when idtype is 1, then the corresponding id gives the TCP or UDP port number. For more information regarding DCBX spec., please refer to the following: http://www.ieee802.org/1/files/public/docs2008/ az-wadekar-dcbx-capability-exchange-discovery-protocol-1108-v1.01.pdf Signed-off-by: Yi Zou Acked-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S.
Miller --- include/linux/dcbnl.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 7d2e10006188..b7cdbb4373df 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -50,6 +50,8 @@ struct dcbmsg { * @DCB_CMD_SNUMTCS: set the number of traffic classes * @DCB_CMD_GBCN: set backward congestion notification configuration * @DCB_CMD_SBCN: get backward congestion notification configration. + * @DCB_CMD_GAPP: get application protocol configuration + * @DCB_CMD_SAPP: set application protocol configuration */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -80,6 +82,9 @@ enum dcbnl_commands { DCB_CMD_BCN_GCFG, DCB_CMD_BCN_SCFG, + DCB_CMD_GAPP, + DCB_CMD_SAPP, + __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, }; @@ -114,6 +119,7 @@ enum dcbnl_attrs { DCB_ATTR_CAP, DCB_ATTR_NUMTCS, DCB_ATTR_BCN, + DCB_ATTR_APP, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -338,5 +344,17 @@ enum dcb_general_attr_values { DCB_ATTR_VALUE_UNDEFINED = 0xff }; +#define DCB_APP_IDTYPE_ETHTYPE 0x00 +#define DCB_APP_IDTYPE_PORTNUM 0x01 +enum dcbnl_app_attrs { + DCB_APP_ATTR_UNDEFINED, + + DCB_APP_ATTR_IDTYPE, + DCB_APP_ATTR_ID, + DCB_APP_ATTR_PRIORITY, + + __DCB_APP_ATTR_ENUM_MAX, + DCB_APP_ATTR_MAX = __DCB_APP_ATTR_ENUM_MAX - 1, +}; #endif /* __LINUX_DCBNL_H__ */ -- cgit v1.2.3 From 8aa84ad8d6c740a04386f599694609ee4998e82e Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:05 +0200 Subject: [CPUFREQ] Introduce global, not per core: /sys/devices/system/cpu/cpufreq Currently everything in the cpufreq layer is per core based. This does not reflect reality, for example the ondemand and conservative governors have global sysfs variables. Introduce a global cpufreq directory and add the kobject to the governor struct, so that governors can easily access it. The directory is initialized in the cpufreq_core_init initcall and thus will always be created if cpufreq is compiled in, even if no cpufreq driver is active later.
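As a rough sketch of how a governor could use this kobject (illustrative only, not part of this patch; the attribute name "sampling_rate", its backing variable, and the 0644 mode are assumptions), a global tunable might be wired up like so, using the struct global_attr added below:

static unsigned int sampling_rate = 10000;	/* hypothetical global tunable */

/* show/store match the struct global_attr callback signatures */
static ssize_t show_sampling_rate(struct kobject *kobj,
				  struct attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", sampling_rate);
}

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
				   const char *buf, size_t count)
{
	unsigned int val;

	if (sscanf(buf, "%u", &val) != 1)
		return -EINVAL;
	sampling_rate = val;
	return count;
}

static struct global_attr sampling_rate_attr =
	__ATTR(sampling_rate, 0644, show_sampling_rate, store_sampling_rate);

static int example_governor_init(void)
{
	/* the file appears as /sys/devices/system/cpu/cpufreq/sampling_rate */
	return sysfs_create_file(cpufreq_global_kobject,
				 &sampling_rate_attr.attr);
}

Because the kobject is created from the cpufreq_core_init initcall, a registration like this can never find the directory missing, no matter when the governor loads.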
Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 9 ++++++++- include/linux/cpufreq.h | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bbd5c2164ab6..4da28444b235 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -686,6 +686,9 @@ static struct attribute *default_attrs[] = { NULL }; +struct kobject *cpufreq_global_kobject; +EXPORT_SYMBOL(cpufreq_global_kobject); + #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) #define to_attr(a) container_of(a, struct freq_attr, attr) @@ -1935,7 +1938,11 @@ static int __init cpufreq_core_init(void) per_cpu(policy_cpu, cpu) = -1; init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); } + + cpufreq_global_kobject = kobject_create_and_add("cpufreq", + &cpu_sysdev_class.kset.kobj); + BUG_ON(!cpufreq_global_kobject); + return 0; } - core_initcall(cpufreq_core_init); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 161042746afc..44717eb47639 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,6 +65,9 @@ static inline int cpufreq_unregister_notifier(struct notifier_block *nb, struct cpufreq_governor; +/* /sys/devices/system/cpu/cpufreq: entry point for global variables */ +extern struct kobject *cpufreq_global_kobject; + #define CPUFREQ_ETERNAL (-1) struct cpufreq_cpuinfo { unsigned int max_freq; @@ -274,6 +277,13 @@ struct freq_attr { ssize_t (*store)(struct cpufreq_policy *, const char *, size_t count); }; +struct global_attr { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); + ssize_t (*store)(struct kobject *a, struct attribute *b, + const char *c, size_t count); +}; /********************************************************************* * CPUFREQ 2.6. INTERFACE * -- cgit v1.2.3 From 8bc11b491b6cad75e737f1d38bb4b261bd95b291 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Aug 2009 18:13:17 +0200 Subject: rfkill: relicense header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This header file is copied into userspace tools that need not be GPLv2 licensed, make that easier. Signed-off-by: Johannes Berg Acked-by: Alan Jenkins Acked-by: Henrique de Moraes Holschuh Acked-by: Iñaky Pérez-González Acked-by: Ivo van Doorn Acked-by: Jaswinder Singh Rajput Acked-by: Michael Buesch Acked-by: Tomas Winkler Signed-off-by: John W. Linville --- include/linux/rfkill.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 21ca51bf4dd2..3392c59d2706 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -6,20 +6,17 @@ * Copyright (C) 2007 Dmitry Torokhov * Copyright 2009 Johannes Berg * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
* - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include -- cgit v1.2.3 From 388539f3ff0cf1de926b03f94e1eec112358f74d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 27 Jul 2009 09:24:35 +0800 Subject: [libata] add DMA setup FIS auto-activate feature Hopefully results in fewer on-the-wire FIS's and no breakage. We'll see! Signed-off-by: Shaohua Li Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 2 +- drivers/ata/libata-core.c | 36 +++++++++++++++++++++++++++++------- include/linux/ata.h | 4 ++++ include/linux/libata.h | 2 ++ 4 files changed, 36 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index fe3eba5d6b3e..7a3124a9abc6 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -2869,7 +2869,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* prepare host */ if (hpriv->cap & HOST_CAP_NCQ) - pi.flags |= ATA_FLAG_NCQ; + pi.flags |= ATA_FLAG_NCQ | ATA_FLAG_FPDMA_AA; if (hpriv->cap & HOST_CAP_PMP) pi.flags |= ATA_FLAG_PMP; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 072ba5ea138f..98af50f16e0c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2299,29 +2299,49 @@ static inline u8 ata_dev_knobble(struct ata_device *dev) return ((ap->cbl == ATA_CBL_SATA) && (!ata_id_is_sata(dev->id))); } -static void ata_dev_config_ncq(struct ata_device *dev, +static int ata_dev_config_ncq(struct ata_device *dev, char *desc, size_t desc_sz) { struct ata_port *ap = dev->link->ap; int hdepth = 0, ddepth = ata_id_queue_depth(dev->id); + unsigned int err_mask; + char *aa_desc = ""; if (!ata_id_has_ncq(dev->id)) { desc[0] = '\0'; - return; + return 0; } if (dev->horkage & ATA_HORKAGE_NONCQ) { snprintf(desc, desc_sz, "NCQ (not used)"); - return; + return 0; } if (ap->flags & ATA_FLAG_NCQ) { hdepth = min(ap->scsi_host->can_queue, ATA_MAX_QUEUE - 1); dev->flags |= ATA_DFLAG_NCQ; } + if (!(dev->horkage & ATA_HORKAGE_BROKEN_FPDMA_AA) && + (ap->flags & ATA_FLAG_FPDMA_AA) && + ata_id_has_fpdma_aa(dev->id)) { + err_mask = ata_dev_set_feature(dev, SETFEATURES_SATA_ENABLE, + SATA_FPDMA_AA); + if (err_mask) { + ata_dev_printk(dev, KERN_ERR, "failed to enable AA" + "(error_mask=0x%x)\n", err_mask); + if (err_mask != AC_ERR_DEV) { + dev->horkage |= ATA_HORKAGE_BROKEN_FPDMA_AA; + return -EIO; + } + } else + aa_desc = ", AA"; + } + if (hdepth >= ddepth) - snprintf(desc, desc_sz, "NCQ (depth %d)", ddepth); + snprintf(desc, desc_sz, "NCQ (depth %d)%s", ddepth, aa_desc); else - snprintf(desc, desc_sz, "NCQ (depth %d/%d)", hdepth, ddepth); + 
snprintf(desc, desc_sz, "NCQ (depth %d/%d)%s", hdepth, + ddepth, aa_desc); + return 0; } /** @@ -2461,7 +2481,7 @@ int ata_dev_configure(struct ata_device *dev) if (ata_id_has_lba(id)) { const char *lba_desc; - char ncq_desc[20]; + char ncq_desc[24]; lba_desc = "LBA"; dev->flags |= ATA_DFLAG_LBA; @@ -2475,7 +2495,9 @@ int ata_dev_configure(struct ata_device *dev) } /* config NCQ */ - ata_dev_config_ncq(dev, ncq_desc, sizeof(ncq_desc)); + rc = ata_dev_config_ncq(dev, ncq_desc, sizeof(ncq_desc)); + if (rc) + return rc; /* print device info to dmesg */ if (ata_msg_drv(ap) && print_info) { diff --git a/include/linux/ata.h b/include/linux/ata.h index 9c75921f0c16..f5494050df83 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -306,6 +306,7 @@ enum { /* SETFEATURE Sector counts for SATA features */ SATA_AN = 0x05, /* Asynchronous Notification */ SATA_DIPM = 0x03, /* Device Initiated Power Management */ + SATA_FPDMA_AA = 0x02, /* DMA Setup FIS Auto-Activate */ /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, @@ -525,6 +526,9 @@ static inline int ata_is_data(u8 prot) #define ata_id_has_atapi_AN(id) \ ( (((id)[76] != 0x0000) && ((id)[76] != 0xffff)) && \ ((id)[78] & (1 << 5)) ) +#define ata_id_has_fpdma_aa(id) \ + ( (((id)[76] != 0x0000) && ((id)[76] != 0xffff)) && \ + ((id)[78] & (1 << 2)) ) #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10)) #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11)) #define ata_id_u32(id,n) \ diff --git a/include/linux/libata.h b/include/linux/libata.h index e5b6e33c6571..d3f7cab4873e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -190,6 +190,7 @@ enum { ATA_FLAG_NO_POWEROFF_SPINDOWN = (1 << 11), /* don't spindown before poweroff */ ATA_FLAG_NO_HIBERNATE_SPINDOWN = (1 << 12), /* don't spindown before hibernation */ ATA_FLAG_DEBUGMSG = (1 << 13), + ATA_FLAG_FPDMA_AA = (1 << 14), /* driver supports Auto-Activate */ ATA_FLAG_IGN_SIMPLEX = (1 << 15), /* ignore SIMPLEX */ ATA_FLAG_NO_IORDY = (1 << 16), /* controller lacks iordy */ ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */ @@ -386,6 +387,7 @@ enum { ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firmware update warning */ ATA_HORKAGE_1_5_GBPS = (1 << 13), /* force 1.5 Gbps */ ATA_HORKAGE_NOSETXFER = (1 << 14), /* skip SETXFER, SATA only */ + ATA_HORKAGE_BROKEN_FPDMA_AA = (1 << 15), /* skip AA */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 6521148c6449724c3b707820b9c535c7e8b8afcd Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 14 Jul 2009 20:43:39 -0600 Subject: libata: add command name parsing for error output This patch improves libata's output for error/notification messages to allow easier comprehension and debugging: When ATAPI commands issued through the SCSI layer fail, use SCSI functions to print the CDB in human-readable form instead of just dumping out the CDB in hex. Print out the name of the failed command (as defined by the ATA specification) in error handling output along with the raw register contents. When reporting status of ACPI taskfile commands executed on resume, also output the names of the commands being executed (or not) in readable form. Since the extra data for printing command names increases kernel size slightly, a config option has been added to allow disabling command name output (as well as some of the error register parsing) for those highly sensitive to kernel text size.
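As a minimal usage sketch (an assumption about a typical caller, not code from this patch; the helper name report_failed_cmd is hypothetical):

/*
 * ata_get_cmd_descript() returns NULL for opcodes it does not know,
 * and for every opcode when CONFIG_ATA_VERBOSE_ERROR=n, so callers
 * must be prepared for that.
 */
static void report_failed_cmd(struct ata_queued_cmd *qc)
{
	const char *descr = ata_get_cmd_descript(qc->tf.command);

	ata_dev_printk(qc->dev, KERN_ERR, "failed command: %s\n",
		       descr ? descr : "unknown");
}

This is essentially the pattern the patch itself applies in ata_eh_link_report() and in the ACPI taskfile reporting below.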
Signed-off-by: Robert Hancock Signed-off-by: Jeff Garzik --- drivers/ata/Kconfig | 11 ++++ drivers/ata/libata-acpi.c | 7 ++- drivers/ata/libata-eh.c | 128 +++++++++++++++++++++++++++++++++++++++++++++- drivers/ata/libata.h | 1 + include/linux/ata.h | 32 +++++++++++- 5 files changed, 174 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index b17c57f85032..8e64d3c81d53 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -26,6 +26,17 @@ config ATA_NONSTANDARD bool default n +config ATA_VERBOSE_ERROR + bool "Verbose ATA error reporting" + default y + help + This option adds parsing of ATA command descriptions and error bits + in libata kernel output, making it easier to interpret. + This option will enlarge the kernel by approx. 6KB. Disable it only + if kernel size is more important than ease of debugging. + + If unsure, say Y. + config ATA_ACPI bool "ATA ACPI Support" depends on ACPI && PCI diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index ac176da1f94e..01964b6e6f6b 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -689,6 +689,7 @@ static int ata_acpi_run_tf(struct ata_device *dev, struct ata_taskfile tf, ptf, rtf; unsigned int err_mask; const char *level; + const char *descr; char msg[60]; int rc; @@ -736,11 +737,13 @@ static int ata_acpi_run_tf(struct ata_device *dev, snprintf(msg, sizeof(msg), "filtered out"); rc = 0; } + descr = ata_get_cmd_descript(tf.command); ata_dev_printk(dev, level, - "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x %s\n", + "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x (%s) %s\n", tf.command, tf.feature, tf.nsect, tf.lbal, - tf.lbam, tf.lbah, tf.device, msg); + tf.lbam, tf.lbah, tf.device, + (descr ? descr : "unknown"), msg); return rc; } diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 2c34de841e11..a04488f0de88 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "../scsi/scsi_transport_api.h" #include @@ -2111,6 +2112,116 @@ void ata_eh_autopsy(struct ata_port *ap) ata_eh_link_autopsy(&ap->link); } +/** + * ata_get_cmd_descript - get description for ATA command + * @command: ATA command code to get description for + * + * Return a textual description of the given command, or NULL if the + * command is not known. 
+ * + * LOCKING: + * None + */ +const char *ata_get_cmd_descript(u8 command) +{ +#ifdef CONFIG_ATA_VERBOSE_ERROR + static const struct + { + u8 command; + const char *text; + } cmd_descr[] = { + { ATA_CMD_DEV_RESET, "DEVICE RESET" }, + { ATA_CMD_CHK_POWER, "CHECK POWER MODE" }, + { ATA_CMD_STANDBY, "STANDBY" }, + { ATA_CMD_IDLE, "IDLE" }, + { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, + { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, + { ATA_CMD_NOP, "NOP" }, + { ATA_CMD_FLUSH, "FLUSH CACHE" }, + { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, + { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" }, + { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" }, + { ATA_CMD_SERVICE, "SERVICE" }, + { ATA_CMD_READ, "READ DMA" }, + { ATA_CMD_READ_EXT, "READ DMA EXT" }, + { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" }, + { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" }, + { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" }, + { ATA_CMD_WRITE, "WRITE DMA" }, + { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" }, + { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" }, + { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" }, + { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" }, + { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" }, + { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, + { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, + { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, + { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, + { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, + { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, + { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" }, + { ATA_CMD_READ_MULTI, "READ MULTIPLE" }, + { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" }, + { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" }, + { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" }, + { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" }, + { ATA_CMD_SET_FEATURES, "SET FEATURES" }, + { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" }, + { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" }, + { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" }, + { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" }, + { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" }, + { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" }, + { ATA_CMD_SLEEP, "SLEEP" }, + { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" }, + { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" }, + { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" }, + { ATA_CMD_SET_MAX, "SET MAX ADDRESS" }, + { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" }, + { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" }, + { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, + { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, + { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, + { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, + { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, + { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, + { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" }, + { ATA_CMD_PMP_READ, "READ BUFFER" }, + { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, + { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, + { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" }, + { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, + { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" }, + { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" }, + { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" }, + { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" }, + { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, + { ATA_CMD_SMART, "SMART" }, + { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, + { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, + { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, + { ATA_CMD_CFA_REQ_EXT_ERR, "CFA 
REQUEST EXTENDED ERROR" }, + { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, + { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, + { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, + { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, + { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, + { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, + { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, + { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, + { ATA_CMD_RESTORE, "RECALIBRATE" }, + { 0, NULL } /* terminate list */ + }; + + unsigned int i; + for (i = 0; cmd_descr[i].text; i++) + if (cmd_descr[i].command == command) + return cmd_descr[i].text; +#endif + + return NULL; +} + /** * ata_eh_link_report - report error handling to user * @link: ATA link EH is going on @@ -2177,6 +2288,7 @@ static void ata_eh_link_report(struct ata_link *link) ata_link_printk(link, KERN_ERR, "%s\n", desc); } +#ifdef CONFIG_ATA_VERBOSE_ERROR if (ehc->i.serror) ata_link_printk(link, KERN_ERR, "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", @@ -2197,6 +2309,7 @@ static void ata_eh_link_report(struct ata_link *link) ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); +#endif for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); @@ -2228,14 +2341,23 @@ static void ata_eh_link_report(struct ata_link *link) dma_str[qc->dma_dir]); } - if (ata_is_atapi(qc->tf.protocol)) - snprintf(cdb_buf, sizeof(cdb_buf), + if (ata_is_atapi(qc->tf.protocol)) { + if (qc->scsicmd) + scsi_print_command(qc->scsicmd); + else + snprintf(cdb_buf, sizeof(cdb_buf), "cdb %02x %02x %02x %02x %02x %02x %02x %02x " "%02x %02x %02x %02x %02x %02x %02x %02x\n ", cdb[0], cdb[1], cdb[2], cdb[3], cdb[4], cdb[5], cdb[6], cdb[7], cdb[8], cdb[9], cdb[10], cdb[11], cdb[12], cdb[13], cdb[14], cdb[15]); + } else { + const char *descr = ata_get_cmd_descript(cmd->command); + if (descr) + ata_dev_printk(qc->dev, KERN_ERR, + "failed command: %s\n", descr); + } ata_dev_printk(qc->dev, KERN_ERR, "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " @@ -2254,6 +2376,7 @@ static void ata_eh_link_report(struct ata_link *link) res->device, qc->err_mask, ata_err_string(qc->err_mask), qc->err_mask & AC_ERR_NCQ ? " " : ""); +#ifdef CONFIG_ATA_VERBOSE_ERROR if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | ATA_ERR)) { if (res->command & ATA_BUSY) @@ -2277,6 +2400,7 @@ static void ata_eh_link_report(struct ata_link *link) res->feature & ATA_UNC ? "UNC " : "", res->feature & ATA_IDNF ? "IDNF " : "", res->feature & ATA_ABORTED ? 
"ABRT " : ""); +#endif } } diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 89a1e0018e71..be8e2628f82c 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -164,6 +164,7 @@ extern void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, extern void ata_eh_done(struct ata_link *link, struct ata_device *dev, unsigned int action); extern void ata_eh_autopsy(struct ata_port *ap); +const char *ata_get_cmd_descript(u8 command); extern void ata_eh_report(struct ata_port *ap); extern int ata_eh_reset(struct ata_link *link, int classify, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, diff --git a/include/linux/ata.h b/include/linux/ata.h index f5494050df83..6299a259ed19 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -210,15 +210,25 @@ enum { ATA_CMD_STANDBY = 0xE2, /* place in standby power mode */ ATA_CMD_IDLE = 0xE3, /* place in idle power mode */ ATA_CMD_EDD = 0x90, /* execute device diagnostic */ + ATA_CMD_DOWNLOAD_MICRO = 0x92, + ATA_CMD_NOP = 0x00, ATA_CMD_FLUSH = 0xE7, ATA_CMD_FLUSH_EXT = 0xEA, ATA_CMD_ID_ATA = 0xEC, ATA_CMD_ID_ATAPI = 0xA1, + ATA_CMD_SERVICE = 0xA2, ATA_CMD_READ = 0xC8, ATA_CMD_READ_EXT = 0x25, + ATA_CMD_READ_QUEUED = 0x26, + ATA_CMD_READ_STREAM_EXT = 0x2B, + ATA_CMD_READ_STREAM_DMA_EXT = 0x2A, ATA_CMD_WRITE = 0xCA, ATA_CMD_WRITE_EXT = 0x35, + ATA_CMD_WRITE_QUEUED = 0x36, + ATA_CMD_WRITE_STREAM_EXT = 0x3B, + ATA_CMD_WRITE_STREAM_DMA_EXT = 0x3A, ATA_CMD_WRITE_FUA_EXT = 0x3D, + ATA_CMD_WRITE_QUEUED_FUA_EXT = 0x3E, ATA_CMD_FPDMA_READ = 0x60, ATA_CMD_FPDMA_WRITE = 0x61, ATA_CMD_PIO_READ = 0x20, @@ -235,6 +245,7 @@ enum { ATA_CMD_PACKET = 0xA0, ATA_CMD_VERIFY = 0x40, ATA_CMD_VERIFY_EXT = 0x42, + ATA_CMD_WRITE_UNCORR_EXT = 0x45, ATA_CMD_STANDBYNOW1 = 0xE0, ATA_CMD_IDLEIMMEDIATE = 0xE1, ATA_CMD_SLEEP = 0xE6, @@ -243,15 +254,34 @@ enum { ATA_CMD_READ_NATIVE_MAX_EXT = 0x27, ATA_CMD_SET_MAX = 0xF9, ATA_CMD_SET_MAX_EXT = 0x37, - ATA_CMD_READ_LOG_EXT = 0x2f, + ATA_CMD_READ_LOG_EXT = 0x2F, + ATA_CMD_WRITE_LOG_EXT = 0x3F, + ATA_CMD_READ_LOG_DMA_EXT = 0x47, + ATA_CMD_WRITE_LOG_DMA_EXT = 0x57, + ATA_CMD_TRUSTED_RCV = 0x5C, + ATA_CMD_TRUSTED_RCV_DMA = 0x5D, + ATA_CMD_TRUSTED_SND = 0x5E, + ATA_CMD_TRUSTED_SND_DMA = 0x5F, ATA_CMD_PMP_READ = 0xE4, ATA_CMD_PMP_WRITE = 0xE8, ATA_CMD_CONF_OVERLAY = 0xB1, + ATA_CMD_SEC_SET_PASS = 0xF1, + ATA_CMD_SEC_UNLOCK = 0xF2, + ATA_CMD_SEC_ERASE_PREP = 0xF3, + ATA_CMD_SEC_ERASE_UNIT = 0xF4, ATA_CMD_SEC_FREEZE_LOCK = 0xF5, + ATA_CMD_SEC_DISABLE_PASS = 0xF6, + ATA_CMD_CONFIG_STREAM = 0x51, ATA_CMD_SMART = 0xB0, ATA_CMD_MEDIA_LOCK = 0xDE, ATA_CMD_MEDIA_UNLOCK = 0xDF, ATA_CMD_DSM = 0x06, + ATA_CMD_CHK_MED_CRD_TYP = 0xD1, + ATA_CMD_CFA_REQ_EXT_ERR = 0x03, + ATA_CMD_CFA_WRITE_NE = 0x38, + ATA_CMD_CFA_TRANS_SECT = 0x87, + ATA_CMD_CFA_ERASE = 0xC0, + ATA_CMD_CFA_WRITE_MULT_NE = 0xCD, /* marked obsolete in the ATA/ATAPI-7 spec */ ATA_CMD_RESTORE = 0x10, -- cgit v1.2.3 From 051d9fbdd1d1ec85ea18ba20581234cf23f1c217 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Jul 2009 11:46:12 +0900 Subject: libata: remove spindown skipping and warning This was a hack to give userland shutdown tools time to drop manual spindown. All popular distros updated quite some time ago and the due is well passed. Drop it. 
Signed-off-by: Tejun Heo Cc: Jaswinder Singh Rajput Signed-off-by: Jeff Garzik --- Documentation/feature-removal-schedule.txt | 18 ----------- drivers/ata/libata-scsi.c | 51 ------------------------------ include/linux/libata.h | 1 - 3 files changed, 70 deletions(-) (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 09e031c55887..b3b62903f08c 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -206,24 +206,6 @@ Who: Len Brown --------------------------- -What: libata spindown skipping and warning -When: Dec 2008 -Why: Some halt(8) implementations synchronize caches for and spin - down libata disks because libata didn't use to spin down disk on - system halt (only synchronized caches). - Spin down on system halt is now implemented. sysfs node - /sys/class/scsi_disk/h:c:i:l/manage_start_stop is present if - spin down support is available. - Because issuing spin down command to an already spun down disk - makes some disks spin up just to spin down again, libata tracks - device spindown status to skip the extra spindown command and - warn about it. - This is to give userspace tools the time to get updated and will - be removed after userspace is reasonably updated. -Who: Tejun Heo - ---------------------------- - What: i386/x86_64 bzImage symlinks When: April 2010 diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d0dfeef55db5..de3a0050760a 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1257,23 +1257,6 @@ int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth) return queue_depth; } -/* XXX: for spindown warning */ -static void ata_delayed_done_timerfn(unsigned long arg) -{ - struct scsi_cmnd *scmd = (void *)arg; - - scmd->scsi_done(scmd); -} - -/* XXX: for spindown warning */ -static void ata_delayed_done(struct scsi_cmnd *scmd) -{ - static struct timer_list timer; - - setup_timer(&timer, ata_delayed_done_timerfn, (unsigned long)scmd); - mod_timer(&timer, jiffies + 5 * HZ); -} - /** * ata_scsi_start_stop_xlat - Translate SCSI START STOP UNIT command * @qc: Storage for translated ATA taskfile @@ -1338,32 +1321,6 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) system_entering_hibernation()) goto skip; - /* XXX: This is for backward compatibility, will be - * removed. Read Documentation/feature-removal-schedule.txt - * for more info. - */ - if ((qc->dev->flags & ATA_DFLAG_SPUNDOWN) && - (system_state == SYSTEM_HALT || - system_state == SYSTEM_POWER_OFF)) { - static unsigned long warned; - - if (!test_and_set_bit(0, &warned)) { - ata_dev_printk(qc->dev, KERN_WARNING, - "DISK MIGHT NOT BE SPUN DOWN PROPERLY. " - "UPDATE SHUTDOWN UTILITY\n"); - ata_dev_printk(qc->dev, KERN_WARNING, - "For more info, visit " - "http://linux-ata.org/shutdown.html\n"); - - /* ->scsi_done is not used, use it for - * delayed completion. 
- */ - scmd->scsi_done = qc->scsidone; - qc->scsidone = ata_delayed_done; - } - goto skip; - } - /* Issue ATA STANDBY IMMEDIATE command */ tf->command = ATA_CMD_STANDBYNOW1; } @@ -1764,14 +1721,6 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) } } - /* XXX: track spindown state for spindown skipping and warning */ - if (unlikely(qc->tf.command == ATA_CMD_STANDBY || - qc->tf.command == ATA_CMD_STANDBYNOW1)) - qc->dev->flags |= ATA_DFLAG_SPUNDOWN; - else if (likely(system_state != SYSTEM_HALT && - system_state != SYSTEM_POWER_OFF)) - qc->dev->flags &= ~ATA_DFLAG_SPUNDOWN; - if (need_sense && !ap->ops->error_handler) ata_dump_status(ap->print_id, &qc->result_tf); diff --git a/include/linux/libata.h b/include/linux/libata.h index d3f7cab4873e..76319bf03e37 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -143,7 +143,6 @@ enum { ATA_DFLAG_PIO = (1 << 12), /* device limited to PIO mode */ ATA_DFLAG_NCQ_OFF = (1 << 13), /* device limited to non-NCQ mode */ - ATA_DFLAG_SPUNDOWN = (1 << 14), /* XXX: for spindown_compat */ ATA_DFLAG_SLEEPING = (1 << 15), /* device is sleeping */ ATA_DFLAG_DUBIOUS_XFER = (1 << 16), /* data transfer not verified */ ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ -- cgit v1.2.3 From 6d703a81ad5fdd102334751ddacb053ecc6ff046 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 1 Sep 2009 17:52:57 -0700 Subject: ide: convert to ->proc_fops ->read_proc, ->write_proc are going away, ->proc_fops should be used instead. The only tricky place is IDENTIFY handling: if for some reason taskfile_lib_get_identify() fails, the buffer _is_ changed and at least the first byte is overwritten. Emulate the old behaviour by returning that first byte to userspace and reporting length=1 despite the overall -E return. Signed-off-by: Alexey Dobriyan Signed-off-by: David S.
Miller --- drivers/ide/ide-cd.c | 28 +++- drivers/ide/ide-disk_proc.c | 129 +++++++++++------ drivers/ide/ide-floppy_proc.c | 30 ++-- drivers/ide/ide-proc.c | 330 +++++++++++++++++++++++++++--------------- drivers/ide/ide-tape.c | 31 ++-- include/linux/ide.h | 24 +-- 6 files changed, 365 insertions(+), 207 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index ad0ab0c0a493..b79ca419d8d9 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1389,19 +1390,30 @@ static sector_t ide_cdrom_capacity(ide_drive_t *drive) return capacity * sectors_per_frame; } -static int proc_idecd_read_capacity(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int idecd_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = data; - int len; + ide_drive_t *drive = m->private; - len = sprintf(page, "%llu\n", (long long)ide_cdrom_capacity(drive)); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)ide_cdrom_capacity(drive)); + return 0; +} + +static int idecd_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idecd_capacity_proc_show, PDE(inode)->data); } +static const struct file_operations idecd_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idecd_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static ide_proc_entry_t idecd_proc[] = { - { "capacity", S_IFREG|S_IRUGO, proc_idecd_read_capacity, NULL }, - { NULL, 0, NULL, NULL } + { "capacity", S_IFREG|S_IRUGO, &idecd_capacity_proc_fops }, + {} }; static ide_proc_entry_t *ide_cd_proc_entries(ide_drive_t *drive) diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c index 19f263bf0a9e..60b0590ccc9c 100644 --- a/drivers/ide/ide-disk_proc.c +++ b/drivers/ide/ide-disk_proc.c @@ -1,5 +1,6 @@ #include #include +#include #include "ide-disk.h" @@ -37,77 +38,117 @@ static int get_smart_data(ide_drive_t *drive, u8 *buf, u8 sub_cmd) return ide_raw_taskfile(drive, &cmd, buf, 1); } -static int proc_idedisk_read_cache - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int idedisk_cache_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len; + ide_drive_t *drive = (ide_drive_t *) m->private; if (drive->dev_flags & IDE_DFLAG_ID_READ) - len = sprintf(out, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2); + seq_printf(m, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2); else - len = sprintf(out, "(none)\n"); + seq_printf(m, "(none)\n"); + return 0; +} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_cache_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_cache_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_capacity - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations idedisk_cache_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_cache_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int idedisk_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t*drive = (ide_drive_t *)data; - int len; + ide_drive_t*drive = (ide_drive_t *)m->private; - len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive)); + seq_printf(m, "%llu\n", (long long)ide_gd_capacity(drive)); + 
return 0; +} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_capacity_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_smart(char *page, char **start, off_t off, - int count, int *eof, void *data, u8 sub_cmd) +static const struct file_operations idedisk_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __idedisk_proc_show(struct seq_file *m, ide_drive_t *drive, u8 sub_cmd) { - ide_drive_t *drive = (ide_drive_t *)data; - int len = 0, i = 0; + u8 *buf; + + buf = kmalloc(SECTOR_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; (void)smart_enable(drive); - if (get_smart_data(drive, page, sub_cmd) == 0) { - unsigned short *val = (unsigned short *) page; - char *out = (char *)val + SECTOR_SIZE; - - page = out; - do { - out += sprintf(out, "%04x%c", le16_to_cpu(*val), - (++i & 7) ? ' ' : '\n'); - val += 1; - } while (i < SECTOR_SIZE / 2); - len = out - page; + if (get_smart_data(drive, buf, sub_cmd) == 0) { + __le16 *val = (__le16 *)buf; + int i; + + for (i = 0; i < SECTOR_SIZE / 2; i++) { + seq_printf(m, "%04x%c", le16_to_cpu(val[i]), + (i % 8) == 7 ? '\n' : ' '); + } } + kfree(buf); + return 0; +} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_sv_proc_show(struct seq_file *m, void *v) +{ + return __idedisk_proc_show(m, m->private, ATA_SMART_READ_VALUES); } -static int proc_idedisk_read_sv - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int idedisk_sv_proc_open(struct inode *inode, struct file *file) { - return proc_idedisk_read_smart(page, start, off, count, eof, data, - ATA_SMART_READ_VALUES); + return single_open(file, idedisk_sv_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_st - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations idedisk_sv_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_sv_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int idedisk_st_proc_show(struct seq_file *m, void *v) { - return proc_idedisk_read_smart(page, start, off, count, eof, data, - ATA_SMART_READ_THRESHOLDS); + return __idedisk_proc_show(m, m->private, ATA_SMART_READ_THRESHOLDS); } +static int idedisk_st_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_st_proc_show, PDE(inode)->data); +} + +static const struct file_operations idedisk_st_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_st_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + ide_proc_entry_t ide_disk_proc[] = { - { "cache", S_IFREG|S_IRUGO, proc_idedisk_read_cache, NULL }, - { "capacity", S_IFREG|S_IRUGO, proc_idedisk_read_capacity, NULL }, - { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL }, - { "smart_values", S_IFREG|S_IRUSR, proc_idedisk_read_sv, NULL }, - { "smart_thresholds", S_IFREG|S_IRUSR, proc_idedisk_read_st, NULL }, - { NULL, 0, NULL, NULL } + { "cache", S_IFREG|S_IRUGO, &idedisk_cache_proc_fops }, + { "capacity", S_IFREG|S_IRUGO, &idedisk_capacity_proc_fops }, + { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops }, + { "smart_values", S_IFREG|S_IRUSR, &idedisk_sv_proc_fops }, + { "smart_thresholds", S_IFREG|S_IRUSR, &idedisk_st_proc_fops }, + {} }; ide_devset_rw_field(bios_cyl, 
bios_cyl); diff --git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c index fcd4d8153df5..d711d9b883de 100644 --- a/drivers/ide/ide-floppy_proc.c +++ b/drivers/ide/ide-floppy_proc.c @@ -1,22 +1,34 @@ #include #include +#include <linux/seq_file.h> #include "ide-floppy.h" -static int proc_idefloppy_read_capacity(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int idefloppy_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t*drive = (ide_drive_t *)data; - int len; + ide_drive_t*drive = (ide_drive_t *)m->private; - len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive)); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)ide_gd_capacity(drive)); + return 0; } +static int idefloppy_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idefloppy_capacity_proc_show, PDE(inode)->data); +} + +static const struct file_operations idefloppy_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idefloppy_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + ide_proc_entry_t ide_floppy_proc[] = { - { "capacity", S_IFREG|S_IRUGO, proc_idefloppy_read_capacity, NULL }, - { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL }, - { NULL, 0, NULL, NULL } + { "capacity", S_IFREG|S_IRUGO, &idefloppy_capacity_proc_fops }, + { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops }, + {} }; ide_devset_rw_field(bios_cyl, bios_cyl); diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 021de41655e6..28d09a5d8450 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -30,11 +30,9 @@ static struct proc_dir_entry *proc_ide_root; -static int proc_ide_read_imodel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_imodel_proc_show(struct seq_file *m, void *v) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + ide_hwif_t *hwif = (ide_hwif_t *) m->private; const char *name; switch (hwif->chipset) { @@ -53,63 +51,108 @@ static int proc_ide_read_imodel case ide_acorn: name = "acorn"; break; default: name = "(unknown)"; break; } - len = sprintf(page, "%s\n", name); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%s\n", name); + return 0; } -static int proc_ide_read_mate - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_imodel_proc_open(struct inode *inode, struct file *file) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + return single_open(file, ide_imodel_proc_show, PDE(inode)->data); +} + +static const struct file_operations ide_imodel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_imodel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_mate_proc_show(struct seq_file *m, void *v) +{ + ide_hwif_t *hwif = (ide_hwif_t *) m->private; if (hwif && hwif->mate) - len = sprintf(page, "%s\n", hwif->mate->name); + seq_printf(m, "%s\n", hwif->mate->name); else - len = sprintf(page, "(none)\n"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "(none)\n"); + return 0; +} + +static int ide_mate_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_mate_proc_show, PDE(inode)->data); } -static int proc_ide_read_channel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_mate_proc_fops = { + .owner = THIS_MODULE, + .open =
ide_mate_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_channel_proc_show(struct seq_file *m, void *v) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + ide_hwif_t *hwif = (ide_hwif_t *) m->private; - page[0] = hwif->channel ? '1' : '0'; - page[1] = '\n'; - len = 2; - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%c\n", hwif->channel ? '1' : '0'); + return 0; } -static int proc_ide_read_identify - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_channel_proc_open(struct inode *inode, struct file *file) { - ide_drive_t *drive = (ide_drive_t *)data; - int len = 0, i = 0; - int err = 0; + return single_open(file, ide_channel_proc_show, PDE(inode)->data); +} - len = sprintf(page, "\n"); +static const struct file_operations ide_channel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_channel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; - if (drive) { - __le16 *val = (__le16 *)page; +static int ide_identify_proc_show(struct seq_file *m, void *v) +{ + ide_drive_t *drive = (ide_drive_t *)m->private; + u8 *buf; - err = taskfile_lib_get_identify(drive, page); - if (!err) { - char *out = (char *)page + SECTOR_SIZE; + if (!drive) { + seq_putc(m, '\n'); + return 0; + } - page = out; - do { - out += sprintf(out, "%04x%c", - le16_to_cpup(val), (++i & 7) ? ' ' : '\n'); - val += 1; - } while (i < SECTOR_SIZE / 2); - len = out - page; + buf = kmalloc(SECTOR_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + if (taskfile_lib_get_identify(drive, buf) == 0) { + __le16 *val = (__le16 *)buf; + int i; + + for (i = 0; i < SECTOR_SIZE / 2; i++) { + seq_printf(m, "%04x%c", le16_to_cpu(val[i]), + (i % 8) == 7 ? '\n' : ' '); } - } - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + } else + seq_putc(m, buf[0]); + kfree(buf); + return 0; +} + +static int ide_identify_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_identify_proc_show, PDE(inode)->data); } +static const struct file_operations ide_identify_proc_fops = { + .owner = THIS_MODULE, + .open = ide_identify_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /** * ide_find_setting - find a specific setting * @st: setting table pointer @@ -240,22 +283,20 @@ static void proc_ide_settings_warn(void) warned = 1; } -static int proc_ide_read_settings - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_settings_proc_show(struct seq_file *m, void *v) { const struct ide_proc_devset *setting, *g, *d; const struct ide_devset *ds; - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len, rc, mul_factor, div_factor; + ide_drive_t *drive = (ide_drive_t *) m->private; + int rc, mul_factor, div_factor; proc_ide_settings_warn(); mutex_lock(&ide_setting_mtx); g = ide_generic_settings; d = drive->settings; - out += sprintf(out, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); - out += sprintf(out, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); + seq_printf(m, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); + seq_printf(m, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); while (g->name || (d && d->name)) { /* read settings in the alphabetical order */ if (g->name && d && d->name) { @@ -269,31 +310,35 @@ static int proc_ide_read_settings setting = g++; mul_factor = setting->mulf ? setting->mulf(drive) : 1; div_factor = setting->divf ? 
setting->divf(drive) : 1; - out += sprintf(out, "%-24s", setting->name); + seq_printf(m, "%-24s", setting->name); rc = ide_read_setting(drive, setting); if (rc >= 0) - out += sprintf(out, "%-16d", rc * mul_factor / div_factor); + seq_printf(m, "%-16d", rc * mul_factor / div_factor); else - out += sprintf(out, "%-16s", "write-only"); - out += sprintf(out, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor); + seq_printf(m, "%-16s", "write-only"); + seq_printf(m, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor); ds = setting->setting; if (ds->get) - out += sprintf(out, "r"); + seq_printf(m, "r"); if (ds->set) - out += sprintf(out, "w"); - out += sprintf(out, "\n"); + seq_printf(m, "w"); + seq_printf(m, "\n"); } - len = out - page; mutex_unlock(&ide_setting_mtx); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + return 0; +} + +static int ide_settings_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_settings_proc_show, PDE(inode)->data); } #define MAX_LEN 30 -static int proc_ide_write_settings(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t ide_settings_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data; char name[MAX_LEN + 1]; int for_real = 0, mul_factor, div_factor; unsigned long n; @@ -388,63 +433,104 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer, return count; parse_error: free_page((unsigned long)buf); - printk("proc_ide_write_settings(): parse error\n"); + printk("%s(): parse error\n", __func__); return -EINVAL; } -int proc_ide_read_capacity - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_settings_proc_fops = { + .owner = THIS_MODULE, + .open = ide_settings_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = ide_settings_proc_write, +}; + +static int ide_capacity_proc_show(struct seq_file *m, void *v) { - int len = sprintf(page, "%llu\n", (long long)0x7fffffff); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)0x7fffffff); + return 0; } -EXPORT_SYMBOL_GPL(proc_ide_read_capacity); +static int ide_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_capacity_proc_show, NULL); +} -int proc_ide_read_geometry - (char *page, char **start, off_t off, int count, int *eof, void *data) +const struct file_operations ide_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = ide_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL_GPL(ide_capacity_proc_fops); + +static int ide_geometry_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len; + ide_drive_t *drive = (ide_drive_t *) m->private; - out += sprintf(out, "physical %d/%d/%d\n", + seq_printf(m, "physical %d/%d/%d\n", drive->cyl, drive->head, drive->sect); - out += sprintf(out, "logical %d/%d/%d\n", + seq_printf(m, "logical %d/%d/%d\n", drive->bios_cyl, drive->bios_head, drive->bios_sect); + return 0; +} - len = out - page; - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int ide_geometry_proc_open(struct 
inode *inode, struct file *file) +{ + return single_open(file, ide_geometry_proc_show, PDE(inode)->data); } -EXPORT_SYMBOL(proc_ide_read_geometry); +const struct file_operations ide_geometry_proc_fops = { + .owner = THIS_MODULE, + .open = ide_geometry_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL(ide_geometry_proc_fops); -static int proc_ide_read_dmodel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_dmodel_proc_show(struct seq_file *seq, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) seq->private; char *m = (char *)&drive->id[ATA_ID_PROD]; - int len; - len = sprintf(page, "%.40s\n", m[0] ? m : "(none)"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(seq, "%.40s\n", m[0] ? m : "(none)"); + return 0; +} + +static int ide_dmodel_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_dmodel_proc_show, PDE(inode)->data); } -static int proc_ide_read_driver - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_dmodel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_dmodel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_driver_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *)data; + ide_drive_t *drive = (ide_drive_t *)m->private; struct device *dev = &drive->gendev; struct ide_driver *ide_drv; - int len; if (dev->driver) { ide_drv = to_ide_driver(dev->driver); - len = sprintf(page, "%s version %s\n", + seq_printf(m, "%s version %s\n", dev->driver->name, ide_drv->version); } else - len = sprintf(page, "ide-default version 0.9.newide\n"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "ide-default version 0.9.newide\n"); + return 0; +} + +static int ide_driver_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_driver_proc_show, PDE(inode)->data); } static int ide_replace_subdriver(ide_drive_t *drive, const char *driver) @@ -474,10 +560,10 @@ static int ide_replace_subdriver(ide_drive_t *drive, const char *driver) return ret; } -static int proc_ide_write_driver - (struct file *file, const char __user *buffer, unsigned long count, void *data) +static ssize_t ide_driver_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data; char name[32]; if (!capable(CAP_SYS_ADMIN)) @@ -492,12 +578,19 @@ static int proc_ide_write_driver return count; } -static int proc_ide_read_media - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_driver_proc_fops = { + .owner = THIS_MODULE, + .open = ide_driver_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = ide_driver_proc_write, +}; + +static int ide_media_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) m->private; const char *media; - int len; switch (drive->media) { case ide_disk: media = "disk\n"; break; @@ -507,20 +600,30 @@ static int proc_ide_read_media case ide_optical: media = "optical\n"; break; default: media = "UNKNOWN\n"; break; } - strcpy(page, media); - len = strlen(media); - PROC_IDE_READ_RETURN(page, start, off, count, 
eof, len); + seq_puts(m, media); + return 0; +} + +static int ide_media_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_media_proc_show, PDE(inode)->data); } +static const struct file_operations ide_media_proc_fops = { + .owner = THIS_MODULE, + .open = ide_media_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static ide_proc_entry_t generic_drive_entries[] = { - { "driver", S_IFREG|S_IRUGO, proc_ide_read_driver, - proc_ide_write_driver }, - { "identify", S_IFREG|S_IRUSR, proc_ide_read_identify, NULL }, - { "media", S_IFREG|S_IRUGO, proc_ide_read_media, NULL }, - { "model", S_IFREG|S_IRUGO, proc_ide_read_dmodel, NULL }, - { "settings", S_IFREG|S_IRUSR|S_IWUSR, proc_ide_read_settings, - proc_ide_write_settings }, - { NULL, 0, NULL, NULL } + { "driver", S_IFREG|S_IRUGO, &ide_driver_proc_fops }, + { "identify", S_IFREG|S_IRUSR, &ide_identify_proc_fops}, + { "media", S_IFREG|S_IRUGO, &ide_media_proc_fops }, + { "model", S_IFREG|S_IRUGO, &ide_dmodel_proc_fops }, + { "settings", S_IFREG|S_IRUSR|S_IWUSR, &ide_settings_proc_fops}, + {} }; static void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p, void *data) @@ -530,11 +633,8 @@ static void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p if (!dir || !p) return; while (p->name != NULL) { - ent = create_proc_entry(p->name, p->mode, dir); + ent = proc_create_data(p->name, p->mode, dir, p->proc_fops, data); if (!ent) return; - ent->data = data; - ent->read_proc = p->read_proc; - ent->write_proc = p->write_proc; p++; } } @@ -617,10 +717,10 @@ void ide_proc_unregister_device(ide_drive_t *drive) } static ide_proc_entry_t hwif_entries[] = { - { "channel", S_IFREG|S_IRUGO, proc_ide_read_channel, NULL }, - { "mate", S_IFREG|S_IRUGO, proc_ide_read_mate, NULL }, - { "model", S_IFREG|S_IRUGO, proc_ide_read_imodel, NULL }, - { NULL, 0, NULL, NULL } + { "channel", S_IFREG|S_IRUGO, &ide_channel_proc_fops }, + { "mate", S_IFREG|S_IRUGO, &ide_mate_proc_fops }, + { "model", S_IFREG|S_IRUGO, &ide_imodel_proc_fops }, + {} }; void ide_proc_register_port(ide_hwif_t *hwif) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 7b2032bc357b..9d6f62baac27 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -31,6 +31,7 @@ #include #include #include +#include <linux/seq_file.h> #include #include #include @@ -1816,22 +1817,32 @@ static void ide_tape_release(struct device *dev) } #ifdef CONFIG_IDE_PROC_FS -static int proc_idetape_read_name - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int idetape_name_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) m->private; idetape_tape_t *tape = drive->driver_data; - char *out = page; - int len; - len = sprintf(out, "%s\n", tape->name); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%s\n", tape->name); + return 0; +} + +static int idetape_name_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idetape_name_proc_show, PDE(inode)->data); } +static const struct file_operations idetape_name_proc_fops = { + .owner = THIS_MODULE, + .open = idetape_name_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static ide_proc_entry_t idetape_proc[] = { - { "capacity",
S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL }, - { "name", S_IFREG|S_IRUGO, proc_idetape_read_name, NULL }, - { NULL, 0, NULL, NULL } + { "capacity", S_IFREG|S_IRUGO, &ide_capacity_proc_fops }, + { "name", S_IFREG|S_IRUGO, &idetape_name_proc_fops }, + {} }; static ide_proc_entry_t *ide_tape_proc_entries(ide_drive_t *drive) diff --git a/include/linux/ide.h b/include/linux/ide.h index 803c1ae31237..e4135d6e0556 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -919,8 +919,7 @@ __IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL) typedef struct { const char *name; mode_t mode; - read_proc_t *read_proc; - write_proc_t *write_proc; + const struct file_operations *proc_fops; } ide_proc_entry_t; void proc_ide_create(void); @@ -932,24 +931,8 @@ void ide_proc_unregister_port(ide_hwif_t *); void ide_proc_register_driver(ide_drive_t *, struct ide_driver *); void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *); -read_proc_t proc_ide_read_capacity; -read_proc_t proc_ide_read_geometry; - -/* - * Standard exit stuff: - */ -#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) \ -{ \ - len -= off; \ - if (len < count) { \ - *eof = 1; \ - if (len <= 0) \ - return 0; \ - } else \ - len = count; \ - *start = page + off; \ - return len; \ -} +extern const struct file_operations ide_capacity_proc_fops; +extern const struct file_operations ide_geometry_proc_fops; #else static inline void proc_ide_create(void) { ; } static inline void proc_ide_destroy(void) { ; } @@ -961,7 +944,6 @@ static inline void ide_proc_register_driver(ide_drive_t *drive, struct ide_driver *driver) { ; } static inline void ide_proc_unregister_driver(ide_drive_t *drive, struct ide_driver *driver) { ; } -#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; #endif enum { -- cgit v1.2.3 From 69575d388603365f2afbf4166df93152df59b165 Mon Sep 17 00:00:00 2001 From: Shane Wang Date: Tue, 1 Sep 2009 18:25:07 -0700 Subject: x86, intel_txt: clean up the impact on generic code, unbreak non-x86 Move tboot.h from asm to linux to fix the build errors of the intel_txt patch on non-X86 platforms. Remove the tboot code from the generic init/main.c and kernel/cpu.c. Signed-off-by: Shane Wang Signed-off-by: H. Peter Anvin
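The header move is transparent to callers because linux/tboot.h keeps the stub arrangement visible in the deleted asm/tboot.h below: with CONFIG_INTEL_TXT disabled, every entry point collapses to an inline no-op, so files such as arch/x86/kernel/reboot.c can include the header and call tboot unconditionally. A condensed sketch of that idiom, trimmed to two entry points (the full set appears in the diff that follows):

#include <linux/types.h>

#ifdef CONFIG_INTEL_TXT

extern struct tboot *tboot;

static inline int tboot_enabled(void)
{
	return tboot != NULL;
}

extern void tboot_shutdown(u32 shutdown_type);

#else /* !CONFIG_INTEL_TXT */

/* no-op fallbacks keep generic callers free of #ifdefs */
static inline int tboot_enabled(void)
{
	return 0;
}

static inline void tboot_shutdown(u32 shutdown_type)
{
}

#endif /* CONFIG_INTEL_TXT */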
--- arch/x86/Kconfig | 4 + arch/x86/include/asm/tboot.h | 197 ------------------------------------------ arch/x86/kernel/reboot.c | 3 +- arch/x86/kernel/setup.c | 3 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tboot.c | 58 ++++++++++--- drivers/acpi/acpica/hwsleep.c | 2 +- drivers/pci/dmar.c | 2 +- drivers/pci/intel-iommu.c | 2 +- include/linux/tboot.h | 162 ++++++++++++++++++++++++++++++++++ init/main.c | 3 - kernel/cpu.c | 6 +- security/Kconfig | 2 +- 13 files changed, 221 insertions(+), 225 deletions(-) delete mode 100644 arch/x86/include/asm/tboot.h create mode 100644 include/linux/tboot.h (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 738bdc6b0f8b..b66f2102c35d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -178,6 +178,10 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config HAVE_INTEL_TXT + def_bool y + depends on EXPERIMENTAL && DMAR && ACPI + # Use the generic interrupt handling code in kernel/irq/: config GENERIC_HARDIRQS bool diff --git a/arch/x86/include/asm/tboot.h b/arch/x86/include/asm/tboot.h deleted file mode 100644 index b13929d4e5f4..000000000000 --- a/arch/x86/include/asm/tboot.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * tboot.h: shared data structure with tboot and kernel and functions - * used by kernel for runtime support of Intel(R) Trusted - * Execution Technology - * - * Copyright (c) 2006-2009, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * - */ - -#ifndef _ASM_TBOOT_H -#define _ASM_TBOOT_H - -#include - -/* these must have the values from 0-5 in this order */ -enum { - TB_SHUTDOWN_REBOOT = 0, - TB_SHUTDOWN_S5, - TB_SHUTDOWN_S4, - TB_SHUTDOWN_S3, - TB_SHUTDOWN_HALT, - TB_SHUTDOWN_WFS -}; - -#ifdef CONFIG_INTEL_TXT - -/* used to communicate between tboot and the launched kernel */ - -#define TB_KEY_SIZE 64 /* 512 bits */ - -#define MAX_TB_MAC_REGIONS 32 - -struct tboot_mac_region { - u64 start; /* must be 64 byte -aligned */ - u32 size; /* must be 64 byte -granular */ -} __packed; - -/* GAS - Generic Address Structure (ACPI 2.0+) */ -struct tboot_acpi_generic_address { - u8 space_id; - u8 bit_width; - u8 bit_offset; - u8 access_width; - u64 address; -} __packed; - -/* - * combines Sx info from FADT and FACS tables per ACPI 2.0+ spec - * (http://www.acpi.info/) - */ -struct tboot_acpi_sleep_info { - struct tboot_acpi_generic_address pm1a_cnt_blk; - struct tboot_acpi_generic_address pm1b_cnt_blk; - struct tboot_acpi_generic_address pm1a_evt_blk; - struct tboot_acpi_generic_address pm1b_evt_blk; - u16 pm1a_cnt_val; - u16 pm1b_cnt_val; - u64 wakeup_vector; - u32 vector_width; - u64 kernel_s3_resume_vector; -} __packed; - -/* - * shared memory page used for communication between tboot and kernel - */ -struct tboot { - /* - * version 3+ fields: - */ - - /* TBOOT_UUID */ - u8 uuid[16]; - - /* version number: 5 is current */ - u32 version; - - /* physical addr of tb_log_t log */ - u32 log_addr; - - /* - * physical addr of entry point for tboot shutdown and - * type of shutdown (TB_SHUTDOWN_*) being requested - */ - u32 shutdown_entry; - u32 shutdown_type; - - /* kernel-specified ACPI info for Sx shutdown */ - struct tboot_acpi_sleep_info acpi_sinfo; - - /* tboot location in memory (physical) */ - u32 tboot_base; - u32 tboot_size; - - /* memory regions (phys addrs) for tboot to MAC on S3 */ - u8 num_mac_regions; - struct tboot_mac_region mac_regions[MAX_TB_MAC_REGIONS]; - - - /* - * version 4+ fields: - */ - - /* symmetric key for use by kernel; will be encrypted on S3 */ - u8 s3_key[TB_KEY_SIZE]; - - - /* - * version 5+ fields: - */ - - /* used to 4byte-align num_in_wfs */ - u8 reserved_align[3]; - - /* number of processors in wait-for-SIPI */ - u32 num_in_wfs; -} __packed; - -/* - * UUID for tboot data struct to facilitate matching - * defined as {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} by tboot, which is - * represented as {} in the char array used here - */ -#define TBOOT_UUID {0xff, 0x8d, 0x3c, 0x66, 0xb3, 0xe8, 0x82, 0x4b, 0xbf,\ - 0xaa, 0x19, 0xea, 0x4d, 0x5, 0x7a, 0x8} - -extern struct tboot *tboot; - -static inline int tboot_enabled(void) -{ - return tboot != NULL; -} - -extern void tboot_probe(void); -extern void tboot_create_trampoline(void); -extern void tboot_shutdown(u32 shutdown_type); -extern void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); -extern int tboot_wait_for_aps(int num_aps); -extern struct acpi_table_header *tboot_get_dmar_table( - struct acpi_table_header *dmar_tbl); -extern int tboot_force_iommu(void); - -#else /* CONFIG_INTEL_TXT */ - -static inline int tboot_enabled(void) -{ - return 0; -} - -static inline void tboot_probe(void) -{ -} - -static inline void tboot_create_trampoline(void) -{ -} - -static inline void tboot_shutdown(u32 shutdown_type) -{ -} - -static inline void tboot_sleep(u8 sleep_state, u32 pm1a_control, - u32 pm1b_control) -{ -} - -static inline int tboot_wait_for_aps(int num_aps) -{ - return 0; -} - -static inline struct acpi_table_header *tboot_get_dmar_table( - 
struct acpi_table_header *dmar_tbl) -{ - return dmar_tbl; -} - -static inline int tboot_force_iommu(void) -{ - return 0; -} - -#endif /* !CONFIG_INTEL_TXT */ - -#endif /* _ASM_TBOOT_H */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9de01c5d9794..18ce5c04242a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -3,6 +3,7 @@ #include #include #include +#include <linux/tboot.h> #include #include #include @@ -24,8 +25,6 @@ # include #endif -#include <asm/tboot.h> - /* * Power off function, if any */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 80d6e9e32483..6ce0d6f38f7f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -66,6 +66,7 @@ #include #include +#include <linux/tboot.h> #include