From 464b4279d34ff4ac724b3e1966cc11dba564eba3 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 25 Jan 2019 12:53:07 +0530 Subject: cpufreq: Use struct kobj_attribute instead of struct global_attr commit 625c85a62cb7d3c79f6e16de3cfa972033658250 upstream. The cpufreq_global_kobject is created using kobject_create_and_add() helper, which assigns the kobj_type as dynamic_kobj_ktype and show/store routines are set to kobj_attr_show() and kobj_attr_store(). These routines pass struct kobj_attribute as an argument to the show/store callbacks. But all the cpufreq files created using the cpufreq_global_kobject expect the argument to be of type struct attribute. Things work fine currently as no one accesses the "attr" argument. We may not see issues even if the argument is used, as struct kobj_attribute has struct attribute as its first element and so they will both get same address. But this is logically incorrect and we should rather use struct kobj_attribute instead of struct global_attr in the cpufreq core and drivers and the show/store callbacks should take struct kobj_attribute as argument instead. This bug is caught using CFI CLANG builds in android kernel which catches mismatch in function prototypes for such callbacks. Reported-by: Donghee Han Reported-by: Sangkyu Kim Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq.c | 6 +++--- drivers/cpufreq/intel_pstate.c | 23 ++++++++++++----------- include/linux/cpufreq.h | 12 ++---------- 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b100260b6ed2..505c9a55d555 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -545,13 +545,13 @@ EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us); * SYSFS INTERFACE * *********************************************************************/ static ssize_t show_boost(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", cpufreq_driver->boost_enabled); } -static ssize_t store_boost(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) +static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) { int ret, enable; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b6a1aadaff9f..a005711f909e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -833,7 +833,7 @@ static void intel_pstate_update_policies(void) /************************** sysfs begin ************************/ #define show_one(file_name, object) \ static ssize_t show_##file_name \ - (struct kobject *kobj, struct attribute *attr, char *buf) \ + (struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ { \ return sprintf(buf, "%u\n", global.object); \ } @@ -842,7 +842,7 @@ static ssize_t intel_pstate_show_status(char *buf); static int intel_pstate_update_status(const char *buf, size_t size); static ssize_t show_status(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { ssize_t ret; @@ -853,7 +853,7 @@ static ssize_t show_status(struct kobject *kobj, return ret; } -static ssize_t store_status(struct kobject *a, struct attribute *b, +static ssize_t store_status(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { char *p = memchr(buf, '\n', count); @@ -867,7 +867,7 @@ static ssize_t store_status(struct kobject *a, struct attribute *b, } static ssize_t show_turbo_pct(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { struct cpudata *cpu; int total, no_turbo, turbo_pct; @@ -893,7 +893,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj, } static ssize_t show_num_pstates(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { struct cpudata *cpu; int total; @@ -914,7 +914,7 @@ static ssize_t show_num_pstates(struct kobject *kobj, } static ssize_t show_no_turbo(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { ssize_t ret; @@ -936,7 +936,7 @@ static ssize_t show_no_turbo(struct kobject *kobj, return ret; } -static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, +static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -983,7 +983,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, return count; } -static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, +static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -1013,7 +1013,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, return count; } -static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, +static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; @@ -1045,12 +1045,13 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, } static ssize_t show_hwp_dynamic_boost(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%u\n", hwp_boost); } -static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b, +static ssize_t store_hwp_dynamic_boost(struct kobject *a, + struct kobj_attribute *b, const char *buf, size_t count) { unsigned int input; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 882a9b9e34bc..72f59e8321e7 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -254,20 +254,12 @@ __ATTR(_name, 0644, show_##_name, store_##_name) static struct freq_attr _name = \ __ATTR(_name, 0200, NULL, store_##_name) -struct global_attr { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, - struct attribute *attr, char *buf); - ssize_t (*store)(struct kobject *a, struct attribute *b, - const char *c, size_t count); -}; - #define define_one_global_ro(_name) \ -static struct global_attr _name = \ +static struct kobj_attribute _name = \ __ATTR(_name, 0444, show_##_name, NULL) #define define_one_global_rw(_name) \ -static struct global_attr _name = \ +static struct kobj_attribute _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) -- cgit v1.2.3 From cbace523cbaafa9e315fc87c227665ffb33dc557 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 27 Feb 2019 13:33:32 +0800 Subject: staging: erofs: fix mis-acted TAIL merging behavior commit a112152f6f3a2a88caa6f414d540bd49e406af60 upstream. EROFS has an optimized path called TAIL merging, which is designed to merge multiple reads and the corresponding decompressions into one if these requests read continuous pages almost at the same time. In general, it behaves as follows: ________________________________________________________________ ... | TAIL . HEAD | PAGE | PAGE | TAIL . HEAD | ... _____|_combined page A_|________|________|_combined page B_|____ 1 ] -> [ 2 ] -> [ 3 If the above three reads are requested in the order 1-2-3, it will generate a large work chain rather than 3 individual work chains to reduce scheduling overhead and boost up sequential read. However, if Read 2 is processed slightly earlier than Read 1, currently it still generates 2 individual work chains (chain 1, 2) but it does in-place decompression for combined page A, moreover, if chain 2 decompresses ahead of chain 1, it will be a race and lead to corrupted decompressed page. This patch fixes it. Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") Cc: # 4.19+ Signed-off-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/unzip_vle.c | 69 +++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 25 deletions(-) diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c index 1279241449f4..24bd1d6deb66 100644 --- a/drivers/staging/erofs/unzip_vle.c +++ b/drivers/staging/erofs/unzip_vle.c @@ -57,15 +57,30 @@ enum z_erofs_vle_work_role { Z_EROFS_VLE_WORK_SECONDARY, Z_EROFS_VLE_WORK_PRIMARY, /* - * The current work has at least been linked with the following - * processed chained works, which means if the processing page - * is the tail partial page of the work, the current work can - * safely use the whole page, as illustrated below: - * +--------------+-------------------------------------------+ - * | tail page | head page (of the previous work) | - * +--------------+-------------------------------------------+ - * /\ which belongs to the current work - * [ (*) this page can be used for the current work itself. ] + * The current work was the tail of an exist chain, and the previous + * processed chained works are all decided to be hooked up to it. + * A new chain should be created for the remaining unprocessed works, + * therefore different from Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED, + * the next work cannot reuse the whole page in the following scenario: + * ________________________________________________________________ + * | tail (partial) page | head (partial) page | + * | (belongs to the next work) | (belongs to the current work) | + * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________| + */ + Z_EROFS_VLE_WORK_PRIMARY_HOOKED, + /* + * The current work has been linked with the processed chained works, + * and could be also linked with the potential remaining works, which + * means if the processing page is the tail partial page of the work, + * the current work can safely use the whole page (since the next work + * is under control) for in-place decompression, as illustrated below: + * ________________________________________________________________ + * | tail (partial) page | head (partial) page | + * | (of the current work) | (of the previous work) | + * | PRIMARY_FOLLOWED or | | + * |_____PRIMARY_HOOKED____|____________PRIMARY_FOLLOWED____________| + * + * [ (*) the above page can be used for the current work itself. ] */ Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED, Z_EROFS_VLE_WORK_MAX @@ -234,10 +249,10 @@ static int z_erofs_vle_work_add_page( return ret ? 0 : -EAGAIN; } -static inline bool try_to_claim_workgroup( - struct z_erofs_vle_workgroup *grp, - z_erofs_vle_owned_workgrp_t *owned_head, - bool *hosted) +static enum z_erofs_vle_work_role +try_to_claim_workgroup(struct z_erofs_vle_workgroup *grp, + z_erofs_vle_owned_workgrp_t *owned_head, + bool *hosted) { DBG_BUGON(*hosted == true); @@ -251,6 +266,9 @@ retry: *owned_head = grp; *hosted = true; + /* lucky, I am the followee :) */ + return Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED; + } else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) { /* * type 2, link to the end of a existing open chain, @@ -260,12 +278,11 @@ retry: if (Z_EROFS_VLE_WORKGRP_TAIL != cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL, *owned_head)) goto retry; - *owned_head = Z_EROFS_VLE_WORKGRP_TAIL; - } else - return false; /* :( better luck next time */ + return Z_EROFS_VLE_WORK_PRIMARY_HOOKED; + } - return true; /* lucky, I am the followee :) */ + return Z_EROFS_VLE_WORK_PRIMARY; /* :( better luck next time */ } static struct z_erofs_vle_work * @@ -337,12 +354,8 @@ z_erofs_vle_work_lookup(struct super_block *sb, *hosted = false; if (!primary) *role = Z_EROFS_VLE_WORK_SECONDARY; - /* claim the workgroup if possible */ - else if (try_to_claim_workgroup(grp, owned_head, hosted)) - *role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED; - else - *role = Z_EROFS_VLE_WORK_PRIMARY; - + else /* claim the workgroup if possible */ + *role = try_to_claim_workgroup(grp, owned_head, hosted); return work; } @@ -419,6 +432,9 @@ static inline void __update_workgrp_llen(struct z_erofs_vle_workgroup *grp, } } +#define builder_is_hooked(builder) \ + ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_HOOKED) + #define builder_is_followed(builder) \ ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED) @@ -583,7 +599,7 @@ static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe, struct z_erofs_vle_work_builder *const builder = &fe->builder; const loff_t offset = page_offset(page); - bool tight = builder_is_followed(builder); + bool tight = builder_is_hooked(builder); struct z_erofs_vle_work *work = builder->work; #ifdef EROFS_FS_HAS_MANAGED_CACHE @@ -646,7 +662,7 @@ repeat: builder->role = Z_EROFS_VLE_WORK_PRIMARY; #endif - tight &= builder_is_followed(builder); + tight &= builder_is_hooked(builder); work = builder->work; hitted: cur = end - min_t(unsigned, offset + end - map->m_la, end); @@ -661,6 +677,9 @@ hitted: (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE : Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED)); + if (cur) + tight &= builder_is_followed(builder); + retry: err = z_erofs_vle_work_add_page(builder, page, page_type); /* should allocate an additional staging page for pagevec */ -- cgit v1.2.3 From 965e716001c9b2685e3018399ff1dfb17b5c7428 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 20 Feb 2019 11:43:17 +0100 Subject: USB: serial: option: add Telit ME910 ECM composition commit 6431866b6707d27151be381252d6eef13025cfce upstream. This patch adds Telit ME910 family ECM composition 0x1102. Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index fb544340888b..faf833e8f557 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1148,6 +1148,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), .driver_info = NCTRL(0) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1102, 0xff), /* Telit ME910 (ECM) */ + .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), -- cgit v1.2.3 From da20be9991d8cc9da9a242227aeee9bb4c3dd02a Mon Sep 17 00:00:00 2001 From: Ivan Mironov Date: Wed, 6 Feb 2019 21:14:13 +0500 Subject: USB: serial: cp210x: add ID for Ingenico 3070 commit dd9d3d86b08d6a106830364879c42c78db85389c upstream. Here is how this device appears in kernel log: usb 3-1: new full-speed USB device number 18 using xhci_hcd usb 3-1: New USB device found, idVendor=0b00, idProduct=3070 usb 3-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 3-1: Product: Ingenico 3070 usb 3-1: Manufacturer: Silicon Labs usb 3-1: SerialNumber: 0001 Apparently this is a POS terminal with embedded USB-to-Serial converter. Cc: stable@vger.kernel.org Signed-off-by: Ivan Mironov Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index c0777a374a88..3286ed462fc5 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -61,6 +61,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */ + { USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */ { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */ { USB_DEVICE(0x0BED, 0x1101) }, /* MEI series 2000 Combo Acceptor */ { USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */ -- cgit v1.2.3 From b46e1fc6ccee6ad21f258ca4f4d2f1ead6c6828e Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 14 Feb 2019 19:45:33 +0000 Subject: USB: serial: ftdi_sio: add ID for Hjelmslund Electronics USB485 commit 8d7fa3d4ea3f0ca69554215e87411494e6346fdc upstream. This adds the USB ID of the Hjelmslund Electronics USB485 Iso stick. Signed-off-by: Mans Rullgard Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index b5cef322826f..1d8077e880a0 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1015,6 +1015,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) }, + /* EZPrototypes devices */ + { USB_DEVICE(EZPROTOTYPES_VID, HJELMSLUND_USB485_ISO_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 975d02666c5a..b863bedb55a1 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1308,6 +1308,12 @@ #define IONICS_VID 0x1c0c #define IONICS_PLUGCOMPUTER_PID 0x0102 +/* + * EZPrototypes (PID reseller) + */ +#define EZPROTOTYPES_VID 0x1c40 +#define HJELMSLUND_USB485_ISO_PID 0x0477 + /* * Dresden Elektronik Sensor Terminal Board */ -- cgit v1.2.3 From 1fa7c9b4226fa9d56e8ef58f4c06f29490493c03 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 27 Feb 2019 13:33:31 +0800 Subject: staging: erofs: fix illegal address access under memory pressure commit 1e5ceeab6929585512c63d05911d6657064abf7b upstream. Considering a read request with two decompressed file pages, If a decompression work cannot be started on the previous page due to memory pressure but in-memory LTP map lookup is done, builder->work should be still NULL. Moreover, if the current page also belongs to the same map, it won't try to start the decompression work again and then run into trouble. This patch aims to solve the above issue only with little changes as much as possible in order to make the fix backport easier. kernel message is: <4>[1051408.015930s]SLUB: Unable to allocate memory on node -1, gfp=0x2408040(GFP_NOFS|__GFP_ZERO) <4>[1051408.015930s] cache: erofs_compress, object size: 144, buffer size: 144, default order: 0, min order: 0 <4>[1051408.015930s] node 0: slabs: 98, objs: 2744, free: 0 * Cannot allocate the decompression work <3>[1051408.015960s]erofs: z_erofs_vle_normalaccess_readpages, readahead error at page 1008 of nid 5391488 * Note that the previous page was failed to read <0>[1051408.015960s]Internal error: Accessing user space memory outside uaccess.h routines: 96000005 [#1] PREEMPT SMP ... <4>[1051408.015991s]Hardware name: kirin710 (DT) ... <4>[1051408.016021s]PC is at z_erofs_vle_work_add_page+0xa0/0x17c <4>[1051408.016021s]LR is at z_erofs_do_read_page+0x12c/0xcf0 ... <4>[1051408.018096s][] z_erofs_vle_work_add_page+0xa0/0x17c <4>[1051408.018096s][] z_erofs_vle_normalaccess_readpages+0x1a0/0x37c <4>[1051408.018096s][] read_pages+0x70/0x190 <4>[1051408.018127s][] __do_page_cache_readahead+0x194/0x1a8 <4>[1051408.018127s][] filemap_fault+0x398/0x684 <4>[1051408.018127s][] __do_fault+0x8c/0x138 <4>[1051408.018127s][] handle_pte_fault+0x730/0xb7c <4>[1051408.018127s][] __handle_mm_fault+0xac/0xf4 <4>[1051408.018157s][] handle_mm_fault+0x7c/0x118 <4>[1051408.018157s][] do_page_fault+0x354/0x474 <4>[1051408.018157s][] do_translation_fault+0x40/0x48 <4>[1051408.018157s][] do_mem_abort+0x80/0x100 <4>[1051408.018310s]---[ end trace 9f4009a3283bd78b ]--- Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") Cc: # 4.19+ Signed-off-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/unzip_vle.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c index 24bd1d6deb66..b7aff65d85a7 100644 --- a/drivers/staging/erofs/unzip_vle.c +++ b/drivers/staging/erofs/unzip_vle.c @@ -622,8 +622,12 @@ repeat: /* lucky, within the range of the current map_blocks */ if (offset + cur >= map->m_la && - offset + cur < map->m_la + map->m_llen) + offset + cur < map->m_la + map->m_llen) { + /* didn't get a valid unzip work previously (very rare) */ + if (!builder->work) + goto restart_now; goto hitted; + } /* go ahead the next map_blocks */ debugln("%s: [out-of-range] pos %llu", __func__, offset + cur); @@ -637,6 +641,7 @@ repeat: if (unlikely(err)) goto err_out; +restart_now: if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) goto hitted; -- cgit v1.2.3 From 40245f24133c880dbbbcf1387d8087f4a066b1a8 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Wed, 27 Feb 2019 13:33:30 +0800 Subject: staging: erofs: compressed_pages should not be accessed again after freed commit af692e117cb8cd9d3d844d413095775abc1217f9 upstream. This patch resolves the following page use-after-free issue, z_erofs_vle_unzip: ... for (i = 0; i < nr_pages; ++i) { ... z_erofs_onlinepage_endio(page); (1) } for (i = 0; i < clusterpages; ++i) { page = compressed_pages[i]; if (page->mapping == mngda) (2) continue; /* recycle all individual staging pages */ (void)z_erofs_gather_if_stagingpage(page_pool, page); (3) WRITE_ONCE(compressed_pages[i], NULL); } ... After (1) is executed, page is freed and could be then reused, if compressed_pages is scanned after that, it could fall info (2) or (3) by mistake and that could finally be in a mess. This patch aims to solve the above issue only with little changes as much as possible in order to make the fix backport easier. Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") Cc: # 4.19+ Signed-off-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/unzip_vle.c | 38 ++++++++++++++++++----------------- drivers/staging/erofs/unzip_vle.h | 3 +-- drivers/staging/erofs/unzip_vle_lz4.c | 20 +++++++++--------- 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c index b7aff65d85a7..f44662dd795c 100644 --- a/drivers/staging/erofs/unzip_vle.c +++ b/drivers/staging/erofs/unzip_vle.c @@ -925,11 +925,10 @@ repeat: if (llen > grp->llen) llen = grp->llen; - err = z_erofs_vle_unzip_fast_percpu(compressed_pages, - clusterpages, pages, llen, work->pageofs, - z_erofs_onlinepage_endio); + err = z_erofs_vle_unzip_fast_percpu(compressed_pages, clusterpages, + pages, llen, work->pageofs); if (err != -ENOTSUPP) - goto out_percpu; + goto out; if (sparsemem_pages >= nr_pages) goto skip_allocpage; @@ -950,8 +949,25 @@ skip_allocpage: erofs_vunmap(vout, nr_pages); out: + /* must handle all compressed pages before endding pages */ + for (i = 0; i < clusterpages; ++i) { + page = compressed_pages[i]; + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + if (page->mapping == mngda) + continue; +#endif + /* recycle all individual staging pages */ + (void)z_erofs_gather_if_stagingpage(page_pool, page); + + WRITE_ONCE(compressed_pages[i], NULL); + } + for (i = 0; i < nr_pages; ++i) { page = pages[i]; + if (!page) + continue; + DBG_BUGON(page->mapping == NULL); /* recycle all individual staging pages */ @@ -964,20 +980,6 @@ out: z_erofs_onlinepage_endio(page); } -out_percpu: - for (i = 0; i < clusterpages; ++i) { - page = compressed_pages[i]; - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - if (page->mapping == mngda) - continue; -#endif - /* recycle all individual staging pages */ - (void)z_erofs_gather_if_stagingpage(page_pool, page); - - WRITE_ONCE(compressed_pages[i], NULL); - } - if (pages == z_pagemap_global) mutex_unlock(&z_pagemap_global_lock); else if (unlikely(pages != pages_onstack)) diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h index 3316bc36965d..684ff06fc7bf 100644 --- a/drivers/staging/erofs/unzip_vle.h +++ b/drivers/staging/erofs/unzip_vle.h @@ -218,8 +218,7 @@ extern int z_erofs_vle_plain_copy(struct page **compressed_pages, extern int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, unsigned clusterpages, struct page **pages, - unsigned outlen, unsigned short pageofs, - void (*endio)(struct page *)); + unsigned int outlen, unsigned short pageofs); extern int z_erofs_vle_unzip_vmap(struct page **compressed_pages, unsigned clusterpages, void *vaddr, unsigned llen, diff --git a/drivers/staging/erofs/unzip_vle_lz4.c b/drivers/staging/erofs/unzip_vle_lz4.c index 9cb35cd33365..055420e8af2c 100644 --- a/drivers/staging/erofs/unzip_vle_lz4.c +++ b/drivers/staging/erofs/unzip_vle_lz4.c @@ -105,8 +105,7 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, unsigned clusterpages, struct page **pages, unsigned outlen, - unsigned short pageofs, - void (*endio)(struct page *)) + unsigned short pageofs) { void *vin, *vout; unsigned nr_pages, i, j; @@ -128,31 +127,30 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, ret = z_erofs_unzip_lz4(vin, vout + pageofs, clusterpages * PAGE_SIZE, outlen); - if (ret >= 0) { - outlen = ret; - ret = 0; - } + if (ret < 0) + goto out; + ret = 0; for (i = 0; i < nr_pages; ++i) { j = min((unsigned)PAGE_SIZE - pageofs, outlen); if (pages[i] != NULL) { - if (ret < 0) - SetPageError(pages[i]); - else if (clusterpages == 1 && pages[i] == compressed_pages[0]) + if (clusterpages == 1 && + pages[i] == compressed_pages[0]) { memcpy(vin + pageofs, vout + pageofs, j); - else { + } else { void *dst = kmap_atomic(pages[i]); memcpy(dst + pageofs, vout + pageofs, j); kunmap_atomic(dst); } - endio(pages[i]); } vout += PAGE_SIZE; outlen -= j; pageofs = 0; } + +out: preempt_enable(); if (clusterpages == 1) -- cgit v1.2.3 From 63efda29f359591c43d52cc4a3e86b7f693e2269 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 12 Feb 2019 12:44:50 -0600 Subject: staging: comedi: ni_660x: fix missing break in switch statement commit 479826cc86118e0d87e5cefb3df5b748e0480924 upstream. Add missing break statement in order to prevent the code from falling through to the default case and return -EINVAL every time. This bug was found thanks to the ongoing efforts to enable -Wimplicit-fallthrough. Fixes: aa94f2888825 ("staging: comedi: ni_660x: tidy up ni_660x_set_pfi_routing()") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/ni_660x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/comedi/drivers/ni_660x.c b/drivers/staging/comedi/drivers/ni_660x.c index e521ed9d0887..35bd4d2efe16 100644 --- a/drivers/staging/comedi/drivers/ni_660x.c +++ b/drivers/staging/comedi/drivers/ni_660x.c @@ -602,6 +602,7 @@ static int ni_660x_set_pfi_routing(struct comedi_device *dev, case NI_660X_PFI_OUTPUT_DIO: if (chan > 31) return -EINVAL; + break; default: return -EINVAL; } -- cgit v1.2.3 From 14af4eff14c2d079d1dede5eb2771815a9db8100 Mon Sep 17 00:00:00 2001 From: Ajay Singh Date: Thu, 7 Feb 2019 11:28:58 +0000 Subject: staging: wilc1000: fix to set correct value for 'vif_num' commit dda037057a572f5c82ac2499eb4e6fb17600ba3e upstream. Set correct value in '->vif_num' for the total number of interfaces and set '->idx' value using 'i'. Fixes: 735bb39ca3be ("staging: wilc1000: simplify vif[i]->ndev accesses") Fixes: 0e490657c721 ("staging: wilc1000: Fix problem with wrong vif index") Cc: Suggested-by: Dan Carpenter Reviewed-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/linux_wlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/wilc1000/linux_wlan.c b/drivers/staging/wilc1000/linux_wlan.c index 3b8d237decbf..649caae2b603 100644 --- a/drivers/staging/wilc1000/linux_wlan.c +++ b/drivers/staging/wilc1000/linux_wlan.c @@ -1090,8 +1090,8 @@ int wilc_netdev_init(struct wilc **wilc, struct device *dev, int io_type, vif->wilc = *wilc; vif->ndev = ndev; wl->vif[i] = vif; - wl->vif_num = i; - vif->idx = wl->vif_num; + wl->vif_num = i + 1; + vif->idx = i; ndev->netdev_ops = &wilc_netdev_ops; -- cgit v1.2.3 From 271800f564db5568ac324a5c4072d877633b823d Mon Sep 17 00:00:00 2001 From: Qing Xia Date: Fri, 1 Feb 2019 14:59:46 +0800 Subject: staging: android: ion: fix sys heap pool's gfp_flags commit 9bcf065e28122588a6cbee08cf847826dacbb438 upstream. In the first loop, gfp_flags will be modified to high_order_gfp_flags, and there will be no chance to change back to low_order_gfp_flags. Fixes: e7f63771b60e ("ION: Sys_heap: Add cached pool to spead up cached buffer alloc") Signed-off-by: Qing Xia Cc: stable Signed-off-by: Jing Xia Reviewed-by: Yuming Han Reviewed-by: Zhaoyang Huang Reviewed-by: Orson Zhai Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion_system_heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c index 701eb9f3b0f1..0a1b1ff1be04 100644 --- a/drivers/staging/android/ion/ion_system_heap.c +++ b/drivers/staging/android/ion/ion_system_heap.c @@ -247,10 +247,10 @@ static void ion_system_heap_destroy_pools(struct ion_page_pool **pools) static int ion_system_heap_create_pools(struct ion_page_pool **pools) { int i; - gfp_t gfp_flags = low_order_gfp_flags; for (i = 0; i < NUM_ORDERS; i++) { struct ion_page_pool *pool; + gfp_t gfp_flags = low_order_gfp_flags; if (orders[i] > 4) gfp_flags = high_order_gfp_flags; -- cgit v1.2.3 From b8d048b739007a0fd370b52033cb7357f960e4a0 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 5 Feb 2019 19:28:40 +0900 Subject: staging: android: ashmem: Don't call fallocate() with ashmem_mutex held. commit fb4415a12632f0b9078a0aa80c16745d48fcfc74 upstream. syzbot is hitting lockdep warnings [1][2][3]. This patch tries to fix the warning by eliminating ashmem_shrink_scan() => {shmem|vfs}_fallocate() sequence. [1] https://syzkaller.appspot.com/bug?id=87c399f6fa6955006080b24142e2ce7680295ad4 [2] https://syzkaller.appspot.com/bug?id=7ebea492de7521048355fc84210220e1038a7908 [3] https://syzkaller.appspot.com/bug?id=e02419c12131c24e2a957ea050c2ab6dcbbc3270 Reported-by: syzbot Reported-by: syzbot Reported-by: syzbot Signed-off-by: Tetsuo Handa Cc: stable@vger.kernel.org Acked-by: Joel Fernandes (Google) Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index a880b5c6c6c3..1cc451bb15a4 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -75,6 +75,9 @@ struct ashmem_range { /* LRU list of unpinned pages, protected by ashmem_mutex */ static LIST_HEAD(ashmem_lru_list); +static atomic_t ashmem_shrink_inflight = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(ashmem_shrink_wait); + /* * long lru_count - The count of pages on our LRU list. * @@ -438,7 +441,6 @@ out: static unsigned long ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct ashmem_range *range, *next; unsigned long freed = 0; /* We might recurse into filesystem code, so bail out if necessary */ @@ -448,21 +450,33 @@ ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) if (!mutex_trylock(&ashmem_mutex)) return -1; - list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) { + while (!list_empty(&ashmem_lru_list)) { + struct ashmem_range *range = + list_first_entry(&ashmem_lru_list, typeof(*range), lru); loff_t start = range->pgstart * PAGE_SIZE; loff_t end = (range->pgend + 1) * PAGE_SIZE; + struct file *f = range->asma->file; - range->asma->file->f_op->fallocate(range->asma->file, - FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - start, end - start); + get_file(f); + atomic_inc(&ashmem_shrink_inflight); range->purged = ASHMEM_WAS_PURGED; lru_del(range); freed += range_size(range); + mutex_unlock(&ashmem_mutex); + f->f_op->fallocate(f, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, end - start); + fput(f); + if (atomic_dec_and_test(&ashmem_shrink_inflight)) + wake_up_all(&ashmem_shrink_wait); + if (!mutex_trylock(&ashmem_mutex)) + goto out; if (--sc->nr_to_scan <= 0) break; } mutex_unlock(&ashmem_mutex); +out: return freed; } @@ -713,6 +727,7 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, return -EFAULT; mutex_lock(&ashmem_mutex); + wait_event(ashmem_shrink_wait, !atomic_read(&ashmem_shrink_inflight)); if (!asma->file) goto out_unlock; -- cgit v1.2.3 From de40920f369488929456b2614b9be14242024279 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 22 Feb 2019 20:03:55 +0900 Subject: staging: android: ashmem: Avoid range_alloc() allocation with ashmem_mutex held. commit ecd182cbf4e107928077866399100228d2359c60 upstream. ashmem_pin() is calling range_shrink() without checking whether range_alloc() succeeded. Also, doing memory allocation with ashmem_mutex held should be avoided because ashmem_shrink_scan() tries to hold it. Therefore, move memory allocation for range_alloc() to ashmem_pin_unpin() and make range_alloc() not to fail. This patch is mostly meant for backporting purpose for fuzz testing on stable/distributor kernels, for there is a plan to remove this code in near future. Signed-off-by: Tetsuo Handa Cc: stable@vger.kernel.org Reviewed-by: Joel Fernandes Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 42 +++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 1cc451bb15a4..be815330ed95 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -171,19 +171,15 @@ static inline void lru_del(struct ashmem_range *range) * @end: The ending page (inclusive) * * This function is protected by ashmem_mutex. - * - * Return: 0 if successful, or -ENOMEM if there is an error */ -static int range_alloc(struct ashmem_area *asma, - struct ashmem_range *prev_range, unsigned int purged, - size_t start, size_t end) +static void range_alloc(struct ashmem_area *asma, + struct ashmem_range *prev_range, unsigned int purged, + size_t start, size_t end, + struct ashmem_range **new_range) { - struct ashmem_range *range; - - range = kmem_cache_zalloc(ashmem_range_cachep, GFP_KERNEL); - if (!range) - return -ENOMEM; + struct ashmem_range *range = *new_range; + *new_range = NULL; range->asma = asma; range->pgstart = start; range->pgend = end; @@ -193,8 +189,6 @@ static int range_alloc(struct ashmem_area *asma, if (range_on_lru(range)) lru_add(range); - - return 0; } /** @@ -596,7 +590,8 @@ static int get_name(struct ashmem_area *asma, void __user *name) * * Caller must hold ashmem_mutex. */ -static int ashmem_pin(struct ashmem_area *asma, size_t pgstart, size_t pgend) +static int ashmem_pin(struct ashmem_area *asma, size_t pgstart, size_t pgend, + struct ashmem_range **new_range) { struct ashmem_range *range, *next; int ret = ASHMEM_NOT_PURGED; @@ -649,7 +644,7 @@ static int ashmem_pin(struct ashmem_area *asma, size_t pgstart, size_t pgend) * second half and adjust the first chunk's endpoint. */ range_alloc(asma, range, range->purged, - pgend + 1, range->pgend); + pgend + 1, range->pgend, new_range); range_shrink(range, range->pgstart, pgstart - 1); break; } @@ -663,7 +658,8 @@ static int ashmem_pin(struct ashmem_area *asma, size_t pgstart, size_t pgend) * * Caller must hold ashmem_mutex. */ -static int ashmem_unpin(struct ashmem_area *asma, size_t pgstart, size_t pgend) +static int ashmem_unpin(struct ashmem_area *asma, size_t pgstart, size_t pgend, + struct ashmem_range **new_range) { struct ashmem_range *range, *next; unsigned int purged = ASHMEM_NOT_PURGED; @@ -689,7 +685,8 @@ restart: } } - return range_alloc(asma, range, purged, pgstart, pgend); + range_alloc(asma, range, purged, pgstart, pgend, new_range); + return 0; } /* @@ -722,10 +719,17 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, struct ashmem_pin pin; size_t pgstart, pgend; int ret = -EINVAL; + struct ashmem_range *range = NULL; if (copy_from_user(&pin, p, sizeof(pin))) return -EFAULT; + if (cmd == ASHMEM_PIN || cmd == ASHMEM_UNPIN) { + range = kmem_cache_zalloc(ashmem_range_cachep, GFP_KERNEL); + if (!range) + return -ENOMEM; + } + mutex_lock(&ashmem_mutex); wait_event(ashmem_shrink_wait, !atomic_read(&ashmem_shrink_inflight)); @@ -750,10 +754,10 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, switch (cmd) { case ASHMEM_PIN: - ret = ashmem_pin(asma, pgstart, pgend); + ret = ashmem_pin(asma, pgstart, pgend, &range); break; case ASHMEM_UNPIN: - ret = ashmem_unpin(asma, pgstart, pgend); + ret = ashmem_unpin(asma, pgstart, pgend, &range); break; case ASHMEM_GET_PIN_STATUS: ret = ashmem_get_pin_status(asma, pgstart, pgend); @@ -762,6 +766,8 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, out_unlock: mutex_unlock(&ashmem_mutex); + if (range) + kmem_cache_free(ashmem_range_cachep, range); return ret; } -- cgit v1.2.3 From b5ff77ddd96f2ab2209143990a8d0efcffb7f6b9 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 3 Mar 2019 07:34:57 +0000 Subject: ip6mr: Do not call __IP6_INC_STATS() from preemptible context [ Upstream commit 87c11f1ddbbad38ad8bad47af133a8208985fbdf ] Similar to commit 44f49dd8b5a6 ("ipmr: fix possible race resulting from improper usage of IP_INC_STATS_BH() in preemptible context."), we cannot assume preemption is disabled when incrementing the counter and accessing a per-CPU variable. Preemption can be enabled when we add a route in process context that corresponds to packets stored in the unresolved queue, which are then forwarded using this route [1]. Fix this by using IP6_INC_STATS() which takes care of disabling preemption on architectures where it is needed. [1] [ 157.451447] BUG: using __this_cpu_add() in preemptible [00000000] code: smcrouted/2314 [ 157.460409] caller is ip6mr_forward2+0x73e/0x10e0 [ 157.460434] CPU: 3 PID: 2314 Comm: smcrouted Not tainted 5.0.0-rc7-custom-03635-g22f2712113f1 #1336 [ 157.460449] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016 [ 157.460461] Call Trace: [ 157.460486] dump_stack+0xf9/0x1be [ 157.460553] check_preemption_disabled+0x1d6/0x200 [ 157.460576] ip6mr_forward2+0x73e/0x10e0 [ 157.460705] ip6_mr_forward+0x9a0/0x1510 [ 157.460771] ip6mr_mfc_add+0x16b3/0x1e00 [ 157.461155] ip6_mroute_setsockopt+0x3cb/0x13c0 [ 157.461384] do_ipv6_setsockopt.isra.8+0x348/0x4060 [ 157.462013] ipv6_setsockopt+0x90/0x110 [ 157.462036] rawv6_setsockopt+0x4a/0x120 [ 157.462058] __sys_setsockopt+0x16b/0x340 [ 157.462198] __x64_sys_setsockopt+0xbf/0x160 [ 157.462220] do_syscall_64+0x14d/0x610 [ 157.462349] entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 0912ea38de61 ("[IPV6] MROUTE: Add stats in multicast routing module method ip6_mr_forward().") Signed-off-by: Ido Schimmel Reported-by: Amit Cohen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 10aafea3af0f..35e7092eceb3 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1954,10 +1954,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTFORWDATAGRAMS); - __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTOCTETS, skb->len); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTOCTETS, skb->len); return dst_output(net, sk, skb); } -- cgit v1.2.3 From dea818999a1e455bbf31b6383ded407a345141af Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 1 Mar 2019 19:53:57 +0100 Subject: net: dsa: mv88e6xxx: handle unknown duplex modes gracefully in mv88e6xxx_port_set_duplex [ Upstream commit c6195a8bdfc62a7cecf7df685e64847a4b700275 ] When testing another issue I faced the problem that mv88e6xxx_port_setup_mac() failed due to DUPLEX_UNKNOWN being passed as argument to mv88e6xxx_port_set_duplex(). We should handle this case gracefully and return -EOPNOTSUPP, like e.g. mv88e6xxx_port_set_speed() is doing it. Fixes: 7f1ae07b51e8 ("net: dsa: mv88e6xxx: add port duplex setter") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 92945841c8e8..2c75460ad57a 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -190,7 +190,7 @@ int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup) /* normal duplex detection */ break; default: - return -EINVAL; + return -EOPNOTSUPP; } err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_MAC_CTL, reg); -- cgit v1.2.3 From 0429c9ef94c6dc720ee5869f2ebaf02f1d411880 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 2 Mar 2019 10:06:05 +0100 Subject: net: dsa: mv8e6xxx: fix number of internal PHYs for 88E6x90 family [ Upstream commit 95150f29ae480276e76368cdf8a9524b5a96c0ca ] Ports 9 and 10 don't have internal PHY's but are (dependent on the version) SERDES/SGMII/XAUI/RXAUI ports. v2: - fix it for all 88E6x90 family members Fixes: bc3931557d1d ("net: dsa: mv88e6xxx: Add number of internal PHYs") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/chip.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 9f697a5b8e3d..f31ee6ef8c19 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4188,7 +4188,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6190", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ - .num_internal_phys = 11, + .num_internal_phys = 9, .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, @@ -4211,7 +4211,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6190X", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ - .num_internal_phys = 11, + .num_internal_phys = 9, .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, @@ -4234,7 +4234,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6191", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ - .num_internal_phys = 11, + .num_internal_phys = 9, .max_vid = 8191, .port_base_addr = 0x0, .phy_base_addr = 0x0, @@ -4281,7 +4281,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6290", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ - .num_internal_phys = 11, + .num_internal_phys = 9, .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, @@ -4443,7 +4443,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6390", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ - .num_internal_phys = 11, + .num_internal_phys = 9, .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, @@ -4466,7 +4466,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6390X", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ - .num_internal_phys = 11, + .num_internal_phys = 9, .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, -- cgit v1.2.3 From 3043bfe024e8b17c6b881316a4647e53f737d5ce Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Feb 2019 12:55:43 -0800 Subject: net: sched: put back q.qlen into a single location [ Upstream commit 46b1c18f9deb326a7e18348e668e4c7ab7c7458b ] In the series fc8b81a5981f ("Merge branch 'lockless-qdisc-series'") John made the assumption that the data path had no need to read the qdisc qlen (number of packets in the qdisc). It is true when pfifo_fast is used as the root qdisc, or as direct MQ/MQPRIO children. But pfifo_fast can be used as leaf in class full qdiscs, and existing logic needs to access the child qlen in an efficient way. HTB breaks badly, since it uses cl->leaf.q->q.qlen in : htb_activate() -> WARN_ON() htb_dequeue_tree() to decide if a class can be htb_deactivated when it has no more packets. HFSC, DRR, CBQ, QFQ have similar issues, and some calls to qdisc_tree_reduce_backlog() also read q.qlen directly. Using qdisc_qlen_sum() (which iterates over all possible cpus) in the data path is a non starter. It seems we have to put back qlen in a central location, at least for stable kernels. For all qdisc but pfifo_fast, qlen is guarded by the qdisc lock, so the existing q.qlen{++|--} are correct. For 'lockless' qdisc (pfifo_fast so far), we need to use atomic_{inc|dec}() because the spinlock might be not held (for example from pfifo_fast_enqueue() and pfifo_fast_dequeue()) This patch adds atomic_qlen (in the same location than qlen) and renames the following helpers, since we want to express they can be used without qdisc lock, and that qlen is no longer percpu. - qdisc_qstats_cpu_qlen_dec -> qdisc_qstats_atomic_qlen_dec() - qdisc_qstats_cpu_qlen_inc -> qdisc_qstats_atomic_qlen_inc() Later (net-next) we might revert this patch by tracking all these qlen uses and replace them by a more efficient method (not having to access a precise qlen, but an empty/non_empty status that might be less expensive to maintain/track). Another possibility is to have a legacy pfifo_fast version that would be used when used a a child qdisc, since the parent qdisc needs a spinlock anyway. But then, future lockless qdiscs would also have the same problem. Fixes: 7e66016f2c65 ("net: sched: helpers to sum qlen and qlen for per cpu logic") Signed-off-by: Eric Dumazet Cc: John Fastabend Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sch_generic.h | 31 +++++++++++++------------------ net/core/gen_stats.c | 2 -- net/sched/sch_generic.c | 13 ++++++------- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a6d00093f35e..c44da48de7df 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -47,7 +47,10 @@ struct qdisc_size_table { struct qdisc_skb_head { struct sk_buff *head; struct sk_buff *tail; - __u32 qlen; + union { + u32 qlen; + atomic_t atomic_qlen; + }; spinlock_t lock; }; @@ -384,27 +387,19 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) BUILD_BUG_ON(sizeof(qcb->data) < sz); } -static inline int qdisc_qlen_cpu(const struct Qdisc *q) -{ - return this_cpu_ptr(q->cpu_qstats)->qlen; -} - static inline int qdisc_qlen(const struct Qdisc *q) { return q->q.qlen; } -static inline int qdisc_qlen_sum(const struct Qdisc *q) +static inline u32 qdisc_qlen_sum(const struct Qdisc *q) { - __u32 qlen = q->qstats.qlen; - int i; + u32 qlen = q->qstats.qlen; - if (q->flags & TCQ_F_NOLOCK) { - for_each_possible_cpu(i) - qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen; - } else { + if (q->flags & TCQ_F_NOLOCK) + qlen += atomic_read(&q->q.atomic_qlen); + else qlen += q->q.qlen; - } return qlen; } @@ -776,14 +771,14 @@ static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch, this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb)); } -static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch) +static inline void qdisc_qstats_atomic_qlen_inc(struct Qdisc *sch) { - this_cpu_inc(sch->cpu_qstats->qlen); + atomic_inc(&sch->q.atomic_qlen); } -static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch) +static inline void qdisc_qstats_atomic_qlen_dec(struct Qdisc *sch) { - this_cpu_dec(sch->cpu_qstats->qlen); + atomic_dec(&sch->q.atomic_qlen); } static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 188d693cb251..e2fd8baec65f 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -256,7 +256,6 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats, for_each_possible_cpu(i) { const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); - qstats->qlen = 0; qstats->backlog += qcpu->backlog; qstats->drops += qcpu->drops; qstats->requeues += qcpu->requeues; @@ -272,7 +271,6 @@ void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, if (cpu) { __gnet_stats_copy_queue_cpu(qstats, cpu); } else { - qstats->qlen = q->qlen; qstats->backlog = q->backlog; qstats->drops = q->drops; qstats->requeues = q->requeues; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 69078c82963e..77b289da7763 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -68,7 +68,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) skb = __skb_dequeue(&q->skb_bad_txq); if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_dec(q, skb); - qdisc_qstats_cpu_qlen_dec(q); + qdisc_qstats_atomic_qlen_dec(q); } else { qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; @@ -108,7 +108,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q, if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_inc(q, skb); - qdisc_qstats_cpu_qlen_inc(q); + qdisc_qstats_atomic_qlen_inc(q); } else { qdisc_qstats_backlog_inc(q, skb); q->q.qlen++; @@ -147,7 +147,7 @@ static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q) qdisc_qstats_cpu_requeues_inc(q); qdisc_qstats_cpu_backlog_inc(q, skb); - qdisc_qstats_cpu_qlen_inc(q); + qdisc_qstats_atomic_qlen_inc(q); skb = next; } @@ -252,7 +252,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, skb = __skb_dequeue(&q->gso_skb); if (qdisc_is_percpu_stats(q)) { qdisc_qstats_cpu_backlog_dec(q, skb); - qdisc_qstats_cpu_qlen_dec(q); + qdisc_qstats_atomic_qlen_dec(q); } else { qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; @@ -633,7 +633,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, if (unlikely(err)) return qdisc_drop_cpu(skb, qdisc, to_free); - qdisc_qstats_cpu_qlen_inc(qdisc); + qdisc_qstats_atomic_qlen_inc(qdisc); /* Note: skb can not be used after skb_array_produce(), * so we better not use qdisc_qstats_cpu_backlog_inc() */ @@ -658,7 +658,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) if (likely(skb)) { qdisc_qstats_cpu_backlog_dec(qdisc, skb); qdisc_bstats_cpu_update(qdisc, skb); - qdisc_qstats_cpu_qlen_dec(qdisc); + qdisc_qstats_atomic_qlen_dec(qdisc); } return skb; @@ -702,7 +702,6 @@ static void pfifo_fast_reset(struct Qdisc *qdisc) struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i); q->backlog = 0; - q->qlen = 0; } } -- cgit v1.2.3 From 7ce2a517fd8b7742e381d3b3551b9b03e667ad79 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 2 Mar 2019 10:34:55 +0800 Subject: net-sysfs: Fix mem leak in netdev_register_kobject [ Upstream commit 895a5e96dbd6386c8e78e5b78e067dcc67b7f0ab ] syzkaller report this: BUG: memory leak unreferenced object 0xffff88837a71a500 (size 256): comm "syz-executor.2", pid 9770, jiffies 4297825125 (age 17.843s) hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 20 c0 ef 86 ff ff ff ff ........ ....... backtrace: [<00000000db12624b>] netdev_register_kobject+0x124/0x2e0 net/core/net-sysfs.c:1751 [<00000000dc49a994>] register_netdevice+0xcc1/0x1270 net/core/dev.c:8516 [<00000000e5f3fea0>] tun_set_iff drivers/net/tun.c:2649 [inline] [<00000000e5f3fea0>] __tun_chr_ioctl+0x2218/0x3d20 drivers/net/tun.c:2883 [<000000001b8ac127>] vfs_ioctl fs/ioctl.c:46 [inline] [<000000001b8ac127>] do_vfs_ioctl+0x1a5/0x10e0 fs/ioctl.c:690 [<0000000079b269f8>] ksys_ioctl+0x89/0xa0 fs/ioctl.c:705 [<00000000de649beb>] __do_sys_ioctl fs/ioctl.c:712 [inline] [<00000000de649beb>] __se_sys_ioctl fs/ioctl.c:710 [inline] [<00000000de649beb>] __x64_sys_ioctl+0x74/0xb0 fs/ioctl.c:710 [<000000007ebded1e>] do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:290 [<00000000db315d36>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000115be9bb>] 0xffffffffffffffff It should call kset_unregister to free 'dev->queues_kset' in error path of register_queue_kobjects, otherwise will cause a mem leak. Reported-by: Hulk Robot Fixes: 1d24eb4815d1 ("xps: Transmit Packet Steering") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/net-sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index bd67c4d0fcfd..2aabb7eb0854 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1547,6 +1547,9 @@ static int register_queue_kobjects(struct net_device *dev) error: netdev_queue_update_kobjects(dev, txq, 0); net_rx_queue_update_kobjects(dev, rxq, 0); +#ifdef CONFIG_SYSFS + kset_unregister(dev->queues_kset); +#endif return error; } -- cgit v1.2.3 From 16a006d72f333882b6affdebcaa0e84524e4a0c8 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Sat, 2 Mar 2019 13:32:26 +0100 Subject: qmi_wwan: Add support for Quectel EG12/EM12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 822e44b45eb991c63487c5e2ce7d636411870a8d ] Quectel EG12 (module)/EM12 (M.2 card) is a Cat. 12 LTE modem. The modem behaves in the same way as the EP06, so the "set DTR"-quirk must be applied and the diagnostic-interface check performed. Since the diagnostic-check now applies to more modems, I have renamed the function from quectel_ep06_diag_detected() to quectel_diag_detected(). Signed-off-by: Kristian Evensen Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 735ad838e2ba..6e381354f658 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -976,6 +976,13 @@ static const struct usb_device_id products[] = { 0xff), .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr, }, + { /* Quectel EG12/EM12 */ + USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0512, + USB_CLASS_VENDOR_SPEC, + USB_SUBCLASS_VENDOR_SPEC, + 0xff), + .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr, + }, /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ @@ -1343,17 +1350,20 @@ static bool quectel_ec20_detected(struct usb_interface *intf) return false; } -static bool quectel_ep06_diag_detected(struct usb_interface *intf) +static bool quectel_diag_detected(struct usb_interface *intf) { struct usb_device *dev = interface_to_usbdev(intf); struct usb_interface_descriptor intf_desc = intf->cur_altsetting->desc; + u16 id_vendor = le16_to_cpu(dev->descriptor.idVendor); + u16 id_product = le16_to_cpu(dev->descriptor.idProduct); - if (le16_to_cpu(dev->descriptor.idVendor) == 0x2c7c && - le16_to_cpu(dev->descriptor.idProduct) == 0x0306 && - intf_desc.bNumEndpoints == 2) - return true; + if (id_vendor != 0x2c7c || intf_desc.bNumEndpoints != 2) + return false; - return false; + if (id_product == 0x0306 || id_product == 0x0512) + return true; + else + return false; } static int qmi_wwan_probe(struct usb_interface *intf, @@ -1390,13 +1400,13 @@ static int qmi_wwan_probe(struct usb_interface *intf, return -ENODEV; } - /* Quectel EP06/EM06/EG06 supports dynamic interface configuration, so + /* Several Quectel modems supports dynamic interface configuration, so * we need to match on class/subclass/protocol. These values are * identical for the diagnostic- and QMI-interface, but bNumEndpoints is * different. Ignore the current interface if the number of endpoints * the number for the diag interface (two). */ - if (quectel_ep06_diag_detected(intf)) + if (quectel_diag_detected(intf)) return -ENODEV; return usbnet_probe(intf, id); -- cgit v1.2.3 From 8085d6d03fe3702e9cf37fff3677c08b40b1007f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 3 Mar 2019 16:50:26 +0800 Subject: sctp: call iov_iter_revert() after sending ABORT [ Upstream commit 901efe12318b1ea8d3e2c88a7b75ed6e6d5d7245 ] The user msg is also copied to the abort packet when doing SCTP_ABORT in sctp_sendmsg_check_sflags(). When SCTP_SENDALL is set, iov_iter_revert() should have been called for sending abort on the next asoc with copying this msg. Otherwise, memcpy_from_msg() in sctp_make_abort_user() will fail and return error. Fixes: 4910280503f3 ("sctp: add support for snd flag SCTP_SENDALL process in sendmsg") Reported-by: Ying Xu Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e5e70cff5bb3..1b16250c5718 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1884,6 +1884,7 @@ static int sctp_sendmsg_check_sflags(struct sctp_association *asoc, pr_debug("%s: aborting association:%p\n", __func__, asoc); sctp_primitive_ABORT(net, asoc, chunk); + iov_iter_revert(&msg->msg_iter, msg_len); return 0; } -- cgit v1.2.3 From 5e311e537e638960eb1da1b76a1b482dc05b11c2 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 4 Mar 2019 15:00:03 +0800 Subject: sky2: Disable MSI on Dell Inspiron 1545 and Gateway P-79 [ Upstream commit b33b7cd6fd86478dd2890a9abeb6f036aa01fdf7 ] Some sky2 chips fire IRQ after S3, before the driver is fully resumed: [ 686.804877] do_IRQ: 1.37 No irq handler for vector This is likely a platform bug that device isn't fully quiesced during S3. Use MSI-X, maskable MSI or INTx can prevent this issue from happening. Since MSI-X and maskable MSI are not supported by this device, fallback to use INTx on affected platforms. BugLink: https://bugs.launchpad.net/bugs/1807259 BugLink: https://bugs.launchpad.net/bugs/1809843 Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/sky2.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index ae2f35039343..1485f66cf7b0 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -46,6 +46,7 @@ #include #include #include +#include #include @@ -93,7 +94,7 @@ static int copybreak __read_mostly = 128; module_param(copybreak, int, 0); MODULE_PARM_DESC(copybreak, "Receive copy threshold"); -static int disable_msi = 0; +static int disable_msi = -1; module_param(disable_msi, int, 0); MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)"); @@ -4931,6 +4932,24 @@ static const char *sky2_name(u8 chipid, char *buf, int sz) return buf; } +static const struct dmi_system_id msi_blacklist[] = { + { + .ident = "Dell Inspiron 1545", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1545"), + }, + }, + { + .ident = "Gateway P-79", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Gateway"), + DMI_MATCH(DMI_PRODUCT_NAME, "P-79"), + }, + }, + {} +}; + static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *dev, *dev1; @@ -5042,6 +5061,9 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_free_pci; } + if (disable_msi == -1) + disable_msi = !!dmi_check_system(msi_blacklist); + if (!disable_msi && pci_enable_msi(pdev) == 0) { err = sky2_test_msi(hw); if (err) { -- cgit v1.2.3 From 089100d5fb434a655706799279b1ce0f79d864d1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 3 Mar 2019 07:35:51 +0000 Subject: team: Free BPF filter when unregistering netdev [ Upstream commit 692c31bd4054212312396b1d303bffab2c5b93a7 ] When team is used in loadbalance mode a BPF filter can be used to provide a hash which will determine the Tx port. When the netdev is later unregistered the filter is not freed which results in memory leaks [1]. Fix by freeing the program and the corresponding filter when unregistering the netdev. [1] unreferenced object 0xffff8881dbc47cc8 (size 16): comm "teamd", pid 3068, jiffies 4294997779 (age 438.247s) hex dump (first 16 bytes): a3 00 6b 6b 6b 6b 6b 6b 88 a5 82 e1 81 88 ff ff ..kkkkkk........ backtrace: [<000000008a3b47e3>] team_nl_cmd_options_set+0x88f/0x11b0 [<00000000c4f4f27e>] genl_family_rcv_msg+0x78f/0x1080 [<00000000610ef838>] genl_rcv_msg+0xca/0x170 [<00000000a281df93>] netlink_rcv_skb+0x132/0x380 [<000000004d9448a2>] genl_rcv+0x29/0x40 [<000000000321b2f4>] netlink_unicast+0x4c0/0x690 [<000000008c25dffb>] netlink_sendmsg+0x929/0xe10 [<00000000068298c5>] sock_sendmsg+0xc8/0x110 [<0000000082a61ff0>] ___sys_sendmsg+0x77a/0x8f0 [<00000000663ae29d>] __sys_sendmsg+0xf7/0x250 [<0000000027c5f11a>] do_syscall_64+0x14d/0x610 [<000000006cfbc8d3>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000e23197e2>] 0xffffffffffffffff unreferenced object 0xffff8881e182a588 (size 2048): comm "teamd", pid 3068, jiffies 4294997780 (age 438.247s) hex dump (first 32 bytes): 20 00 00 00 02 00 00 00 30 00 00 00 28 f0 ff ff .......0...(... 07 00 00 00 00 00 00 00 28 00 00 00 00 00 00 00 ........(....... backtrace: [<000000002daf01fb>] lb_bpf_func_set+0x45c/0x6d0 [<000000008a3b47e3>] team_nl_cmd_options_set+0x88f/0x11b0 [<00000000c4f4f27e>] genl_family_rcv_msg+0x78f/0x1080 [<00000000610ef838>] genl_rcv_msg+0xca/0x170 [<00000000a281df93>] netlink_rcv_skb+0x132/0x380 [<000000004d9448a2>] genl_rcv+0x29/0x40 [<000000000321b2f4>] netlink_unicast+0x4c0/0x690 [<000000008c25dffb>] netlink_sendmsg+0x929/0xe10 [<00000000068298c5>] sock_sendmsg+0xc8/0x110 [<0000000082a61ff0>] ___sys_sendmsg+0x77a/0x8f0 [<00000000663ae29d>] __sys_sendmsg+0xf7/0x250 [<0000000027c5f11a>] do_syscall_64+0x14d/0x610 [<000000006cfbc8d3>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000e23197e2>] 0xffffffffffffffff Fixes: 01d7f30a9f96 ("team: add loadbalance mode") Signed-off-by: Ido Schimmel Reported-by: Amit Cohen Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team_mode_loadbalance.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index a5ef97010eb3..5541e1c19936 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -325,6 +325,20 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx) return 0; } +static void lb_bpf_func_free(struct team *team) +{ + struct lb_priv *lb_priv = get_lb_priv(team); + struct bpf_prog *fp; + + if (!lb_priv->ex->orig_fprog) + return; + + __fprog_destroy(lb_priv->ex->orig_fprog); + fp = rcu_dereference_protected(lb_priv->fp, + lockdep_is_held(&team->lock)); + bpf_prog_destroy(fp); +} + static int lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx) { struct lb_priv *lb_priv = get_lb_priv(team); @@ -639,6 +653,7 @@ static void lb_exit(struct team *team) team_options_unregister(team, lb_options, ARRAY_SIZE(lb_options)); + lb_bpf_func_free(team); cancel_delayed_work_sync(&lb_priv->ex->stats.refresh_dw); free_percpu(lb_priv->pcpu_stats); kfree(lb_priv->ex); -- cgit v1.2.3 From 8d1b9800c1ea9f3c27a2a724e0024a30784b39a9 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Mon, 4 Mar 2019 23:26:10 +0100 Subject: tipc: fix RDM/DGRAM connect() regression [ Upstream commit 0e63208915a8d7590d0a6218dadb2a6a00ac705a ] Fix regression bug introduced in commit 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Only signal -EDESTADDRREQ for RDM/DGRAM if we don't have a cached sockaddr. Fixes: 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Signed-off-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e1bdaf056c8f..c5614d8e74fa 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1318,7 +1318,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(!dest)) { dest = &tsk->peer; - if (!syn || dest->family != AF_TIPC) + if (!syn && dest->family != AF_TIPC) return -EDESTADDRREQ; } -- cgit v1.2.3 From 1713c8e18b44de3b9df3e69e0181b3b896280032 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 27 Feb 2019 03:58:53 -0500 Subject: bnxt_en: Drop oversize TX packets to prevent errors. [ Upstream commit 2b3c6885386020b1b9d92d45e8349637e27d1f66 ] There have been reports of oversize UDP packets being sent to the driver to be transmitted, causing error conditions. The issue is likely caused by the dst of the SKB switching between 'lo' with 64K MTU and the hardware device with a smaller MTU. Patches are being proposed by Mahesh Bandewar to fix the issue. In the meantime, add a quick length check in the driver to prevent the error. The driver uses the TX packet size as index to look up an array to setup the TX BD. The array is large enough to support all MTU sizes supported by the driver. The oversize TX packet causes the driver to index beyond the array and put garbage values into the TX BD. Add a simple check to prevent this. Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 034f57500f00..1fdaf86bbe8f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -463,6 +463,12 @@ normal_tx: } length >>= 9; + if (unlikely(length >= ARRAY_SIZE(bnxt_lhint_arr))) { + dev_warn_ratelimited(&pdev->dev, "Dropped oversize %d bytes TX packet.\n", + skb->len); + i = 0; + goto tx_dma_error; + } flags |= bnxt_lhint_arr[length]; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); -- cgit v1.2.3 From 36bd44bcb4b2fef0f1fabc28c4f99f3098ab2cbd Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 28 Feb 2019 14:56:04 +0100 Subject: geneve: correctly handle ipv6.disable module parameter [ Upstream commit cf1c9ccba7308e48a68fa77f476287d9d614e4c7 ] When IPv6 is compiled but disabled at runtime, geneve_sock_add returns -EAFNOSUPPORT. For metadata based tunnels, this causes failure of the whole operation of bringing up the tunnel. Ignore failure of IPv6 socket creation for metadata based tunnels caused by IPv6 not being available. This is the same fix as what commit d074bf960044 ("vxlan: correctly handle ipv6.disable module parameter") is doing for vxlan. Note there's also commit c0a47e44c098 ("geneve: should not call rt6_lookup() when ipv6 was disabled") which fixes a similar issue but for regular tunnels, while this patch is needed for metadata based tunnels. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/geneve.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 01711e6e9a39..e1427b56a073 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -636,15 +636,20 @@ out: static int geneve_open(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); - bool ipv6 = !!(geneve->info.mode & IP_TUNNEL_INFO_IPV6); bool metadata = geneve->collect_md; + bool ipv4, ipv6; int ret = 0; + ipv6 = geneve->info.mode & IP_TUNNEL_INFO_IPV6 || metadata; + ipv4 = !ipv6 || metadata; #if IS_ENABLED(CONFIG_IPV6) - if (ipv6 || metadata) + if (ipv6) { ret = geneve_sock_add(geneve, true); + if (ret < 0 && ret != -EAFNOSUPPORT) + ipv4 = false; + } #endif - if (!ret && (!ipv6 || metadata)) + if (ipv4) ret = geneve_sock_add(geneve, false); if (ret < 0) geneve_sock_release(geneve); -- cgit v1.2.3 From d61918a5e401bc1911d761a7f0c5c760ea1cac4b Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 22 Feb 2019 18:25:03 +0000 Subject: hv_netvsc: Fix IP header checksum for coalesced packets [ Upstream commit bf48648d650db1146b75b9bd358502431e86cf4f ] Incoming packets may have IP header checksum verified by the host. They may not have IP header checksum computed after coalescing. This patch re-compute the checksum when necessary, otherwise the packets may be dropped, because Linux network stack always checks it. Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c9e2a986ccb7..c8320405c8f1 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -743,6 +743,14 @@ void netvsc_linkstatus_callback(struct net_device *net, schedule_delayed_work(&ndev_ctx->dwork, 0); } +static void netvsc_comp_ipcsum(struct sk_buff *skb) +{ + struct iphdr *iph = (struct iphdr *)skb->data; + + iph->check = 0; + iph->check = ip_fast_csum(iph, iph->ihl); +} + static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, struct napi_struct *napi, const struct ndis_tcp_ip_checksum_info *csum_info, @@ -766,9 +774,17 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, /* skb is already created with CHECKSUM_NONE */ skb_checksum_none_assert(skb); - /* - * In Linux, the IP checksum is always checked. - * Do L4 checksum offload if enabled and present. + /* Incoming packets may have IP header checksum verified by the host. + * They may not have IP header checksum computed after coalescing. + * We compute it here if the flags are set, because on Linux, the IP + * checksum is always checked. + */ + if (csum_info && csum_info->receive.ip_checksum_value_invalid && + csum_info->receive.ip_checksum_succeeded && + skb->protocol == htons(ETH_P_IP)) + netvsc_comp_ipcsum(skb); + + /* Do L4 checksum offload if enabled and present. */ if (csum_info && (net->features & NETIF_F_RXCSUM)) { if (csum_info->receive.tcp_checksum_succeeded || -- cgit v1.2.3 From 99ed9458212602e1a286196c488c52952d2e9e7a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 27 Feb 2019 16:15:29 +0800 Subject: ipv4: Add ICMPv6 support when parse route ipproto [ Upstream commit 5e1a99eae84999a2536f50a0beaf5d5262337f40 ] For ip rules, we need to use 'ipproto ipv6-icmp' to match ICMPv6 headers. But for ip -6 route, currently we only support tcp, udp and icmp. Add ICMPv6 support so we can match ipv6-icmp rules for route lookup. v2: As David Ahern and Sabrina Dubroca suggested, Add an argument to rtm_getroute_parse_ip_proto() to handle ICMP/ICMPv6 with different family. Reported-by: Jianlin Shi Fixes: eacb9384a3fe ("ipv6: support sport, dport and ip_proto in RTM_GETROUTE") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip.h | 2 +- net/ipv4/netlink.c | 17 +++++++++++++---- net/ipv4/route.c | 2 +- net/ipv6/route.c | 3 ++- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index e44b1a44f67a..bf2209f861af 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -687,7 +687,7 @@ extern int sysctl_icmp_msgs_burst; int ip_misc_proc_init(void); #endif -int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, +int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family, struct netlink_ext_ack *extack); #endif /* _IP_H */ diff --git a/net/ipv4/netlink.c b/net/ipv4/netlink.c index f86bb4f06609..d8e3a1fb8e82 100644 --- a/net/ipv4/netlink.c +++ b/net/ipv4/netlink.c @@ -3,9 +3,10 @@ #include #include #include +#include #include -int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, +int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family, struct netlink_ext_ack *extack) { *ip_proto = nla_get_u8(attr); @@ -13,11 +14,19 @@ int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, switch (*ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: + return 0; case IPPROTO_ICMP: + if (family != AF_INET) + break; + return 0; +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: + if (family != AF_INET6) + break; return 0; - default: - NL_SET_ERR_MSG(extack, "Unsupported ip proto"); - return -EOPNOTSUPP; +#endif } + NL_SET_ERR_MSG(extack, "Unsupported ip proto"); + return -EOPNOTSUPP; } EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 436b46c0e687..ca87bb6784e5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2814,7 +2814,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (tb[RTA_IP_PROTO]) { err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], - &ip_proto, extack); + &ip_proto, AF_INET, extack); if (err) return err; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 08c4516ae4a4..cf4a8c25458f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4907,7 +4907,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (tb[RTA_IP_PROTO]) { err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], - &fl6.flowi6_proto, extack); + &fl6.flowi6_proto, AF_INET6, + extack); if (err) goto errout; } -- cgit v1.2.3 From ceb7c249869527f28e8caf6989b616740bd454e0 Mon Sep 17 00:00:00 2001 From: Bryan Whitehead Date: Tue, 26 Feb 2019 14:06:26 -0500 Subject: lan743x: Fix TX Stall Issue [ Upstream commit 90490ef7269906423a1c1b917fc24be8b1602658 ] It has been observed that tx queue stalls while downloading from certain web sites (example www.speedtest.net) The cause has been tracked down to a corner case where dma descriptors where not setup properly. And there for a tx completion interrupt was not signaled. This fix corrects the problem by properly marking the end of a multi descriptor transmission. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Bryan Whitehead Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/microchip/lan743x_main.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 42f5bfa33694..de5a6abda7e3 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1403,7 +1403,8 @@ static int lan743x_tx_frame_start(struct lan743x_tx *tx, } static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx, - unsigned int frame_length) + unsigned int frame_length, + int nr_frags) { /* called only from within lan743x_tx_xmit_frame. * assuming tx->ring_lock has already been acquired. @@ -1413,6 +1414,10 @@ static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx, /* wrap up previous descriptor */ tx->frame_data0 |= TX_DESC_DATA0_EXT_; + if (nr_frags <= 0) { + tx->frame_data0 |= TX_DESC_DATA0_LS_; + tx->frame_data0 |= TX_DESC_DATA0_IOC_; + } tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; tx_descriptor->data0 = tx->frame_data0; @@ -1517,8 +1522,11 @@ static void lan743x_tx_frame_end(struct lan743x_tx *tx, u32 tx_tail_flags = 0; /* wrap up previous descriptor */ - tx->frame_data0 |= TX_DESC_DATA0_LS_; - tx->frame_data0 |= TX_DESC_DATA0_IOC_; + if ((tx->frame_data0 & TX_DESC_DATA0_DTYPE_MASK_) == + TX_DESC_DATA0_DTYPE_DATA_) { + tx->frame_data0 |= TX_DESC_DATA0_LS_; + tx->frame_data0 |= TX_DESC_DATA0_IOC_; + } tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; buffer_info = &tx->buffer_info[tx->frame_tail]; @@ -1603,7 +1611,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx, } if (gso) - lan743x_tx_frame_add_lso(tx, frame_length); + lan743x_tx_frame_add_lso(tx, frame_length, nr_frags); if (nr_frags <= 0) goto finish; -- cgit v1.2.3 From 05d9f554b7136dca02e76218d79c694f8c5deab4 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 1 Mar 2019 23:43:39 +0100 Subject: net: dsa: mv88e6xxx: Fix statistics on mv88e6161 [ Upstream commit a6da21bb0eae459a375d5bd48baed821d14301d0 ] Despite what the datesheet says, the silicon implements the older way of snapshoting the statistics. Change the op. Reported-by: Chris.Healy@zii.aero Tested-by: Chris.Healy@zii.aero Fixes: 0ac64c394900 ("net: dsa: mv88e6xxx: mv88e6161 uses mv88e6320 stats snapshot") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index f31ee6ef8c19..eb3804546f1f 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3070,7 +3070,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, .port_link_state = mv88e6352_port_link_state, .port_get_cmode = mv88e6185_port_get_cmode, - .stats_snapshot = mv88e6320_g1_stats_snapshot, + .stats_snapshot = mv88e6xxx_g1_stats_snapshot, .stats_set_histogram = mv88e6095_g1_stats_set_histogram, .stats_get_sset_count = mv88e6095_stats_get_sset_count, .stats_get_strings = mv88e6095_stats_get_strings, -- cgit v1.2.3 From 4afc9831f895ff1440e42f2f6ea5297a90da1001 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 28 Feb 2019 18:14:03 +0100 Subject: net: dsa: mv88e6xxx: Fix u64 statistics [ Upstream commit 6e46e2d821bb22b285ae8187959096b65d063b0d ] The switch maintains u64 counters for the number of octets sent and received. These are kept as two u32's which need to be combined. Fix the combing, which wrongly worked on u16's. Fixes: 80c4627b2719 ("dsa: mv88x6xxx: Refactor getting a single statistic") Reported-by: Chris Healy Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index eb3804546f1f..d045d3cd00a6 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -884,7 +884,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, default: return U64_MAX; } - value = (((u64)high) << 16) | low; + value = (((u64)high) << 32) | low; return value; } -- cgit v1.2.3 From e3713abc4248aa6bcc11173d754c418b02a62cbb Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Feb 2019 19:06:06 -0500 Subject: netlabel: fix out-of-bounds memory accesses [ Upstream commit 5578de4834fe0f2a34fedc7374be691443396d1f ] There are two array out-of-bounds memory accesses, one in cipso_v4_map_lvl_valid(), the other in netlbl_bitmap_walk(). Both errors are embarassingly simple, and the fixes are straightforward. As a FYI for anyone backporting this patch to kernels prior to v4.8, you'll want to apply the netlbl_bitmap_walk() patch to cipso_v4_bitmap_walk() as netlbl_bitmap_walk() doesn't exist before Linux v4.8. Reported-by: Jann Horn Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine") Fixes: 3faa8f982f95 ("netlabel: Move bitmap manipulation functions to the NetLabel core.") Signed-off-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/cipso_ipv4.c | 3 ++- net/netlabel/netlabel_kapi.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 777fa3b7fb13..f4b83de2263e 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -667,7 +667,8 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) case CIPSO_V4_MAP_PASS: return 0; case CIPSO_V4_MAP_TRANS: - if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) + if ((level < doi_def->map.std->lvl.cipso_size) && + (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)) return 0; break; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index ea7c67050792..ee3e5b6471a6 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -903,7 +903,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len, (state == 0 && (byte & bitmask) == 0)) return bit_spot; - bit_spot++; + if (++bit_spot >= bitmap_len) + return -1; bitmask >>= 1; if (bitmask == 0) { byte = bitmap[++byte_offset]; -- cgit v1.2.3 From d1dd2e15c85e890a1cc9bde5ba07ae63331e5c73 Mon Sep 17 00:00:00 2001 From: Sheng Lan Date: Thu, 28 Feb 2019 18:47:58 +0800 Subject: net: netem: fix skb length BUG_ON in __skb_to_sgvec [ Upstream commit 5845f706388a4cde0f6b80f9e5d33527e942b7d9 ] It can be reproduced by following steps: 1. virtio_net NIC is configured with gso/tso on 2. configure nginx as http server with an index file bigger than 1M bytes 3. use tc netem to produce duplicate packets and delay: tc qdisc add dev eth0 root netem delay 100ms 10ms 30% duplicate 90% 4. continually curl the nginx http server to get index file on client 5. BUG_ON is seen quickly [10258690.371129] kernel BUG at net/core/skbuff.c:4028! [10258690.371748] invalid opcode: 0000 [#1] SMP PTI [10258690.372094] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G W 5.0.0-rc6 #2 [10258690.372094] RSP: 0018:ffffa05797b43da0 EFLAGS: 00010202 [10258690.372094] RBP: 00000000000005ea R08: 0000000000000000 R09: 00000000000005ea [10258690.372094] R10: ffffa0579334d800 R11: 00000000000002c0 R12: 0000000000000002 [10258690.372094] R13: 0000000000000000 R14: ffffa05793122900 R15: ffffa0578f7cb028 [10258690.372094] FS: 0000000000000000(0000) GS:ffffa05797b40000(0000) knlGS:0000000000000000 [10258690.372094] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [10258690.372094] CR2: 00007f1a6dc00868 CR3: 000000001000e000 CR4: 00000000000006e0 [10258690.372094] Call Trace: [10258690.372094] [10258690.372094] skb_to_sgvec+0x11/0x40 [10258690.372094] start_xmit+0x38c/0x520 [virtio_net] [10258690.372094] dev_hard_start_xmit+0x9b/0x200 [10258690.372094] sch_direct_xmit+0xff/0x260 [10258690.372094] __qdisc_run+0x15e/0x4e0 [10258690.372094] net_tx_action+0x137/0x210 [10258690.372094] __do_softirq+0xd6/0x2a9 [10258690.372094] irq_exit+0xde/0xf0 [10258690.372094] smp_apic_timer_interrupt+0x74/0x140 [10258690.372094] apic_timer_interrupt+0xf/0x20 [10258690.372094] In __skb_to_sgvec(), the skb->len is not equal to the sum of the skb's linear data size and nonlinear data size, thus BUG_ON triggered. Because the skb is cloned and a part of nonlinear data is split off. Duplicate packet is cloned in netem_enqueue() and may be delayed some time in qdisc. When qdisc len reached the limit and returns NET_XMIT_DROP, the skb will be retransmit later in write queue. the skb will be fragmented by tso_fragment(), the limit size that depends on cwnd and mss decrease, the skb's nonlinear data will be split off. The length of the skb cloned by netem will not be updated. When we use virtio_net NIC and invoke skb_to_sgvec(), the BUG_ON trigger. To fix it, netem returns NET_XMIT_SUCCESS to upper stack when it clones a duplicate packet. Fixes: 35d889d1 ("sch_netem: fix skb leak in netem_enqueue()") Signed-off-by: Sheng Lan Reported-by: Qin Ji Suggested-by: Eric Dumazet Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_netem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 74c0f656f28c..4dfe10b9f96c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -440,6 +440,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, int nb = 0; int count = 1; int rc = NET_XMIT_SUCCESS; + int rc_drop = NET_XMIT_DROP; /* Do not fool qdisc_drop_all() */ skb->prev = NULL; @@ -479,6 +480,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->duplicate = 0; rootq->enqueue(skb2, rootq, to_free); q->duplicate = dupsave; + rc_drop = NET_XMIT_SUCCESS; } /* @@ -491,7 +493,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb_is_gso(skb)) { segs = netem_segment(skb, sch, to_free); if (!segs) - return NET_XMIT_DROP; + return rc_drop; } else { segs = skb; } @@ -514,8 +516,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1<<(prandom_u32() % 8); } - if (unlikely(sch->q.qlen >= sch->limit)) - return qdisc_drop_all(skb, sch, to_free); + if (unlikely(sch->q.qlen >= sch->limit)) { + qdisc_drop_all(skb, sch, to_free); + return rc_drop; + } qdisc_qstats_backlog_inc(sch, skb); -- cgit v1.2.3 From f132b3f5f1ad1cbe818474ab8d0b555ff39369d5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 22 Feb 2019 15:37:58 +0800 Subject: net: nfc: Fix NULL dereference on nfc_llcp_build_tlv fails [ Upstream commit 58bdd544e2933a21a51eecf17c3f5f94038261b5 ] KASAN report this: BUG: KASAN: null-ptr-deref in nfc_llcp_build_gb+0x37f/0x540 [nfc] Read of size 3 at addr 0000000000000000 by task syz-executor.0/5401 CPU: 0 PID: 5401 Comm: syz-executor.0 Not tainted 5.0.0-rc7+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xfa/0x1ce lib/dump_stack.c:113 kasan_report+0x171/0x18d mm/kasan/report.c:321 memcpy+0x1f/0x50 mm/kasan/common.c:130 nfc_llcp_build_gb+0x37f/0x540 [nfc] nfc_llcp_register_device+0x6eb/0xb50 [nfc] nfc_register_device+0x50/0x1d0 [nfc] nfcsim_device_new+0x394/0x67d [nfcsim] ? 0xffffffffc1080000 nfcsim_init+0x6b/0x1000 [nfcsim] do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f9cb79dcc58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000280 RDI: 0000000000000003 RBP: 00007f9cb79dcc70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f9cb79dd6bc R13: 00000000004bcefb R14: 00000000006f7030 R15: 0000000000000004 nfc_llcp_build_tlv will return NULL on fails, caller should check it, otherwise will trigger a NULL dereference. Reported-by: Hulk Robot Fixes: eda21f16a5ed ("NFC: Set MIU and RW values from CONNECT and CC LLCP frames") Fixes: d646960f7986 ("NFC: Initial LLCP support") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/nfc/llcp_commands.c | 20 ++++++++++++++++++++ net/nfc/llcp_core.c | 24 ++++++++++++++++++++---- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 6a196e438b6c..d1fc019e932e 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -419,6 +419,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) sock->service_name, sock->service_name_len, &service_name_tlv_length); + if (!service_name_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += service_name_tlv_length; } @@ -429,9 +433,17 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); + if (!miux_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += miux_tlv_length; rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + if (!rw_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += rw_tlv_length; pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); @@ -484,9 +496,17 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); + if (!miux_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += miux_tlv_length; rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + if (!rw_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += rw_tlv_length; skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index ef4026a23e80..4fa015208aab 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -532,10 +532,10 @@ static u8 nfc_llcp_reserve_sdp_ssap(struct nfc_llcp_local *local) static int nfc_llcp_build_gb(struct nfc_llcp_local *local) { - u8 *gb_cur, *version_tlv, version, version_length; - u8 *lto_tlv, lto_length; - u8 *wks_tlv, wks_length; - u8 *miux_tlv, miux_length; + u8 *gb_cur, version, version_length; + u8 lto_length, wks_length, miux_length; + u8 *version_tlv = NULL, *lto_tlv = NULL, + *wks_tlv = NULL, *miux_tlv = NULL; __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; int ret = 0; @@ -543,17 +543,33 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) version = LLCP_VERSION_11; version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version, 1, &version_length); + if (!version_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += version_length; lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &local->lto, 1, <o_length); + if (!lto_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += lto_length; pr_debug("Local wks 0x%lx\n", local->local_wks); wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length); + if (!wks_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += wks_length; miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, &miux_length); + if (!miux_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += miux_length; gb_len += ARRAY_SIZE(llcp_magic); -- cgit v1.2.3 From d068168997580d1a9c81b7322a365e05229479a1 Mon Sep 17 00:00:00 2001 From: Rajasingh Thavamani Date: Wed, 27 Feb 2019 17:43:19 +0530 Subject: net: phy: Micrel KSZ8061: link failure after cable connect [ Upstream commit 232ba3a51cc224b339c7114888ed7f0d4d95695e ] With Micrel KSZ8061 PHY, the link may occasionally not come up after Ethernet cable connect. The vendor's (Microchip, former Micrel) errata sheet 80000688A.pdf descripes the problem and possible workarounds in detail, see below. The batch implements workaround 1, which permanently fixes the issue. DESCRIPTION Link-up may not occur properly when the Ethernet cable is initially connected. This issue occurs more commonly when the cable is connected slowly, but it may occur any time a cable is connected. This issue occurs in the auto-negotiation circuit, and will not occur if auto-negotiation is disabled (which requires that the two link partners be set to the same speed and duplex). END USER IMPLICATIONS When this issue occurs, link is not established. Subsequent cable plug/unplaug cycle will not correct the issue. WORk AROUND There are four approaches to work around this issue: 1. This issue can be prevented by setting bit 15 in MMD device address 1, register 2, prior to connecting the cable or prior to setting the Restart Auto-negotiation bit in register 0h. The MMD registers are accessed via the indirect access registers Dh and Eh, or via the Micrel EthUtil utility as shown here: . if using the EthUtil utility (usually with a Micrel KSZ8061 Evaluation Board), type the following commands: > address 1 > mmd 1 > iw 2 b61a . Alternatively, write the following registers to write to the indirect MMD register: Write register Dh, data 0001h Write register Eh, data 0002h Write register Dh, data 4001h Write register Eh, data B61Ah 2. The issue can be avoided by disabling auto-negotiation in the KSZ8061, either by the strapping option, or by clearing bit 12 in register 0h. Care must be taken to ensure that the KSZ8061 and the link partner will link with the same speed and duplex. Note that the KSZ8061 defaults to full-duplex when auto-negotiation is off, but other devices may default to half-duplex in the event of failed auto-negotiation. 3. The issue can be avoided by connecting the cable prior to powering-up or resetting the KSZ8061, and leaving it plugged in thereafter. 4. If the above measures are not taken and the problem occurs, link can be recovered by setting the Restart Auto-Negotiation bit in register 0h, or by resetting or power cycling the device. Reset may be either hardware reset or software reset (register 0h, bit 15). PLAN This errata will not be corrected in the future revision. Fixes: 7ab59dc15e2f ("drivers/net/phy/micrel_phy: Add support for new PHYs") Signed-off-by: Alexander Onnasch Signed-off-by: Rajasingh Thavamani Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/micrel.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 3db06b40580d..05a6ae32ff65 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -339,6 +339,17 @@ static int ksz8041_config_aneg(struct phy_device *phydev) return genphy_config_aneg(phydev); } +static int ksz8061_config_init(struct phy_device *phydev) +{ + int ret; + + ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A); + if (ret) + return ret; + + return kszphy_config_init(phydev); +} + static int ksz9021_load_values_from_of(struct phy_device *phydev, const struct device_node *of_node, u16 reg, @@ -934,7 +945,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id_mask = MICREL_PHY_ID_MASK, .features = PHY_BASIC_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_init = kszphy_config_init, + .config_init = ksz8061_config_init, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, .suspend = genphy_suspend, -- cgit v1.2.3 From ed7a54419ef29342f430136e02d2fd8c447d8ca1 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 26 Feb 2019 19:29:22 +0100 Subject: net: phy: phylink: fix uninitialized variable in phylink_get_mac_state [ Upstream commit d25ed413d5e51644e18f66e34eec049f17a7abcb ] When debugging an issue I found implausible values in state->pause. Reason in that state->pause isn't initialized and later only single bits are changed. Also the struct itself isn't initialized in phylink_resolve(). So better initialize state->pause and other not yet initialized fields. v2: - use right function name in subject v3: - initialize additional fields Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/phylink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 2787e8b1d668..f6e70f2dfd12 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -348,6 +348,10 @@ static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state * linkmode_zero(state->lp_advertising); state->interface = pl->link_config.interface; state->an_enabled = pl->link_config.an_enabled; + state->speed = SPEED_UNKNOWN; + state->duplex = DUPLEX_UNKNOWN; + state->pause = MLO_PAUSE_NONE; + state->an_complete = 0; state->link = 1; return pl->ops->mac_link_state(ndev, state); -- cgit v1.2.3 From d0bedaac932f4c02c080a50d4a30b2a1fec5d682 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Fri, 1 Mar 2019 23:06:40 +0800 Subject: net: sit: fix memory leak in sit_init_net() [ Upstream commit 07f12b26e21ab359261bf75cfcb424fdc7daeb6d ] If register_netdev() is failed to register sitn->fb_tunnel_dev, it will go to err_reg_dev and forget to free netdev(sitn->fb_tunnel_dev). BUG: memory leak unreferenced object 0xffff888378daad00 (size 512): comm "syz-executor.1", pid 4006, jiffies 4295121142 (age 16.115s) hex dump (first 32 bytes): 00 e6 ed c0 83 88 ff ff 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d6dcb63e>] kvmalloc include/linux/mm.h:577 [inline] [<00000000d6dcb63e>] kvzalloc include/linux/mm.h:585 [inline] [<00000000d6dcb63e>] netif_alloc_netdev_queues net/core/dev.c:8380 [inline] [<00000000d6dcb63e>] alloc_netdev_mqs+0x600/0xcc0 net/core/dev.c:8970 [<00000000867e172f>] sit_init_net+0x295/0xa40 net/ipv6/sit.c:1848 [<00000000871019fa>] ops_init+0xad/0x3e0 net/core/net_namespace.c:129 [<00000000319507f6>] setup_net+0x2ba/0x690 net/core/net_namespace.c:314 [<0000000087db4f96>] copy_net_ns+0x1dc/0x330 net/core/net_namespace.c:437 [<0000000057efc651>] create_new_namespaces+0x382/0x730 kernel/nsproxy.c:107 [<00000000676f83de>] copy_namespaces+0x2ed/0x3d0 kernel/nsproxy.c:165 [<0000000030b74bac>] copy_process.part.27+0x231e/0x6db0 kernel/fork.c:1919 [<00000000fff78746>] copy_process kernel/fork.c:1713 [inline] [<00000000fff78746>] _do_fork+0x1bc/0xe90 kernel/fork.c:2224 [<000000001c2e0d1c>] do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:290 [<00000000ec48bd44>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<0000000039acff8a>] 0xffffffffffffffff Signed-off-by: Mao Wenan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index da6d5a3f5399..868d7da7a0cb 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1873,6 +1873,7 @@ static int __net_init sit_init_net(struct net *net) err_reg_dev: ipip6_dev_free(sitn->fb_tunnel_dev); + free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; } -- cgit v1.2.3 From 5fdb551fd6a6bda0010458fd34aa73d17e56c572 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 21 Feb 2019 14:13:56 -0800 Subject: net: socket: set sock->sk to NULL after calling proto_ops::release() [ Upstream commit ff7b11aa481f682e0e9711abfeb7d03f5cd612bf ] Commit 9060cb719e61 ("net: crypto set sk to NULL when af_alg_release.") fixed a use-after-free in sockfs_setattr() when an AF_ALG socket is closed concurrently with fchownat(). However, it ignored that many other proto_ops::release() methods don't set sock->sk to NULL and therefore allow the same use-after-free: - base_sock_release - bnep_sock_release - cmtp_sock_release - data_sock_release - dn_release - hci_sock_release - hidp_sock_release - iucv_sock_release - l2cap_sock_release - llcp_sock_release - llc_ui_release - rawsock_release - rfcomm_sock_release - sco_sock_release - svc_release - vcc_release - x25_release Rather than fixing all these and relying on every socket type to get this right forever, just make __sock_release() set sock->sk to NULL itself after calling proto_ops::release(). Reproducer that produces the KASAN splat when any of these socket types are configured into the kernel: #include #include #include #include pthread_t t; volatile int fd; void *close_thread(void *arg) { for (;;) { usleep(rand() % 100); close(fd); } } int main() { pthread_create(&t, NULL, close_thread, NULL); for (;;) { fd = socket(rand() % 50, rand() % 11, 0); fchownat(fd, "", 1000, 1000, 0x1000); close(fd); } } Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Signed-off-by: Eric Biggers Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/socket.c b/net/socket.c index 5c820212ba81..18d27b8c2511 100644 --- a/net/socket.c +++ b/net/socket.c @@ -577,6 +577,7 @@ static void __sock_release(struct socket *sock, struct inode *inode) if (inode) inode_lock(inode); sock->ops->release(sock); + sock->sk = NULL; if (inode) inode_unlock(inode); sock->ops = NULL; -- cgit v1.2.3 From ab04570d82b164daca4917c459c152dfb3839448 Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Mon, 25 Feb 2019 10:57:20 +0700 Subject: tipc: fix race condition causing hung sendto [ Upstream commit bfd07f3dd4f111b884d7922b37eb239280f83d8c ] When sending multicast messages via blocking socket, if sending link is congested (tsk->cong_link_cnt is set to 1), the sending thread will be put into sleeping state. However, tipc_sk_filter_rcv() is called under socket spin lock but tipc_wait_for_cond() is not. So, there is no guarantee that the setting of tsk->cong_link_cnt to 0 in tipc_sk_proto_rcv() in CPU-1 will be perceived by CPU-0. If that is the case, the sending thread in CPU-0 after being waken up, will continue to see tsk->cong_link_cnt as 1 and put the sending thread into sleeping state again. The sending thread will sleep forever. CPU-0 | CPU-1 tipc_wait_for_cond() | { | // condition_ = !tsk->cong_link_cnt | while ((rc_ = !(condition_))) { | ... | release_sock(sk_); | wait_woken(); | | if (!sock_owned_by_user(sk)) | tipc_sk_filter_rcv() | { | ... | tipc_sk_proto_rcv() | { | ... | tsk->cong_link_cnt--; | ... | sk->sk_write_space(sk); | ... | } | ... | } sched_annotate_sleep(); | lock_sock(sk_); | remove_wait_queue(); | } | } | This commit fixes it by adding memory barrier to tipc_sk_proto_rcv() and tipc_wait_for_cond(). Acked-by: Jon Maloy Signed-off-by: Tung Nguyen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c5614d8e74fa..88c307ef1318 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -377,11 +377,13 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout) #define tipc_wait_for_cond(sock_, timeo_, condition_) \ ({ \ + DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ struct sock *sk_; \ int rc_; \ \ while ((rc_ = !(condition_))) { \ - DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ + /* coupled with smp_wmb() in tipc_sk_proto_rcv() */ \ + smp_rmb(); \ sk_ = (sock_)->sk; \ rc_ = tipc_sk_sock_err((sock_), timeo_); \ if (rc_) \ @@ -1961,6 +1963,8 @@ static void tipc_sk_proto_rcv(struct sock *sk, return; case SOCK_WAKEUP: tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); + /* coupled with smp_rmb() in tipc_wait_for_cond() */ + smp_wmb(); tsk->cong_link_cnt--; wakeup = true; break; -- cgit v1.2.3 From 488b940719b708c9fae904bac95d4db832bf05f0 Mon Sep 17 00:00:00 2001 From: Timur Celik Date: Sat, 23 Feb 2019 12:53:13 +0100 Subject: tun: fix blocking read [ Upstream commit 71828b2240692cec0e68b8d867bc00e1745e7fae ] This patch moves setting of the current state into the loop. Otherwise the task may end up in a busy wait loop if none of the break conditions are met. Signed-off-by: Timur Celik Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 65844f28db30..27ac30da7e31 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2122,9 +2122,9 @@ static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) } add_wait_queue(&tfile->wq.wait, &wait); - current->state = TASK_INTERRUPTIBLE; while (1) { + set_current_state(TASK_INTERRUPTIBLE); ptr = ptr_ring_consume(&tfile->tx_ring); if (ptr) break; @@ -2140,7 +2140,7 @@ static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) schedule(); } - current->state = TASK_RUNNING; + set_current_state(TASK_RUNNING); remove_wait_queue(&tfile->wq.wait, &wait); out: -- cgit v1.2.3 From e5e5840183deac7b135b35c199968b3e61949851 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Thu, 28 Feb 2019 14:11:26 +0000 Subject: xen-netback: don't populate the hash cache on XenBus disconnect [ Upstream commit a2288d4e355992d369c50c45d017a85f6061ff71 ] Occasionally, during the disconnection procedure on XenBus which includes hash cache deinitialization there might be some packets still in-flight on other processors. Handling of these packets includes hashing and hash cache population that finally results in hash cache data structure corruption. In order to avoid this we prevent hashing of those packets if there are no queues initialized. In that case RCU protection of queues guards the hash cache as well. Signed-off-by: Igor Druzhinin Reviewed-by: Paul Durrant Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/hash.c | 2 ++ drivers/net/xen-netback/interface.c | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index 0ccb021f1e78..10d580c3dea3 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -454,6 +454,8 @@ void xenvif_init_hash(struct xenvif *vif) if (xenvif_hash_cache_size == 0) return; + BUG_ON(vif->hash.cache.count); + spin_lock_init(&vif->hash.cache.lock); INIT_LIST_HEAD(&vif->hash.cache.list); } diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index f6ae23fc3f6b..82add0ac4a5f 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -153,6 +153,13 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, { struct xenvif *vif = netdev_priv(dev); unsigned int size = vif->hash.size; + unsigned int num_queues; + + /* If queues are not set up internally - always return 0 + * as the packet going to be dropped anyway */ + num_queues = READ_ONCE(vif->num_queues); + if (num_queues < 1) + return 0; if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) return fallback(dev, skb, NULL) % dev->real_num_tx_queues; -- cgit v1.2.3 From 947fc52b6bf470fda416fac80051c1b9d401ac59 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Thu, 28 Feb 2019 12:48:03 +0000 Subject: xen-netback: fix occasional leak of grant ref mappings under memory pressure [ Upstream commit 99e87f56b48f490fb16b6e0f74691c1e664dea95 ] Zero-copy callback flag is not yet set on frag list skb at the moment xenvif_handle_frag_list() returns -ENOMEM. This eventually results in leaking grant ref mappings since xenvif_zerocopy_callback() is never called for these fragments. Those eventually build up and cause Xen to kill Dom0 as the slots get reused for new mappings: "d0v0 Attempt to implicitly unmap a granted PTE c010000329fce005" That behavior is observed under certain workloads where sudden spikes of page cache writes coexist with active atomic skb allocations from network traffic. Additionally, rework the logic to deal with frag_list deallocation in a single place. Signed-off-by: Paul Durrant Signed-off-by: Igor Druzhinin Acked-by: Wei Liu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/netback.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 3621e05a7494..d5081ffdc8f0 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1072,11 +1072,6 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s skb_frag_size_set(&frags[i], len); } - /* Copied all the bits from the frag list -- free it. */ - skb_frag_list_init(skb); - xenvif_skb_zerocopy_prepare(queue, nskb); - kfree_skb(nskb); - /* Release all the original (foreign) frags. */ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) skb_frag_unref(skb, f); @@ -1145,6 +1140,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) xenvif_fill_frags(queue, skb); if (unlikely(skb_has_frag_list(skb))) { + struct sk_buff *nskb = skb_shinfo(skb)->frag_list; + xenvif_skb_zerocopy_prepare(queue, nskb); if (xenvif_handle_frag_list(queue, skb)) { if (net_ratelimit()) netdev_err(queue->vif->dev, @@ -1153,6 +1150,9 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) kfree_skb(skb); continue; } + /* Copied all the bits from the frag list -- free it. */ + skb_frag_list_init(skb); + kfree_skb(nskb); } skb->dev = queue->vif->dev; -- cgit v1.2.3 From e6620defc483b94bae5de691ca5797ae013cdf69 Mon Sep 17 00:00:00 2001 From: Timur Celik Date: Mon, 25 Feb 2019 21:13:13 +0100 Subject: tun: remove unnecessary memory barrier [ Upstream commit ecef67cb10db7b83b3b71c61dbb29aa070ab0112 ] Replace set_current_state with __set_current_state since no memory barrier is needed at this point. Signed-off-by: Timur Celik Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 27ac30da7e31..f3293355c784 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2140,7 +2140,7 @@ static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) schedule(); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(&tfile->wq.wait, &wait); out: -- cgit v1.2.3 From f2397468fbd7bb5ab8753cef694acf294d1bb383 Mon Sep 17 00:00:00 2001 From: Nazarov Sergey Date: Mon, 25 Feb 2019 19:24:15 +0300 Subject: net: Add __icmp_send helper. [ Upstream commit 9ef6b42ad6fd7929dd1b6092cb02014e382c6a91 ] Add __icmp_send function having ip_options struct parameter Signed-off-by: Sergey Nazarov Reviewed-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/icmp.h | 9 ++++++++- net/ipv4/icmp.c | 7 ++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/net/icmp.h b/include/net/icmp.h index 3ef2743a8eec..8665bf24e3b7 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -22,6 +22,7 @@ #include #include +#include struct icmp_err { int errno; @@ -39,7 +40,13 @@ struct net_proto_family; struct sk_buff; struct net; -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + const struct ip_options *opt); +static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); +} + int icmp_rcv(struct sk_buff *skb); void icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 695979b7ef6d..ad75c468ecfb 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -570,7 +570,8 @@ relookup_failed: * MUST reply to only the first fragment. */ -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + const struct ip_options *opt) { struct iphdr *iph; int room; @@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph->tos; mark = IP4_REPLY_MARK(net, skb_in->mark); - if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in)) + if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt)) goto out_unlock; @@ -742,7 +743,7 @@ out_bh_enable: local_bh_enable(); out:; } -EXPORT_SYMBOL(icmp_send); +EXPORT_SYMBOL(__icmp_send); static void icmp_socket_deliver(struct sk_buff *skb, u32 info) -- cgit v1.2.3 From 125bc1e67eee3dd70edc3349068bfd997a118989 Mon Sep 17 00:00:00 2001 From: Nazarov Sergey Date: Mon, 25 Feb 2019 19:27:15 +0300 Subject: net: avoid use IPCB in cipso_v4_error [ Upstream commit 3da1ed7ac398f34fff1694017a07054d69c5f5c5 ] Extract IP options in cipso_v4_error and use __icmp_send. Signed-off-by: Sergey Nazarov Acked-by: Paul Moore Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip.h | 2 ++ net/ipv4/cipso_ipv4.c | 17 +++++++++++++++-- net/ipv4/ip_options.c | 22 +++++++++++++++++----- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index bf2209f861af..71d31e4d4391 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -638,6 +638,8 @@ static inline int ip_options_echo(struct net *net, struct ip_options *dopt, } void ip_options_fragment(struct sk_buff *skb); +int __ip_options_compile(struct net *net, struct ip_options *opt, + struct sk_buff *skb, __be32 *info); int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb); int ip_options_get(struct net *net, struct ip_options_rcu **optp, diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index f4b83de2263e..f0165c5f376b 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1736,13 +1736,26 @@ validate_return: */ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { + unsigned char optbuf[sizeof(struct ip_options) + 40]; + struct ip_options *opt = (struct ip_options *)optbuf; + if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; + /* + * We might be called above the IP layer, + * so we can not use icmp_send and IPCB here. + */ + + memset(opt, 0, sizeof(struct ip_options)); + opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + return; + if (gateway) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt); else - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt); } /** diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ed194d46c00e..32a35043c9f5 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -251,8 +251,9 @@ static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb) * If opt == NULL, then skb->data should point to IP header. */ -int ip_options_compile(struct net *net, - struct ip_options *opt, struct sk_buff *skb) +int __ip_options_compile(struct net *net, + struct ip_options *opt, struct sk_buff *skb, + __be32 *info) { __be32 spec_dst = htonl(INADDR_ANY); unsigned char *pp_ptr = NULL; @@ -468,11 +469,22 @@ eol: return 0; error: - if (skb) { - icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24)); - } + if (info) + *info = htonl((pp_ptr-iph)<<24); return -EINVAL; } + +int ip_options_compile(struct net *net, + struct ip_options *opt, struct sk_buff *skb) +{ + int ret; + __be32 info; + + ret = __ip_options_compile(net, opt, skb, &info); + if (ret != 0 && skb) + icmp_send(skb, ICMP_PARAMETERPROB, 0, info); + return ret; +} EXPORT_SYMBOL(ip_options_compile); /* -- cgit v1.2.3 From 8c0aa3f6908c400d85830bf8bce06b7abb85451f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 26 Feb 2019 09:00:02 -0800 Subject: ipv4: Return error for RTA_VIA attribute [ Upstream commit b6e9e5df4ecf100f6a10ab2ade8e46d47a4b9779 ] IPv4 currently does not support nexthops outside of the AF_INET family. Specifically, it does not handle RTA_VIA attribute. If it is passed in a route add request, the actual route added only uses the device which is clearly not what the user intended: $ ip ro add 172.16.1.0/24 via inet6 2001:db8:1::1 dev eth0 $ ip ro ls ... 172.16.1.0/24 dev eth0 Catch this and fail the route add: $ ip ro add 172.16.1.0/24 via inet6 2001:db8:1::1 dev eth0 Error: IPv4 does not support RTA_VIA attribute. Fixes: 03c0566542f4c ("mpls: Netlink commands to add, remove, and dump routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_frontend.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 958e185a8e8d..dae743b649c1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -700,6 +700,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, case RTA_GATEWAY: cfg->fc_gw = nla_get_be32(attr); break; + case RTA_VIA: + NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute"); + err = -EINVAL; + goto errout; case RTA_PRIORITY: cfg->fc_priority = nla_get_u32(attr); break; -- cgit v1.2.3 From a68d31cc530649a714bd309dde6d12083231a518 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 26 Feb 2019 09:00:03 -0800 Subject: ipv6: Return error for RTA_VIA attribute [ Upstream commit e3818541b49fb88650ba339d33cc53e4095da5b3 ] IPv6 currently does not support nexthops outside of the AF_INET6 family. Specifically, it does not handle RTA_VIA attribute. If it is passed in a route add request, the actual route added only uses the device which is clearly not what the user intended: $ ip -6 ro add 2001:db8:2::/64 via inet 172.16.1.1 dev eth0 $ ip ro ls ... 2001:db8:2::/64 dev eth0 metric 1024 pref medium Catch this and fail the route add: $ ip -6 ro add 2001:db8:2::/64 via inet 172.16.1.1 dev eth0 Error: IPv6 does not support RTA_VIA attribute. Fixes: 03c0566542f4c ("mpls: Netlink commands to add, remove, and dump routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cf4a8c25458f..ba59a9c14e02 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4213,6 +4213,10 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; } + if (tb[RTA_VIA]) { + NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute"); + goto errout; + } if (tb[RTA_DST]) { int plen = (rtm->rtm_dst_len + 7) >> 3; -- cgit v1.2.3 From 4f3221dee1e1230555cbf24c2d5907f470b5e10e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 26 Feb 2019 09:00:04 -0800 Subject: mpls: Return error for RTA_GATEWAY attribute [ Upstream commit be48220edd48ca0d569782992840488a52373a24 ] MPLS does not support nexthops with an MPLS address family. Specifically, it does not handle RTA_GATEWAY attribute. Make it clear by returning an error. Fixes: 03c0566542f4c ("mpls: Netlink commands to add, remove, and dump routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mpls/af_mpls.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 8fbe6cdbe255..d5a4db5b3fe7 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1822,6 +1822,9 @@ static int rtm_to_route_config(struct sk_buff *skb, goto errout; break; } + case RTA_GATEWAY: + NL_SET_ERR_MSG(extack, "MPLS does not support RTA_GATEWAY attribute"); + goto errout; case RTA_VIA: { if (nla_get_via(nla, &cfg->rc_via_alen, -- cgit v1.2.3 From cc211561d12656dff6d73bc61094cb6366cac4e6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 25 Feb 2019 13:55:48 -0800 Subject: ipv4: Pass original device to ip_rcv_finish_core [ Upstream commit a1fd1ad2552fad9e649eeb85fd79301e2880a886 ] ip_route_input_rcu expects the original ingress device (e.g., for proper multicast handling). The skb->dev can be changed by l3mdev_ip_rcv, so dev needs to be saved prior to calling it. This was the behavior prior to the listify changes. Fixes: 5fa12739a53d0 ("net: ipv4: listify ip_rcv_finish") Cc: Edward Cree Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 6f977b0fef54..bd8ef4f87c79 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -308,11 +308,10 @@ drop: } static int ip_rcv_finish_core(struct net *net, struct sock *sk, - struct sk_buff *skb) + struct sk_buff *skb, struct net_device *dev) { const struct iphdr *iph = ip_hdr(skb); int (*edemux)(struct sk_buff *skb); - struct net_device *dev = skb->dev; struct rtable *rt; int err; @@ -401,6 +400,7 @@ drop_error: static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + struct net_device *dev = skb->dev; int ret; /* if ingress device is enslaved to an L3 master device pass the @@ -410,7 +410,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) if (!skb) return NET_RX_SUCCESS; - ret = ip_rcv_finish_core(net, sk, skb); + ret = ip_rcv_finish_core(net, sk, skb, dev); if (ret != NET_RX_DROP) ret = dst_input(skb); return ret; @@ -550,6 +550,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk, INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { + struct net_device *dev = skb->dev; struct dst_entry *dst; skb_list_del_init(skb); @@ -559,7 +560,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk, skb = l3mdev_ip_rcv(skb); if (!skb) continue; - if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP) + if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP) continue; dst = skb_dst(skb); -- cgit v1.2.3 From 457c1190c65cff2bf98295a9122379a0eb7e7564 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 28 Feb 2019 22:14:33 +0100 Subject: net: dsa: mv88e6xxx: power serdes on/off for 10G interfaces on 6390X [ Upstream commit d235c48b40d399328585a68f3f9bf7cc3062d586 ] Upon setting the cmode on 6390 and 6390X, the associated serdes interfaces must be powered off/on. Both 6390X and 6390 share code to do so, but it currently uses the 6390 specific helper mv88e6390_serdes_power() to disable and enable the serdes interface. This call will fail silently on 6390X when trying so set a 10G interface such as XAUI or RXAUI, since mv88e6390_serdes_power() internally grabs the lane number based on modes supported by the 6390, and returns 0 when getting -ENODEV as a lane number. Using mv88e6390x_serdes_power() should be safe here, since we explicitly rule-out all ports but the 9 and 10, and because modes supported by 6390 ports 9 and 10 are a subset of those supported on 6390X. This was tested on 6390X using RXAUI mode. Fixes: 364e9d7776a3 ("net: dsa: mv88e6xxx: Power on/off SERDES on cmode change") Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 2c75460ad57a..d5212e4ecaab 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -384,7 +384,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, return err; } - err = mv88e6390_serdes_power(chip, port, false); + err = mv88e6390x_serdes_power(chip, port, false); if (err) return err; @@ -400,7 +400,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, if (err) return err; - err = mv88e6390_serdes_power(chip, port, true); + err = mv88e6390x_serdes_power(chip, port, true); if (err) return err; -- cgit v1.2.3 From 4d8f5df0cbe926bb43a5cf1dff19b2c72f924395 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 28 Feb 2019 07:39:15 +0100 Subject: net: dsa: mv88e6xxx: prevent interrupt storm caused by mv88e6390x_port_set_cmode [ Upstream commit ed8fe20205ac054bf585156709de3913d1890f30 ] When debugging another issue I faced an interrupt storm in this driver (88E6390, port 9 in SGMII mode), consisting of alternating link-up / link-down interrupts. Analysis showed that the driver wanted to set a cmode that was set already. But so far mv88e6390x_port_set_cmode() doesn't check this and powers down SERDES, what causes the link to break, and eventually results in the described interrupt storm. Fix this by checking whether the cmode actually changes. We want that the very first call to mv88e6390x_port_set_cmode() always configures the registers, therefore initialize port.cmode with a value that is different from any supported cmode value. We have to take care that we only init the ports cmode once chip->info->num_ports is set. v2: - add small helper and init the number of actual ports only Fixes: 364e9d7776a3 ("net: dsa: mv88e6xxx: Power on/off SERDES on cmode change") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/chip.c | 10 ++++++++++ drivers/net/dsa/mv88e6xxx/port.c | 4 ++++ drivers/net/dsa/mv88e6xxx/port.h | 1 + 3 files changed, 15 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d045d3cd00a6..c078c791f481 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4561,6 +4561,14 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, return 0; } +static void mv88e6xxx_ports_cmode_init(struct mv88e6xxx_chip *chip) +{ + int i; + + for (i = 0; i < mv88e6xxx_num_ports(chip); i++) + chip->ports[i].cmode = MV88E6XXX_PORT_STS_CMODE_INVALID; +} + static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds, int port) { @@ -4597,6 +4605,8 @@ static const char *mv88e6xxx_drv_probe(struct device *dsa_dev, if (err) goto free; + mv88e6xxx_ports_cmode_init(chip); + mutex_lock(&chip->reg_lock); err = mv88e6xxx_switch_reset(chip); mutex_unlock(&chip->reg_lock); diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index d5212e4ecaab..7fffce734f0a 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -374,6 +374,10 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, cmode = 0; } + /* cmode doesn't change, nothing to do for us */ + if (cmode == chip->ports[port].cmode) + return 0; + lane = mv88e6390x_serdes_get_lane(chip, port); if (lane < 0) return lane; diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index b31910023bb6..95b59f5eb393 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -52,6 +52,7 @@ #define MV88E6185_PORT_STS_CMODE_1000BASE_X 0x0005 #define MV88E6185_PORT_STS_CMODE_PHY 0x0006 #define MV88E6185_PORT_STS_CMODE_DISABLED 0x0007 +#define MV88E6XXX_PORT_STS_CMODE_INVALID 0xff /* Offset 0x01: MAC (or PCS or Physical) Control Register */ #define MV88E6XXX_PORT_MAC_CTL 0x01 -- cgit v1.2.3 From f1446b164925f6c993328efe892457934764cdbf Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 22 Feb 2019 12:33:25 +0100 Subject: net/sched: act_ipt: fix refcount leak when replace fails [ Upstream commit 8f67c90ee9148eab3d2b4393c3cf76489b27f87c ] After commit 4e8ddd7f1758 ("net: sched: don't release reference on action overwrite"), the error path of all actions was converted to drop refcount also when the action was being overwritten. But we forgot act_ipt_init(), in case allocation of 'tname' was not successful: # tc action add action xt -j LOG --log-prefix hello index 100 tablename: mangle hook: NF_IP_POST_ROUTING target: LOG level warning prefix "hello" index 100 # tc action show action xt total acts 1 action order 0: tablename: mangle hook: NF_IP_POST_ROUTING target LOG level warning prefix "hello" index 100 ref 1 bind 0 # tc action replace action xt -j LOG --log-prefix world index 100 tablename: mangle hook: NF_IP_POST_ROUTING target: LOG level warning prefix "world" index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action show action xt total acts 1 action order 0: tablename: mangle hook: NF_IP_POST_ROUTING target LOG level warning prefix "hello" index 100 ref 2 bind 0 Ensure we call tcf_idr_release(), in case 'tname' allocation failed, also when the action is being replaced. Fixes: 4e8ddd7f1758 ("net: sched: don't release reference on action overwrite") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_ipt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 8525de811616..334f3a057671 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -199,8 +199,7 @@ err3: err2: kfree(tname); err1: - if (ret == ACT_P_CREATED) - tcf_idr_release(*a, bind); + tcf_idr_release(*a, bind); return err; } -- cgit v1.2.3 From 69e6fb1804cf679457d384e1bfc519a1f1da9553 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 22 Feb 2019 12:33:26 +0100 Subject: net/sched: act_skbedit: fix refcount leak when replace fails [ Upstream commit 6191da98062d25276a3b88fb2a94dcbcfb3ea65d ] when act_skbedit was converted to use RCU in the data plane, we added an error path, but we forgot to drop the action refcount in case of failure during a 'replace' operation: # tc actions add action skbedit ptype otherhost pass index 100 # tc action show action skbedit total acts 1 action order 0: skbedit ptype otherhost pass index 100 ref 1 bind 0 # tc actions replace action skbedit ptype otherhost drop index 100 RTNETLINK answers: Cannot allocate memory We have an error talking to the kernel # tc action show action skbedit total acts 1 action order 0: skbedit ptype otherhost pass index 100 ref 2 bind 0 Ensure we call tcf_idr_release(), in case 'params_new' allocation failed, also when the action is being replaced. Fixes: c749cdda9089 ("net/sched: act_skbedit: don't use spinlock in the data path") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_skbedit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 73e44ce2a883..86d90fc5e97e 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -191,8 +191,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { - if (ret == ACT_P_CREATED) - tcf_idr_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } -- cgit v1.2.3 From 3846080998b93b300237ed8cef5694f8cec5a4d0 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 25 Feb 2019 17:28:27 +0200 Subject: net: sched: act_tunnel_key: fix NULL pointer dereference during init [ Upstream commit a3df633a3c92bb96b06552c3f828d7c267774379 ] Metadata pointer is only initialized for action TCA_TUNNEL_KEY_ACT_SET, but it is unconditionally dereferenced in tunnel_key_init() error handler. Verify that metadata pointer is not NULL before dereferencing it in tunnel_key_init error handling code. Fixes: ee28bb56ac5b ("net/sched: fix memory leak in act_tunnel_key_init()") Signed-off-by: Vlad Buslov Reviewed-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/act_tunnel_key.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 0f6601fdf889..72d9c432e8b4 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -377,7 +377,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return ret; release_tun_meta: - dst_release(&metadata->dst); + if (metadata) + dst_release(&metadata->dst); err_out: if (exists) -- cgit v1.2.3 From eab5ea25ebee29b2bd8cefe98f12afe2159cf530 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 20 Nov 2018 11:00:18 +0800 Subject: x86/CPU/AMD: Set the CPB bit unconditionally on F17h commit 0237199186e7a4aa5310741f0a6498a20c820fd7 upstream. Some F17h models do not have CPB set in CPUID even though the CPU supports it. Set the feature bit unconditionally on all F17h. [ bp: Rewrite commit message and patch. ] Signed-off-by: Jiaxun Yang Signed-off-by: Borislav Petkov Acked-by: Tom Lendacky Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Sherry Hurwitz Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20181120030018.5185-1-jiaxun.yang@flygoat.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index eeea634bee0a..6a25278e0092 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -818,11 +818,9 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects - * all up to and including B1. - */ - if (c->x86_model <= 1 && c->x86_stepping <= 1) + + /* Fix erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) set_cpu_cap(c, X86_FEATURE_CPB); } -- cgit v1.2.3 From 51c5318058013f06bd5e85166d60d4dbfb9a2072 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 19 Feb 2019 10:52:24 +0300 Subject: x86/boot/compressed/64: Do not read legacy ROM on EFI system commit 6f913de3231e1d70a871135b38219da7810df218 upstream. EFI systems do not necessarily provide a legacy ROM. If the ROM is missing the memory is not mapped at all. Trying to dereference values in the legacy ROM area leads to a crash on Macbook Pro. Only look for values in the legacy ROM area for non-EFI system. Fixes: 3548e131ec6a ("x86/boot/compressed/64: Find a place for 32-bit trampoline") Reported-by: Pitam Mitra Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Tested-by: Bockjoo Kim Cc: bp@alien8.de Cc: hpa@zytor.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190219075224.35058-1-kirill.shutemov@linux.intel.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202351 Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/pgtable_64.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 9e2157371491..f8debf7aeb4c 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,5 +1,7 @@ +#include #include #include +#include #include "pgtable.h" #include "../string.h" @@ -37,9 +39,10 @@ int cmdline_find_option_bool(const char *option); static unsigned long find_trampoline_placement(void) { - unsigned long bios_start, ebda_start; + unsigned long bios_start = 0, ebda_start = 0; unsigned long trampoline_start; struct boot_e820_entry *entry; + char *signature; int i; /* @@ -47,8 +50,18 @@ static unsigned long find_trampoline_placement(void) * This code is based on reserve_bios_regions(). */ - ebda_start = *(unsigned short *)0x40e << 4; - bios_start = *(unsigned short *)0x413 << 10; + /* + * EFI systems may not provide legacy ROM. The memory may not be mapped + * at all. + * + * Only look for values in the legacy ROM for non-EFI system. + */ + signature = (char *)&boot_params->efi_info.efi_loader_signature; + if (strncmp(signature, EFI32_LOADER_SIGNATURE, 4) && + strncmp(signature, EFI64_LOADER_SIGNATURE, 4)) { + ebda_start = *(unsigned short *)0x40e << 4; + bios_start = *(unsigned short *)0x413 << 10; + } if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) bios_start = BIOS_START_MAX; -- cgit v1.2.3 From 690e939da71d6b38a04b605300fc05e22ad991a3 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 23 Aug 2018 13:25:34 +0300 Subject: tracing: Fix event filters and triggers to handle negative numbers commit 6a072128d262d2b98d31626906a96700d1fc11eb upstream. Then tracing syscall exit event it is extremely useful to filter exit codes equal to some negative value, to react only to required errors. But negative numbers does not work: [root@snorch sys_exit_read]# echo "ret == -1" > filter bash: echo: write error: Invalid argument [root@snorch sys_exit_read]# cat filter ret == -1 ^ parse_error: Invalid value (did you forget quotes)? Similar thing happens when setting triggers. These is a regression in v4.17 introduced by the commit mentioned below, testing without these commit shows no problem with negative numbers. Link: http://lkml.kernel.org/r/20180823102534.7642-1-ptikhomirov@virtuozzo.com Cc: stable@vger.kernel.org Fixes: 80765597bc58 ("tracing: Rewrite filter logic to be simpler and faster") Signed-off-by: Pavel Tikhomirov Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_filter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 5574e862de8d..5a1c64a26e81 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1301,7 +1301,7 @@ static int parse_pred(const char *str, void *data, /* go past the last quote */ i++; - } else if (isdigit(str[i])) { + } else if (isdigit(str[i]) || str[i] == '-') { /* Make sure the field is not a string */ if (is_string_field(field)) { @@ -1314,6 +1314,9 @@ static int parse_pred(const char *str, void *data, goto err_free; } + if (str[i] == '-') + i++; + /* We allow 0xDEADBEEF */ while (isalnum(str[i])) i++; -- cgit v1.2.3 From 9d53e36c8c098544fc62e7868ebb989168110c4c Mon Sep 17 00:00:00 2001 From: Balaji Manoharan Date: Wed, 20 Feb 2019 19:50:53 +0200 Subject: usb: xhci: Fix for Enabling USB ROLE SWITCH QUIRK on INTEL_SUNRISEPOINT_LP_XHCI commit 8fde481ef3674ae5ad0dbfef4df18ff507c5675a upstream. This fix enables USB role feature on intel commercial nuc platform which is based on Kabylake chipset. Signed-off-by: Balaji Manoharan Reviewed-by: Hans de Goede Reviewed-by: Heikki Krogerus Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 09bf6b4b741b..1493d0fdf5ad 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -187,6 +187,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_SSIC_PORT_UNUSED; if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) xhci->quirks |= XHCI_INTEL_USB_ROLE_SW; if (pdev->vendor == PCI_VENDOR_ID_INTEL && -- cgit v1.2.3 From 5691b93f686e6551a71b27af93b4edc234062e63 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 9 Jan 2019 16:05:10 -0600 Subject: applicom: Fix potential Spectre v1 vulnerabilities commit d7ac3c6ef5d8ce14b6381d52eb7adafdd6c8bb3c upstream. IndexCard is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/char/applicom.c:418 ac_write() warn: potential spectre issue 'apbs' [r] drivers/char/applicom.c:728 ac_ioctl() warn: potential spectre issue 'apbs' [r] (local cap) Fix this by sanitizing IndexCard before using it to index apbs. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- drivers/char/applicom.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index c0a5b1f3a986..4ccc39e00ced 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -386,7 +387,11 @@ static ssize_t ac_write(struct file *file, const char __user *buf, size_t count, TicCard = st_loc.tic_des_from_pc; /* tic number to send */ IndexCard = NumCard - 1; - if((NumCard < 1) || (NumCard > MAX_BOARD) || !apbs[IndexCard].RamIO) + if (IndexCard >= MAX_BOARD) + return -EINVAL; + IndexCard = array_index_nospec(IndexCard, MAX_BOARD); + + if (!apbs[IndexCard].RamIO) return -EINVAL; #ifdef DEBUG @@ -697,6 +702,7 @@ static long ac_ioctl(struct file *file, unsigned int cmd, unsigned long arg) unsigned char IndexCard; void __iomem *pmem; int ret = 0; + static int warncount = 10; volatile unsigned char byte_reset_it; struct st_ram_io *adgl; void __user *argp = (void __user *)arg; @@ -711,16 +717,12 @@ static long ac_ioctl(struct file *file, unsigned int cmd, unsigned long arg) mutex_lock(&ac_mutex); IndexCard = adgl->num_card-1; - if(cmd != 6 && ((IndexCard >= MAX_BOARD) || !apbs[IndexCard].RamIO)) { - static int warncount = 10; - if (warncount) { - printk( KERN_WARNING "APPLICOM driver IOCTL, bad board number %d\n",(int)IndexCard+1); - warncount--; - } - kfree(adgl); - mutex_unlock(&ac_mutex); - return -EINVAL; - } + if (cmd != 6 && IndexCard >= MAX_BOARD) + goto err; + IndexCard = array_index_nospec(IndexCard, MAX_BOARD); + + if (cmd != 6 && !apbs[IndexCard].RamIO) + goto err; switch (cmd) { @@ -838,5 +840,16 @@ static long ac_ioctl(struct file *file, unsigned int cmd, unsigned long arg) kfree(adgl); mutex_unlock(&ac_mutex); return 0; + +err: + if (warncount) { + pr_warn("APPLICOM driver IOCTL, bad board number %d\n", + (int)IndexCard + 1); + warncount--; + } + kfree(adgl); + mutex_unlock(&ac_mutex); + return -EINVAL; + } -- cgit v1.2.3 From 88793c034edff193c63828f79d0a9e24e49025a8 Mon Sep 17 00:00:00 2001 From: Liu Xiang Date: Sat, 16 Feb 2019 17:12:24 +0800 Subject: MIPS: irq: Allocate accurate order pages for irq stack commit 72faa7a773ca59336f3c889e878de81445c5a85c upstream. The irq_pages is the number of pages for irq stack, but not the order which is needed by __get_free_pages(). We can use get_order() to calculate the accurate order. Signed-off-by: Liu Xiang Signed-off-by: Paul Burton Fixes: fe8bd18ffea5 ("MIPS: Introduce irq_stack") Cc: linux-mips@vger.kernel.org Cc: stable@vger.kernel.org # v4.11+ Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index ba150c755fcc..85b6c60f285d 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -52,6 +52,7 @@ asmlinkage void spurious_interrupt(void) void __init init_IRQ(void) { int i; + unsigned int order = get_order(IRQ_STACK_SIZE); for (i = 0; i < NR_IRQS; i++) irq_set_noprobe(i); @@ -62,8 +63,7 @@ void __init init_IRQ(void) arch_init_irq(); for_each_possible_cpu(i) { - int irq_pages = IRQ_STACK_SIZE / PAGE_SIZE; - void *s = (void *)__get_free_pages(GFP_KERNEL, irq_pages); + void *s = (void *)__get_free_pages(GFP_KERNEL, order); irq_stack[i] = s; pr_debug("CPU%d IRQ stack at 0x%p - 0x%p\n", i, -- cgit v1.2.3 From f5e66cdb51fd352d7091cb2516b24734e524cb33 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 8 Feb 2019 16:59:49 -0800 Subject: aio: Fix locking in aio_poll() commit d3d6a18d7d351cbcc9b33dbedf710e65f8ce1595 upstream. wake_up_locked() may but does not have to be called with interrupts disabled. Since the fuse filesystem calls wake_up_locked() without disabling interrupts aio_poll_wake() may be called with interrupts enabled. Since the kioctx.ctx_lock may be acquired from IRQ context, all code that acquires that lock from thread context must disable interrupts. Hence change the spin_trylock() call in aio_poll_wake() into a spin_trylock_irqsave() call. This patch fixes the following lockdep complaint: ===================================================== WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected 5.0.0-rc4-next-20190131 #23 Not tainted ----------------------------------------------------- syz-executor2/13779 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: 0000000098ac1230 (&fiq->waitq){+.+.}, at: spin_lock include/linux/spinlock.h:329 [inline] 0000000098ac1230 (&fiq->waitq){+.+.}, at: aio_poll fs/aio.c:1772 [inline] 0000000098ac1230 (&fiq->waitq){+.+.}, at: __io_submit_one fs/aio.c:1875 [inline] 0000000098ac1230 (&fiq->waitq){+.+.}, at: io_submit_one+0xedf/0x1cf0 fs/aio.c:1908 and this task is already holding: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:354 [inline] 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1771 [inline] 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one fs/aio.c:1875 [inline] 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908 which would create a new lock dependency: (&(&ctx->ctx_lock)->rlock){..-.} -> (&fiq->waitq){+.+.} but this new dependency connects a SOFTIRQ-irq-safe lock: (&(&ctx->ctx_lock)->rlock){..-.} ... which became SOFTIRQ-irq-safe at: lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826 __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline] _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160 spin_lock_irq include/linux/spinlock.h:354 [inline] free_ioctx_users+0x2d/0x4a0 fs/aio.c:610 percpu_ref_put_many include/linux/percpu-refcount.h:285 [inline] percpu_ref_put include/linux/percpu-refcount.h:301 [inline] percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123 [inline] percpu_ref_switch_to_atomic_rcu+0x3e7/0x520 lib/percpu-refcount.c:158 __rcu_reclaim kernel/rcu/rcu.h:240 [inline] rcu_do_batch kernel/rcu/tree.c:2486 [inline] invoke_rcu_callbacks kernel/rcu/tree.c:2799 [inline] rcu_core+0x928/0x1390 kernel/rcu/tree.c:2780 __do_softirq+0x266/0x95a kernel/softirq.c:292 run_ksoftirqd kernel/softirq.c:654 [inline] run_ksoftirqd+0x8e/0x110 kernel/softirq.c:646 smpboot_thread_fn+0x6ab/0xa10 kernel/smpboot.c:164 kthread+0x357/0x430 kernel/kthread.c:247 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 to a SOFTIRQ-irq-unsafe lock: (&fiq->waitq){+.+.} ... which became SOFTIRQ-irq-unsafe at: ... lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144 spin_lock include/linux/spinlock.h:329 [inline] flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415 fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676 fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687 fuse_send_init fs/fuse/inode.c:989 [inline] fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214 mount_nodev+0x68/0x110 fs/super.c:1392 fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239 legacy_get_tree+0xf2/0x200 fs/fs_context.c:590 vfs_get_tree+0x123/0x450 fs/super.c:1481 do_new_mount fs/namespace.c:2610 [inline] do_mount+0x1436/0x2c40 fs/namespace.c:2932 ksys_mount+0xdb/0x150 fs/namespace.c:3148 __do_sys_mount fs/namespace.c:3162 [inline] __se_sys_mount fs/namespace.c:3159 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&fiq->waitq); local_irq_disable(); lock(&(&ctx->ctx_lock)->rlock); lock(&fiq->waitq); lock(&(&ctx->ctx_lock)->rlock); *** DEADLOCK *** 1 lock held by syz-executor2/13779: #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:354 [inline] #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1771 [inline] #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one fs/aio.c:1875 [inline] #0: 000000003c46111c (&(&ctx->ctx_lock)->rlock){..-.}, at: io_submit_one+0xeb6/0x1cf0 fs/aio.c:1908 the dependencies between SOFTIRQ-irq-safe lock and the holding lock: -> (&(&ctx->ctx_lock)->rlock){..-.} { IN-SOFTIRQ-W at: lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826 __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline] _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160 spin_lock_irq include/linux/spinlock.h:354 [inline] free_ioctx_users+0x2d/0x4a0 fs/aio.c:610 percpu_ref_put_many include/linux/percpu-refcount.h:285 [inline] percpu_ref_put include/linux/percpu-refcount.h:301 [inline] percpu_ref_call_confirm_rcu lib/percpu-refcount.c:123 [inline] percpu_ref_switch_to_atomic_rcu+0x3e7/0x520 lib/percpu-refcount.c:158 __rcu_reclaim kernel/rcu/rcu.h:240 [inline] rcu_do_batch kernel/rcu/tree.c:2486 [inline] invoke_rcu_callbacks kernel/rcu/tree.c:2799 [inline] rcu_core+0x928/0x1390 kernel/rcu/tree.c:2780 __do_softirq+0x266/0x95a kernel/softirq.c:292 run_ksoftirqd kernel/softirq.c:654 [inline] run_ksoftirqd+0x8e/0x110 kernel/softirq.c:646 smpboot_thread_fn+0x6ab/0xa10 kernel/smpboot.c:164 kthread+0x357/0x430 kernel/kthread.c:247 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 INITIAL USE at: lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826 __raw_spin_lock_irq include/linux/spinlock_api_smp.h:128 [inline] _raw_spin_lock_irq+0x60/0x80 kernel/locking/spinlock.c:160 spin_lock_irq include/linux/spinlock.h:354 [inline] __do_sys_io_cancel fs/aio.c:2052 [inline] __se_sys_io_cancel fs/aio.c:2035 [inline] __x64_sys_io_cancel+0xd5/0x5a0 fs/aio.c:2035 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe } ... key at: [] __key.52370+0x0/0x40 ... acquired at: lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144 spin_lock include/linux/spinlock.h:329 [inline] aio_poll fs/aio.c:1772 [inline] __io_submit_one fs/aio.c:1875 [inline] io_submit_one+0xedf/0x1cf0 fs/aio.c:1908 __do_sys_io_submit fs/aio.c:1953 [inline] __se_sys_io_submit fs/aio.c:1923 [inline] __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe the dependencies between the lock to be acquired and SOFTIRQ-irq-unsafe lock: -> (&fiq->waitq){+.+.} { HARDIRQ-ON-W at: lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144 spin_lock include/linux/spinlock.h:329 [inline] flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415 fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676 fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687 fuse_send_init fs/fuse/inode.c:989 [inline] fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214 mount_nodev+0x68/0x110 fs/super.c:1392 fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239 legacy_get_tree+0xf2/0x200 fs/fs_context.c:590 vfs_get_tree+0x123/0x450 fs/super.c:1481 do_new_mount fs/namespace.c:2610 [inline] do_mount+0x1436/0x2c40 fs/namespace.c:2932 ksys_mount+0xdb/0x150 fs/namespace.c:3148 __do_sys_mount fs/namespace.c:3162 [inline] __se_sys_mount fs/namespace.c:3159 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe SOFTIRQ-ON-W at: lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144 spin_lock include/linux/spinlock.h:329 [inline] flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415 fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676 fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687 fuse_send_init fs/fuse/inode.c:989 [inline] fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214 mount_nodev+0x68/0x110 fs/super.c:1392 fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239 legacy_get_tree+0xf2/0x200 fs/fs_context.c:590 vfs_get_tree+0x123/0x450 fs/super.c:1481 do_new_mount fs/namespace.c:2610 [inline] do_mount+0x1436/0x2c40 fs/namespace.c:2932 ksys_mount+0xdb/0x150 fs/namespace.c:3148 __do_sys_mount fs/namespace.c:3162 [inline] __se_sys_mount fs/namespace.c:3159 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe INITIAL USE at: lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144 spin_lock include/linux/spinlock.h:329 [inline] flush_bg_queue+0x1f3/0x3c0 fs/fuse/dev.c:415 fuse_request_queue_background+0x2d1/0x580 fs/fuse/dev.c:676 fuse_request_send_background+0x58/0x120 fs/fuse/dev.c:687 fuse_send_init fs/fuse/inode.c:989 [inline] fuse_fill_super+0x13bb/0x1730 fs/fuse/inode.c:1214 mount_nodev+0x68/0x110 fs/super.c:1392 fuse_mount+0x2d/0x40 fs/fuse/inode.c:1239 legacy_get_tree+0xf2/0x200 fs/fs_context.c:590 vfs_get_tree+0x123/0x450 fs/super.c:1481 do_new_mount fs/namespace.c:2610 [inline] do_mount+0x1436/0x2c40 fs/namespace.c:2932 ksys_mount+0xdb/0x150 fs/namespace.c:3148 __do_sys_mount fs/namespace.c:3162 [inline] __se_sys_mount fs/namespace.c:3159 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3159 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe } ... key at: [] __key.43450+0x0/0x40 ... acquired at: lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144 spin_lock include/linux/spinlock.h:329 [inline] aio_poll fs/aio.c:1772 [inline] __io_submit_one fs/aio.c:1875 [inline] io_submit_one+0xedf/0x1cf0 fs/aio.c:1908 __do_sys_io_submit fs/aio.c:1953 [inline] __se_sys_io_submit fs/aio.c:1923 [inline] __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe stack backtrace: CPU: 0 PID: 13779 Comm: syz-executor2 Not tainted 5.0.0-rc4-next-20190131 #23 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_bad_irq_dependency kernel/locking/lockdep.c:1573 [inline] check_usage.cold+0x60f/0x940 kernel/locking/lockdep.c:1605 check_irq_usage kernel/locking/lockdep.c:1650 [inline] check_prev_add_irq kernel/locking/lockdep_states.h:8 [inline] check_prev_add kernel/locking/lockdep.c:1860 [inline] check_prevs_add kernel/locking/lockdep.c:1968 [inline] validate_chain kernel/locking/lockdep.c:2339 [inline] __lock_acquire+0x1f12/0x4790 kernel/locking/lockdep.c:3320 lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:3826 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2f/0x40 kernel/locking/spinlock.c:144 spin_lock include/linux/spinlock.h:329 [inline] aio_poll fs/aio.c:1772 [inline] __io_submit_one fs/aio.c:1875 [inline] io_submit_one+0xedf/0x1cf0 fs/aio.c:1908 __do_sys_io_submit fs/aio.c:1953 [inline] __se_sys_io_submit fs/aio.c:1923 [inline] __x64_sys_io_submit+0x1bd/0x580 fs/aio.c:1923 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Reported-by: syzbot Cc: Christoph Hellwig Cc: Avi Kivity Cc: Miklos Szeredi Cc: Fixes: e8693bcfa0b4 ("aio: allow direct aio poll comletions for keyed wakeups") # v4.19 Signed-off-by: Miklos Szeredi [ bvanassche: added a comment ] Reluctantly-Acked-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 44551d96eaa4..45d5ef8dd0a8 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1661,6 +1661,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, struct poll_iocb *req = container_of(wait, struct poll_iocb, wait); struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); __poll_t mask = key_to_poll(key); + unsigned long flags; req->woken = true; @@ -1669,10 +1670,15 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (!(mask & req->events)) return 0; - /* try to complete the iocb inline if we can: */ - if (spin_trylock(&iocb->ki_ctx->ctx_lock)) { + /* + * Try to complete the iocb inline if we can. Use + * irqsave/irqrestore because not all filesystems (e.g. fuse) + * call this function with IRQs disabled and because IRQs + * have to be disabled before ctx_lock is obtained. + */ + if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { list_del(&iocb->ki_list); - spin_unlock(&iocb->ki_ctx->ctx_lock); + spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); list_del_init(&req->wait.entry); aio_poll_complete(iocb, mask); -- cgit v1.2.3 From c426de69ded0d41c7fe56302597c4d5bf9c734fc Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 2 Jan 2019 01:08:32 -0800 Subject: xtensa: fix get_wchan commit d90b88fd3653f1fb66ecc6571b860d5a5749fa56 upstream. Stack unwinding is implemented incorrectly in xtensa get_wchan: instead of extracting a0 and a1 registers from the spill location under the stack pointer it extracts a word pointed to by the stack pointer and subtracts 4 or 3 from it. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 4bb68133a72a..5a0e0bd68b76 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -320,8 +320,8 @@ unsigned long get_wchan(struct task_struct *p) /* Stack layout: sp-4: ra, sp-3: sp' */ - pc = MAKE_PC_FROM_RA(*(unsigned long*)sp - 4, sp); - sp = *(unsigned long *)sp - 3; + pc = MAKE_PC_FROM_RA(SPILL_SLOT(sp, 0), sp); + sp = SPILL_SLOT(sp, 1); } while (count++ < 16); return 0; } -- cgit v1.2.3 From 09675c2f84bc6fff4d4bb0d4587876d5daa0b107 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 22 Jan 2019 18:22:53 +0100 Subject: gnss: sirf: fix premature wakeup interrupt enable commit 82f844c22588bf47132c82faeda50b6db473162c upstream. Make sure the receiver is powered (and booted) before enabling the wakeup interrupt to avoid spurious interrupts due to a floating input. Similarly, disable the interrupt before powering off on probe errors and on unbind. Fixes: d2efbbd18b1e ("gnss: add driver for sirfstar-based receivers") Cc: stable # 4.19 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/gnss/sirf.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/gnss/sirf.c b/drivers/gnss/sirf.c index 2c22836d3ffd..4596fde16dfe 100644 --- a/drivers/gnss/sirf.c +++ b/drivers/gnss/sirf.c @@ -310,30 +310,26 @@ static int sirf_probe(struct serdev_device *serdev) ret = -ENODEV; goto err_put_device; } + + ret = regulator_enable(data->vcc); + if (ret) + goto err_put_device; + + /* Wait for chip to boot into hibernate mode. */ + msleep(SIRF_BOOT_DELAY); } if (data->wakeup) { ret = gpiod_to_irq(data->wakeup); if (ret < 0) - goto err_put_device; - + goto err_disable_vcc; data->irq = ret; - ret = devm_request_threaded_irq(dev, data->irq, NULL, - sirf_wakeup_handler, + ret = request_threaded_irq(data->irq, NULL, sirf_wakeup_handler, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "wakeup", data); if (ret) - goto err_put_device; - } - - if (data->on_off) { - ret = regulator_enable(data->vcc); - if (ret) - goto err_put_device; - - /* Wait for chip to boot into hibernate mode */ - msleep(SIRF_BOOT_DELAY); + goto err_disable_vcc; } if (IS_ENABLED(CONFIG_PM)) { @@ -342,7 +338,7 @@ static int sirf_probe(struct serdev_device *serdev) } else { ret = sirf_runtime_resume(dev); if (ret < 0) - goto err_disable_vcc; + goto err_free_irq; } ret = gnss_register_device(gdev); @@ -356,6 +352,9 @@ err_disable_rpm: pm_runtime_disable(dev); else sirf_runtime_suspend(dev); +err_free_irq: + if (data->wakeup) + free_irq(data->irq, data); err_disable_vcc: if (data->on_off) regulator_disable(data->vcc); @@ -376,6 +375,9 @@ static void sirf_remove(struct serdev_device *serdev) else sirf_runtime_suspend(&serdev->dev); + if (data->wakeup) + free_irq(data->irq, data); + if (data->on_off) regulator_disable(data->vcc); -- cgit v1.2.3 From 9765ec7f7a02299d8508f85085519d32ac330b82 Mon Sep 17 00:00:00 2001 From: Karoly Pados Date: Sun, 17 Feb 2019 18:59:01 +0100 Subject: USB: serial: cp210x: fix GPIO in autosuspend commit 7b0b644b9aa2de5032db0f468fddca091d0b7b90 upstream. Current GPIO code in cp210x fails to take USB autosuspend into account, making it practically impossible to use GPIOs with autosuspend enabled without user configuration. Fix this like for ftdi_sio in a previous patch. Tested on a CP2102N. Signed-off-by: Karoly Pados Fixes: cf5276ce7867 ("USB: serial: cp210x: Adding GPIO support for CP2105") Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 3286ed462fc5..4c66edf533fe 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -1354,8 +1354,13 @@ static int cp210x_gpio_get(struct gpio_chip *gc, unsigned int gpio) if (priv->partnum == CP210X_PARTNUM_CP2105) req_type = REQTYPE_INTERFACE_TO_HOST; + result = usb_autopm_get_interface(serial->interface); + if (result) + return result; + result = cp210x_read_vendor_block(serial, req_type, CP210X_READ_LATCH, &buf, sizeof(buf)); + usb_autopm_put_interface(serial->interface); if (result < 0) return result; @@ -1376,6 +1381,10 @@ static void cp210x_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value) buf.mask = BIT(gpio); + result = usb_autopm_get_interface(serial->interface); + if (result) + goto out; + if (priv->partnum == CP210X_PARTNUM_CP2105) { result = cp210x_write_vendor_block(serial, REQTYPE_HOST_TO_INTERFACE, @@ -1393,6 +1402,8 @@ static void cp210x_gpio_set(struct gpio_chip *gc, unsigned int gpio, int value) NULL, 0, USB_CTRL_SET_TIMEOUT); } + usb_autopm_put_interface(serial->interface); +out: if (result < 0) { dev_err(&serial->interface->dev, "failed to set GPIO value: %d\n", result); -- cgit v1.2.3 From cd61d473f71f0188cbc5c5d0bd6dbdb8ec96018c Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 7 Feb 2019 11:06:02 -0800 Subject: selftests: firmware: fix verify_reqs() return value commit 344c0152d878922365464b7140c74c2a5e073d99 upstream. commit a6a9be9270c87 ("selftests: firmware: return Kselftest Skip code for skipped tests") by Shuah modified failures to return the special error code of $ksft_skip (4). We have a corner case issue where we *do* want to verify_reqs(). Cc: # >= 4.18 Fixes: a6a9be9270c87 ("selftests: firmware: return Kselftest Skip code for for skipped tests") Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/firmware/fw_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh index 6c5f1b2ffb74..1cbb12e284a6 100755 --- a/tools/testing/selftests/firmware/fw_lib.sh +++ b/tools/testing/selftests/firmware/fw_lib.sh @@ -91,7 +91,7 @@ verify_reqs() if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo "usermode helper disabled so ignoring test" - exit $ksft_skip + exit 0 fi fi } -- cgit v1.2.3 From 43593a30a068da533d48b638d452016fe9de3abc Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sun, 27 Jan 2019 16:33:59 +0800 Subject: Bluetooth: btrtl: Restore old logic to assume firmware is already loaded commit 00df214b1faae520880cc5c57e206f21239ef741 upstream. Realtek bluetooth may not work after reboot: [ 12.446130] Bluetooth: hci0: RTL: rtl: unknown IC info, lmp subver a99e, hci rev 826c, hci ver 0008 This is a regression introduced by commit 26503ad25de8 ("Bluetooth: btrtl: split the device initialization into smaller parts"). The new logic errors out early when no matching IC info can be found, in this case it means the firmware is already loaded. So let's assume the firmware is already loaded when we can't find matching IC info, like the old logic did. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201921 Fixes: 26503ad25de8 ("Bluetooth: btrtl: split the device initialization into smaller parts") Cc: stable@vger.kernel.org # 4.19+ Signed-off-by: Kai-Heng Feng Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btrtl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 7f9ea8e4c1b2..1342f8e6025c 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -544,10 +544,9 @@ struct btrtl_device_info *btrtl_initialize(struct hci_dev *hdev, hdev->bus); if (!btrtl_dev->ic_info) { - rtl_dev_err(hdev, "rtl: unknown IC info, lmp subver %04x, hci rev %04x, hci ver %04x", + rtl_dev_info(hdev, "rtl: unknown IC info, lmp subver %04x, hci rev %04x, hci ver %04x", lmp_subver, hci_rev, hci_ver); - ret = -EINVAL; - goto err_free; + return btrtl_dev; } if (btrtl_dev->ic_info->has_rom_version) { @@ -602,6 +601,11 @@ int btrtl_download_firmware(struct hci_dev *hdev, * standard btusb. Once that firmware is uploaded, the subver changes * to a different value. */ + if (!btrtl_dev->ic_info) { + rtl_dev_info(hdev, "rtl: assuming no firmware upload needed\n"); + return 0; + } + switch (btrtl_dev->ic_info->lmp_subver) { case RTL_ROM_LMP_8723A: case RTL_ROM_LMP_3499: -- cgit v1.2.3 From 8d368fc58e7aeb42b39d7bec7c585efdfbc49074 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 2 Jan 2019 16:11:20 -0800 Subject: Bluetooth: Fix locking in bt_accept_enqueue() for BH context commit c4f5627f7eeecde1bb6b646d8c0907b96dc2b2a6 upstream. With commit e16337622016 ("Bluetooth: Handle bt_accept_enqueue() socket atomically") lock_sock[_nested]() is used to acquire the socket lock before manipulating the socket. lock_sock[_nested]() may block, which is problematic since bt_accept_enqueue() can be called in bottom half context (e.g. from rfcomm_connect_ind()): [] __might_sleep+0x4c/0x80 [] lock_sock_nested+0x24/0x58 [] bt_accept_enqueue+0x48/0xd4 [bluetooth] [] rfcomm_connect_ind+0x190/0x218 [rfcomm] Add a parameter to bt_accept_enqueue() to indicate whether the function is called from BH context, and acquire the socket lock with bh_lock_sock_nested() if that's the case. Also adapt all callers of bt_accept_enqueue() to pass the new parameter: - l2cap_sock_new_connection_cb() - uses lock_sock() to lock the parent socket => process context - rfcomm_connect_ind() - acquires the parent socket lock with bh_lock_sock() => BH context - __sco_chan_add() - called from sco_chan_add(), which is called from sco_connect(). parent is NULL, hence bt_accept_enqueue() isn't called in this code path and we can ignore it - also called from sco_conn_ready(). uses bh_lock_sock() to acquire the parent lock => BH context Fixes: e16337622016 ("Bluetooth: Handle bt_accept_enqueue() socket atomically") Signed-off-by: Matthias Kaehlcke Reviewed-by: Douglas Anderson Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/bluetooth.h | 2 +- net/bluetooth/af_bluetooth.c | 16 +++++++++++++--- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/sco.c | 2 +- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index ec9d6bc65855..fabee6db0abb 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -276,7 +276,7 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); int bt_sock_wait_ready(struct sock *sk, unsigned long flags); -void bt_accept_enqueue(struct sock *parent, struct sock *sk); +void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh); void bt_accept_unlink(struct sock *sk); struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index deacc52d7ff1..8d12198eaa94 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -154,15 +154,25 @@ void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) } EXPORT_SYMBOL(bt_sock_unlink); -void bt_accept_enqueue(struct sock *parent, struct sock *sk) +void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) { BT_DBG("parent %p, sk %p", parent, sk); sock_hold(sk); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (bh) + bh_lock_sock_nested(sk); + else + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q); bt_sk(sk)->parent = parent; - release_sock(sk); + + if (bh) + bh_unlock_sock(sk); + else + release_sock(sk); + parent->sk_ack_backlog++; } EXPORT_SYMBOL(bt_accept_enqueue); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 686bdc6b35b0..a3a2cd55e23a 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1252,7 +1252,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) l2cap_sock_init(sk, parent); - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, false); release_sock(parent); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index d606e9212291..c044ff2f73e6 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -988,7 +988,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * rfcomm_pi(sk)->channel = channel; sk->sk_state = BT_CONFIG; - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, true); /* Accept connection and return socket DLC */ *d = rfcomm_pi(sk)->dlc; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 8f0f9279eac9..a4ca55df7390 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -193,7 +193,7 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, conn->sk = sk; if (parent) - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, true); } static int sco_chan_add(struct sco_conn *conn, struct sock *sk, -- cgit v1.2.3 From b60d90b2d3d14c426693a0a34041db11be66d29e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 19 Feb 2019 10:10:38 +0800 Subject: exec: Fix mem leak in kernel_read_file commit f612acfae86af7ecad754ae6a46019be9da05b8e upstream. syzkaller report this: BUG: memory leak unreferenced object 0xffffc9000488d000 (size 9195520): comm "syz-executor.0", pid 2752, jiffies 4294787496 (age 18.757s) hex dump (first 32 bytes): ff ff ff ff ff ff ff ff a8 00 00 00 01 00 00 00 ................ 02 00 00 00 00 00 00 00 80 a1 7a c1 ff ff ff ff ..........z..... backtrace: [<000000000863775c>] __vmalloc_node mm/vmalloc.c:1795 [inline] [<000000000863775c>] __vmalloc_node_flags mm/vmalloc.c:1809 [inline] [<000000000863775c>] vmalloc+0x8c/0xb0 mm/vmalloc.c:1831 [<000000003f668111>] kernel_read_file+0x58f/0x7d0 fs/exec.c:924 [<000000002385813f>] kernel_read_file_from_fd+0x49/0x80 fs/exec.c:993 [<0000000011953ff1>] __do_sys_finit_module+0x13b/0x2a0 kernel/module.c:3895 [<000000006f58491f>] do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 [<00000000ee78baf4>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000241f889b>] 0xffffffffffffffff It should goto 'out_free' lable to free allocated buf while kernel_read fails. Fixes: 39d637af5aa7 ("vfs: forbid write access when reading a file into memory") Signed-off-by: YueHaibing Signed-off-by: Al Viro Cc: Thibaut Sautereau Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index 1ebf6e5a521d..433b1257694a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -929,7 +929,7 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, bytes = kernel_read(file, *buf + pos, i_size - pos, &pos); if (bytes < 0) { ret = bytes; - goto out; + goto out_free; } if (bytes == 0) -- cgit v1.2.3 From ebfb07e8c23e0c0bb4ffd9cb26adec992ac47911 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 14 Feb 2019 22:57:41 +0100 Subject: scsi: core: reset host byte in DID_NEXUS_FAILURE case commit 4a067cf823d9d8e50d41cfb618011c0d4a969c72 upstream. Up to 4.12, __scsi_error_from_host_byte() would reset the host byte to DID_OK for various cases including DID_NEXUS_FAILURE. Commit 2a842acab109 ("block: introduce new block status code type") replaced this function with scsi_result_to_blk_status() and removed the host-byte resetting code for the DID_NEXUS_FAILURE case. As the line set_host_byte(cmd, DID_OK) was preserved for the other cases, I suppose this was an editing mistake. The fact that the host byte remains set after 4.13 is causing problems with the sg_persist tool, which now returns success rather then exit status 24 when a RESERVATION CONFLICT error is encountered. Fixes: 2a842acab109 "block: introduce new block status code type" Signed-off-by: Martin Wilck Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b5f638286037..18e4289baf99 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -757,6 +757,7 @@ static blk_status_t scsi_result_to_blk_status(struct scsi_cmnd *cmd, int result) set_host_byte(cmd, DID_OK); return BLK_STS_TARGET; case DID_NEXUS_FAILURE: + set_host_byte(cmd, DID_OK); return BLK_STS_NEXUS; case DID_ALLOC_FAILURE: set_host_byte(cmd, DID_OK); -- cgit v1.2.3 From ca490a987365252c2785fc13714c8e4219363122 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 1 Mar 2019 22:05:29 +0100 Subject: bpf: fix sanitation rewrite in case of non-pointers commit 3612af783cf52c74a031a2f11b82247b2599d3cd upstream. Marek reported that he saw an issue with the below snippet in that timing measurements where off when loaded as unpriv while results were reasonable when loaded as privileged: [...] uint64_t a = bpf_ktime_get_ns(); uint64_t b = bpf_ktime_get_ns(); uint64_t delta = b - a; if ((int64_t)delta > 0) { [...] Turns out there is a bug where a corner case is missing in the fix d3bd7413e0ca ("bpf: fix sanitation of alu op with pointer / scalar type from different paths"), namely fixup_bpf_calls() only checks whether aux has a non-zero alu_state, but it also needs to test for the case of BPF_ALU_NON_POINTER since in both occasions we need to skip the masking rewrite (as there is nothing to mask). Fixes: d3bd7413e0ca ("bpf: fix sanitation of alu op with pointer / scalar type from different paths") Reported-by: Marek Majkowski Reported-by: Arthur Fabre Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/netdev/CAJPywTJqP34cK20iLM5YmUMz9KXQOdu1-+BZrGMAGgLuBWz7fg@mail.gmail.com/T/ Acked-by: Song Liu Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4d81be2d0739..bcb42aaf1b3a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6035,7 +6035,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) u32 off_reg; aux = &env->insn_aux_data[i + delta]; - if (!aux->alu_state) + if (!aux->alu_state || + aux->alu_state == BPF_ALU_NON_POINTER) continue; isneg = aux->alu_state & BPF_ALU_NEG_VALUE; -- cgit v1.2.3 From 6a31767f84ad31445865f1297d49937319f775c3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 10 Mar 2019 07:17:22 +0100 Subject: Linux 4.19.28 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 70ed9a53558a..c6ac023ba33a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 27 +SUBLEVEL = 28 EXTRAVERSION = NAME = "People's Front" -- cgit v1.2.3 From ac8befb6dd601fd35c1d64167750c6698bc27c80 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Tue, 18 Dec 2018 20:32:48 -0500 Subject: media: uvcvideo: Fix 'type' check leading to overflow commit 47bb117911b051bbc90764a8bff96543cbd2005f upstream. When initially testing the Camera Terminal Descriptor wTerminalType field (buffer[4]), no mask is used. Later in the function, the MSB is overloaded to store the descriptor subtype, and so a mask of 0x7fff is used to check the type. If a descriptor is specially crafted to set this overloaded bit in the original wTerminalType field, the initial type check will fail (falling through, without adjusting the buffer size), but the later type checks will pass, assuming the buffer has been made suitably large, causing an overflow. Avoid this problem by checking for the MSB in the wTerminalType field. If the bit is set, assume the descriptor is bad, and abort parsing it. Originally reported here: https://groups.google.com/forum/#!topic/syzkaller/Ot1fOE6v1d8 A similar (non-compiling) patch was provided at that time. Reported-by: syzbot Signed-off-by: Alistair Strachan Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_driver.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 361abbc00486..6f1fd40fce10 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1065,11 +1065,19 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - /* Make sure the terminal type MSB is not null, otherwise it - * could be confused with a unit. + /* + * Reject invalid terminal types that would cause issues: + * + * - The high byte must be non-zero, otherwise it would be + * confused with a unit. + * + * - Bit 15 must be 0, as we use it internally as a terminal + * direction flag. + * + * Other unknown types are accepted. */ type = get_unaligned_le16(&buffer[4]); - if ((type & 0xff00) == 0) { + if ((type & 0x7f00) == 0 || (type & 0x8000) != 0) { uvc_trace(UVC_TRACE_DESCR, "device %d videocontrol " "interface %d INPUT_TERMINAL %d has invalid " "type 0x%04x, skipping\n", udev->devnum, -- cgit v1.2.3 From 8ce41db0dcfc5c8884064ce0d8fde8a74ee27a91 Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Sun, 6 Jan 2019 21:31:20 -0500 Subject: vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel [ Upstream commit dd9ee3444014e8f28c0eefc9fffc9ac9c5248c12 ] Recently we run a network test over ipcomp virtual tunnel.We find that if a ipv4 packet needs fragment, then the peer can't receive it. We deep into the code and find that when packet need fragment the smaller fragment will be encapsulated by ipip not ipcomp. So when the ipip packet goes into xfrm, it's skb->dev is not properly set. The ipv4 reassembly code always set skb'dev to the last fragment's dev. After ipv4 defrag processing, when the kernel rp_filter parameter is set, the skb will be drop by -EXDEV error. This patch adds compatible support for the ipip process in ipcomp virtual tunnel. Signed-off-by: Su Yanjun Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv4/ip_vti.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 7f56944b020f..40a7cd56e008 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -74,6 +74,33 @@ drop: return 0; } +static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + struct ip_tunnel *tunnel; + const struct iphdr *iph = ip_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + + skb->dev = tunnel->dev; + + return xfrm_input(skb, nexthdr, spi, encap_type); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; +} + static int vti_rcv(struct sk_buff *skb) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; @@ -82,6 +109,14 @@ static int vti_rcv(struct sk_buff *skb) return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); } +static int vti_rcv_ipip(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); +} + static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -435,6 +470,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { .priority = 100, }; +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = vti_rcv_ipip, + .err_handler = vti4_err, + .priority = 0, +}; + static int __net_init vti_init_net(struct net *net) { int err; @@ -603,6 +644,13 @@ static int __init vti_init(void) if (err < 0) goto xfrm_proto_comp_failed; + msg = "ipip tunnel"; + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: cant't register tunnel\n",__func__); + goto xfrm_tunnel_failed; + } + msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); if (err < 0) @@ -612,6 +660,8 @@ static int __init vti_init(void) rtnl_link_failed: xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); +xfrm_tunnel_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: -- cgit v1.2.3 From 5d1dc10ba3ac70d384b60baa343511551c0fe89c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 17 Jan 2019 11:48:34 -0800 Subject: perf script: Fix crash with printing mixed trace point and other events [ Upstream commit 96167167b6e17b25c0e05ecc31119b73baeab094 ] 'perf script' crashes currently when printing mixed trace points and other events because the trace format does not handle events without trace meta data. Add a simple check to avoid that. % cat > test.c main() { printf("Hello world\n"); } ^D % gcc -g -o test test.c % sudo perf probe -x test 'test.c:3' % perf record -e '{cpu/cpu-cycles,period=10000/,probe_test:main}:S' ./test % perf script Committer testing: Before: # perf probe -x /lib64/libc-2.28.so malloc Added new event: probe_libc:malloc (on malloc in /usr/lib64/libc-2.28.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc -aR sleep 1 # perf probe -l probe_libc:malloc (on __libc_malloc@malloc/malloc.c in /usr/lib64/libc-2.28.so) # perf record -e '{cpu/cpu-cycles,period=10000/,probe_libc:*}:S' sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.023 MB perf.data (40 samples) ] # perf script Segmentation fault (core dumped) ^C # After: # perf script | head -6 sleep 2888 94796.944981: 16198 cpu/cpu-cycles,period=10000/: ffffffff925dc04f get_random_u32+0x1f (/lib/modules/5.0.0-rc2+/build/vmlinux) sleep 2888 [-01] 94796.944981: probe_libc:malloc: sleep 2888 94796.944983: 4713 cpu/cpu-cycles,period=10000/: ffffffff922763af change_protection+0xcf (/lib/modules/5.0.0-rc2+/build/vmlinux) sleep 2888 [-01] 94796.944983: probe_libc:malloc: sleep 2888 94796.944986: 9934 cpu/cpu-cycles,period=10000/: ffffffff922777e0 move_page_tables+0x0 (/lib/modules/5.0.0-rc2+/build/vmlinux) sleep 2888 [-01] 94796.944986: probe_libc:malloc: # Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190117194834.21940-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-script.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6c1e7ceedcf3..ce2aee11e360 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1658,7 +1658,7 @@ static void process_event(struct perf_script *script, return; } - if (PRINT_FIELD(TRACE)) { + if (PRINT_FIELD(TRACE) && sample->raw_data) { event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, fp); } -- cgit v1.2.3 From 6ec0698f1c40b8c5f9f73b70d411003e680c3275 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 10 Jan 2019 17:17:16 -0800 Subject: perf core: Fix perf_proc_update_handler() bug [ Upstream commit 1a51c5da5acc6c188c917ba572eebac5f8793432 ] The perf_proc_update_handler() handles /proc/sys/kernel/perf_event_max_sample_rate syctl variable. When the PMU IRQ handler timing monitoring is disabled, i.e, when /proc/sys/kernel/perf_cpu_time_max_percent is equal to 0 or 100, then no modification to sysctl_perf_event_sample_rate is allowed to prevent possible hang from wrong values. The problem is that the test to prevent modification is made after the sysctl variable is modified in perf_proc_update_handler(). You get an error: $ echo 10001 >/proc/sys/kernel/perf_event_max_sample_rate echo: write error: invalid argument But the value is still modified causing all sorts of inconsistencies: $ cat /proc/sys/kernel/perf_event_max_sample_rate 10001 This patch fixes the problem by moving the parsing of the value after the test. Committer testing: # echo 100 > /proc/sys/kernel/perf_cpu_time_max_percent # echo 10001 > /proc/sys/kernel/perf_event_max_sample_rate -bash: echo: write error: Invalid argument # cat /proc/sys/kernel/perf_event_max_sample_rate 10001 # Signed-off-by: Stephane Eranian Reviewed-by: Andi Kleen Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1547169436-6266-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- kernel/events/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4fb9d5054618..aa996a0854b9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -436,18 +436,18 @@ int perf_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - - if (ret || !write) - return ret; - + int ret; + int perf_cpu = sysctl_perf_cpu_time_max_percent; /* * If throttling is disabled don't allow the write: */ - if (sysctl_perf_cpu_time_max_percent == 100 || - sysctl_perf_cpu_time_max_percent == 0) + if (write && (perf_cpu == 100 || perf_cpu == 0)) return -EINVAL; + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; update_perf_cpu_limits(); -- cgit v1.2.3 From 1e4b75416682df6dfbd17f05b0653cb52536d2b5 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Sat, 19 Jan 2019 00:12:39 -0800 Subject: perf tools: Handle TOPOLOGY headers with no CPU [ Upstream commit 1497e804d1a6e2bd9107ddf64b0310449f4673eb ] This patch fixes an issue in cpumap.c when used with the TOPOLOGY header. In some configurations, some NUMA nodes may have no CPU (empty cpulist). Yet a cpumap map must be created otherwise perf abort with an error. This patch handles this case by creating a dummy map. Before: $ perf record -o - -e cycles noploop 2 | perf script -i - 0x6e8 [0x6c]: failed to process type: 80 After: $ perf record -o - -e cycles noploop 2 | perf script -i - noploop for 2 seconds Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1547885559-1657-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/cpumap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 1ccbd3342069..383674f448fc 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -134,7 +134,12 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (!cpu_list) return cpu_map__read_all_cpu_map(); - if (!isdigit(*cpu_list)) + /* + * must handle the case of empty cpumap to cover + * TOPOLOGY header for NUMA nodes with no CPU + * ( e.g., because of CPU hotplug) + */ + if (!isdigit(*cpu_list) && *cpu_list != '\0') goto out; while (isdigit(*cpu_list)) { @@ -181,8 +186,10 @@ struct cpu_map *cpu_map__new(const char *cpu_list) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else + else if (*cpu_list != '\0') cpus = cpu_map__default_new(); + else + cpus = cpu_map__dummy_new(); invalid: free(tmp_cpus); out: -- cgit v1.2.3 From d5f05016b0e1704d7754bb5e5dd736435d81a0db Mon Sep 17 00:00:00 2001 From: Tony Jones Date: Sun, 20 Jan 2019 11:14:14 -0800 Subject: perf script: Fix crash when processing recorded stat data [ Upstream commit 8bf8c6da53c2265aea365a1de6038f118f522113 ] While updating perf to work with Python3 and Python2 I noticed that the stat-cpi script was dumping core. $ perf stat -e cycles,instructions record -o /tmp/perf.data /bin/false Performance counter stats for '/bin/false': 802,148 cycles 604,622 instructions 802,148 cycles 604,622 instructions 0.001445842 seconds time elapsed $ perf script -i /tmp/perf.data -s scripts/python/stat-cpi.py Segmentation fault (core dumped) ... ... rblist=rblist@entry=0xb2a200 , new_entry=new_entry@entry=0x7ffcb755c310) at util/rblist.c:33 ctx=, type=, create=, cpu=, evsel=) at util/stat-shadow.c:118 ctx=, type=, st=) at util/stat-shadow.c:196 count=count@entry=727442, cpu=cpu@entry=0, st=0xb2a200 ) at util/stat-shadow.c:239 config=config@entry=0xafeb40 , counter=counter@entry=0x133c6e0) at util/stat.c:372 ... ... The issue is that since 1fcd03946b52 perf_stat__update_shadow_stats now calls update_runtime_stat passing rt_stat rather than calling update_stats but perf_stat__init_shadow_stats has never been called to initialize rt_stat in the script path processing recorded stat data. Since I can't see any reason why perf_stat__init_shadow_stats() is presently initialized like it is in builtin-script.c::perf_sample__fprint_metric() [4bd1bef8bba2f] I'm proposing it instead be initialized once in __cmd_script Committer testing: After applying the patch: # perf script -i /tmp/perf.data -s tools/perf/scripts/python/stat-cpi.py 0.001970: cpu -1, thread -1 -> cpi 1.709079 (1075684/629394) # No segfault. Signed-off-by: Tony Jones Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Ravi Bangoria Cc: Andi Kleen Cc: Jin Yao Fixes: 1fcd03946b52 ("perf stat: Update per-thread shadow stats") Link: http://lkml.kernel.org/r/20190120191414.12925-1-tonyj@suse.de Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-script.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ce2aee11e360..53c11fc0855e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1589,13 +1589,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, .force_header = false, }; struct perf_evsel *ev2; - static bool init; u64 val; - if (!init) { - perf_stat__init_shadow_stats(); - init = true; - } if (!evsel->stats) perf_evlist__alloc_stats(script->session->evlist, false); if (evsel_script(evsel->leader)->gnum++ == 0) @@ -2214,6 +2209,8 @@ static int __cmd_script(struct perf_script *script) signal(SIGINT, sig_handler); + perf_stat__init_shadow_stats(); + /* override event processing functions */ if (script->show_task_events) { script->tool.comm = process_comm_event; -- cgit v1.2.3 From 43b0c93918a0d208340343cf3fc923a060d86c93 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Thu, 17 Jan 2019 12:41:32 -0800 Subject: IB/{hfi1, qib}: Fix WC.byte_len calculation for UD_SEND_WITH_IMM [ Upstream commit 904bba211acc2112fdf866e5a2bc6cd9ecd0de1b ] The work completion length for a receiving a UD send with immediate is short by 4 bytes causing application using this opcode to fail. The UD receive logic incorrectly subtracts 4 bytes for immediate value. These bytes are already included in header length and are used to calculate header/payload split, so the result is these 4 bytes are subtracted twice, once when the header length subtracted from the overall length and once again in the UD opcode specific path. Remove the extra subtraction when handling the opcode. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Michael J. Ruhl Signed-off-by: Brian Welty Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/ud.c | 1 - drivers/infiniband/hw/qib/qib_ud.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 70d39fc450a1..54eb69564264 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -980,7 +980,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = packet->ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index f8d029a2390f..bce2b5cd3c7b 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -513,7 +513,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { wc.ex.imm_data = ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; - tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { wc.ex.imm_data = 0; wc.wc_flags = 0; -- cgit v1.2.3 From 697863bf7d724ad067988468b19f8b411993151b Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Thu, 17 Jan 2019 12:29:02 -0700 Subject: iommu/amd: Call free_iova_fast with pfn in map_sg [ Upstream commit 51d8838d66d3249508940d8f59b07701f2129723 ] In the error path of map_sg, free_iova_fast is being called with address instead of the pfn. This results in a bad value getting into the rcache, and can result in hitting a BUG_ON when iova_magazine_free_pfns is called. Cc: Joerg Roedel Cc: Suravee Suthikulpanit Signed-off-by: Jerry Snitselaar Fixes: 80187fd39dcb ("iommu/amd: Optimize map_sg and unmap_sg") Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 34c9aa76a7bd..2a5c05ca7fca 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2561,7 +2561,7 @@ out_unmap: } out_free_iova: - free_iova_fast(&dma_dom->iovad, address, npages); + free_iova_fast(&dma_dom->iovad, address >> PAGE_SHIFT, npages); out_err: return 0; -- cgit v1.2.3 From 9e1f977d82eca514e71826bdb9cb34da2e687f0d Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Sat, 19 Jan 2019 10:38:05 -0700 Subject: iommu/amd: Unmap all mapped pages in error path of map_sg [ Upstream commit f1724c0883bb0ce93b8dcb94b53dcca3b75ac9a7 ] In the error path of map_sg there is an incorrect if condition for breaking out of the loop that searches the scatterlist for mapped pages to unmap. Instead of breaking out of the loop once all the pages that were mapped have been unmapped, it will break out of the loop after it has unmapped 1 page. Fix the condition, so it breaks out of the loop only after all the mapped pages have been unmapped. Fixes: 80187fd39dcb ("iommu/amd: Optimize map_sg and unmap_sg") Cc: Joerg Roedel Signed-off-by: Jerry Snitselaar Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2a5c05ca7fca..2ab87e3f07e6 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2555,7 +2555,7 @@ out_unmap: bus_addr = address + s->dma_address + (j << PAGE_SHIFT); iommu_unmap_page(domain, bus_addr, PAGE_SIZE); - if (--mapped_pages) + if (--mapped_pages == 0) goto out_free_iova; } } -- cgit v1.2.3 From ffabf74c67967ac6c14f66d8a4f0f218c890e2cd Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sat, 12 Jan 2019 16:16:27 +0800 Subject: riscv: fixup max_low_pfn with PFN_DOWN. [ Upstream commit 28198c4639b39899a728ac89aea29d2a7a72562f ] max_low_pfn should be pfn_size not byte_size. Signed-off-by: Guo Ren Signed-off-by: Mao Han Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/setup.c | 2 +- arch/riscv/mm/init.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index b2d26d9d8489..9713d4e8c22b 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -186,7 +186,7 @@ static void __init setup_bootmem(void) BUG_ON(mem_size == 0); set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = memblock_end_of_DRAM(); + max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 58a522f9bcc3..200a4b315e15 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -29,7 +29,8 @@ static void __init zone_sizes_init(void) unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, max_low_pfn)); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, + (unsigned long) PFN_PHYS(max_low_pfn))); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; -- cgit v1.2.3 From e0b03a6bad1a7c3f89d0a2d4aa0584376ebe3ec3 Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Thu, 10 Jan 2019 16:39:06 +0800 Subject: ipvs: Fix signed integer overflow when setsockopt timeout [ Upstream commit 53ab60baa1ac4f20b080a22c13b77b6373922fd7 ] There is a UBSAN bug report as below: UBSAN: Undefined behaviour in net/netfilter/ipvs/ip_vs_ctl.c:2227:21 signed integer overflow: -2147483647 * 1000 cannot be represented in type 'int' Reproduce program: #include #include #include #define IPPROTO_IP 0 #define IPPROTO_RAW 255 #define IP_VS_BASE_CTL (64+1024+64) #define IP_VS_SO_SET_TIMEOUT (IP_VS_BASE_CTL+10) /* The argument to IP_VS_SO_GET_TIMEOUT */ struct ipvs_timeout_t { int tcp_timeout; int tcp_fin_timeout; int udp_timeout; }; int main() { int ret = -1; int sockfd = -1; struct ipvs_timeout_t to; sockfd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); if (sockfd == -1) { printf("socket init error\n"); return -1; } to.tcp_timeout = -2147483647; to.tcp_fin_timeout = -2147483647; to.udp_timeout = -2147483647; ret = setsockopt(sockfd, IPPROTO_IP, IP_VS_SO_SET_TIMEOUT, (char *)(&to), sizeof(to)); printf("setsockopt return %d\n", ret); return ret; } Return -EINVAL if the timeout value is negative or max than 'INT_MAX / HZ'. Signed-off-by: ZhangXiaoxu Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipvs/ip_vs_ctl.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 518364f4abcc..55a77314340a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2220,6 +2220,18 @@ static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user u->tcp_fin_timeout, u->udp_timeout); +#ifdef CONFIG_IP_VS_PROTO_TCP + if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || + u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { + return -EINVAL; + } +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP + if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) + return -EINVAL; +#endif + #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); -- cgit v1.2.3 From a038ed686aa6a504114e55f5b2c5493cdfefe2bf Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 24 Jan 2019 04:16:45 +0000 Subject: iommu/amd: Fix IOMMU page flush when detach device from a domain [ Upstream commit 9825bd94e3a2baae1f4874767ae3a7d4c049720e ] When a VM is terminated, the VFIO driver detaches all pass-through devices from VFIO domain by clearing domain id and page table root pointer from each device table entry (DTE), and then invalidates the DTE. Then, the VFIO driver unmap pages and invalidate IOMMU pages. Currently, the IOMMU driver keeps track of which IOMMU and how many devices are attached to the domain. When invalidate IOMMU pages, the driver checks if the IOMMU is still attached to the domain before issuing the invalidate page command. However, since VFIO has already detached all devices from the domain, the subsequent INVALIDATE_IOMMU_PAGES commands are being skipped as there is no IOMMU attached to the domain. This results in data corruption and could cause the PCI device to end up in indeterministic state. Fix this by invalidate IOMMU pages when detach a device, and before decrementing the per-domain device reference counts. Cc: Boris Ostrovsky Suggested-by: Joerg Roedel Co-developed-by: Brijesh Singh Signed-off-by: Brijesh Singh Signed-off-by: Suravee Suthikulpanit Fixes: 6de8ad9b9ee0 ('x86/amd-iommu: Make iommu_flush_pages aware of multiple IOMMUs') Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2ab87e3f07e6..27500abe8ca7 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1929,16 +1929,13 @@ static void do_attach(struct iommu_dev_data *dev_data, static void do_detach(struct iommu_dev_data *dev_data) { + struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu; u16 alias; iommu = amd_iommu_rlookup_table[dev_data->devid]; alias = dev_data->alias; - /* decrease reference counters */ - dev_data->domain->dev_iommu[iommu->index] -= 1; - dev_data->domain->dev_cnt -= 1; - /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); @@ -1948,6 +1945,16 @@ static void do_detach(struct iommu_dev_data *dev_data) /* Flush the DTE entry */ device_flush_dte(dev_data); + + /* Flush IOTLB */ + domain_flush_tlb_pde(domain); + + /* Wait for the flushes to finish */ + domain_flush_complete(domain); + + /* decrease reference counters - needs to happen after the flushes */ + domain->dev_iommu[iommu->index] -= 1; + domain->dev_cnt -= 1; } /* -- cgit v1.2.3 From cf872189c804dce1eb104643c7eec173b3933227 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Jan 2019 22:46:25 +0300 Subject: clk: ti: Fix error handling in ti_clk_parse_divider_data() [ Upstream commit 303aef8b84272d73999a3207dd05bbe10ed89dc5 ] The ti_clk_parse_divider_data() function is only called from _get_div_table_from_setup(). That function doesn't look at the return value but instead looks at the "*table" pointer. In this case, if the kcalloc() fails then *table is NULL (which means success). It should instead be an error pointer. The ti_clk_parse_divider_data() function has two callers. One checks for errors and the other doesn't. I have fixed it so now both handle errors. Fixes: 4f6be5655dc9 ("clk: ti: divider: add driver internal API for parsing divider data") Signed-off-by: Dan Carpenter Acked-by: Tero Kristo Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/ti/divider.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c index ccfb4d9a152a..079f0beda8b6 100644 --- a/drivers/clk/ti/divider.c +++ b/drivers/clk/ti/divider.c @@ -367,8 +367,10 @@ int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div, num_dividers = i; tmp = kcalloc(valid_div + 1, sizeof(*tmp), GFP_KERNEL); - if (!tmp) + if (!tmp) { + *table = ERR_PTR(-ENOMEM); return -ENOMEM; + } valid_div = 0; *width = 0; @@ -403,6 +405,7 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup) { struct clk_omap_divider *div; struct clk_omap_reg *reg; + int ret; if (!setup) return NULL; @@ -422,6 +425,12 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup) div->flags |= CLK_DIVIDER_POWER_OF_TWO; div->table = _get_div_table_from_setup(setup, &div->width); + if (IS_ERR(div->table)) { + ret = PTR_ERR(div->table); + kfree(div); + return ERR_PTR(ret); + } + div->shift = setup->bit_shift; div->latch = -EINVAL; -- cgit v1.2.3 From aad4dc749ed58c6f37abbe2b6f55a90a2129c375 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Tue, 18 Dec 2018 23:49:41 +0530 Subject: clk: qcom: gcc: Use active only source for CPUSS clocks [ Upstream commit 9ff1a3b4912528f853048ccd9757ba6a2cc75557 ] The clocks of the CPUSS such as "gcc_cpuss_ahb_clk_src" is a CRITICAL clock and needs to vote on the active only source of XO, so as to keep the vote as long as CPUSS is active. Similar rbcpr_clk_src is also has the same requirement. Signed-off-by: Taniya Das Fixes: 06391eddb60a ("clk: qcom: Add Global Clock controller (GCC) driver for SDM845") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-sdm845.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c index fa1a196350f1..3bf11a620094 100644 --- a/drivers/clk/qcom/gcc-sdm845.c +++ b/drivers/clk/qcom/gcc-sdm845.c @@ -131,8 +131,8 @@ static const char * const gcc_parent_names_6[] = { "core_bi_pll_test_se", }; -static const char * const gcc_parent_names_7[] = { - "bi_tcxo", +static const char * const gcc_parent_names_7_ao[] = { + "bi_tcxo_ao", "gpll0", "gpll0_out_even", "core_bi_pll_test_se", @@ -144,6 +144,12 @@ static const char * const gcc_parent_names_8[] = { "core_bi_pll_test_se", }; +static const char * const gcc_parent_names_8_ao[] = { + "bi_tcxo_ao", + "gpll0", + "core_bi_pll_test_se", +}; + static const struct parent_map gcc_parent_map_10[] = { { P_BI_TCXO, 0 }, { P_GPLL0_OUT_MAIN, 1 }, @@ -226,7 +232,7 @@ static struct clk_rcg2 gcc_cpuss_ahb_clk_src = { .freq_tbl = ftbl_gcc_cpuss_ahb_clk_src, .clkr.hw.init = &(struct clk_init_data){ .name = "gcc_cpuss_ahb_clk_src", - .parent_names = gcc_parent_names_7, + .parent_names = gcc_parent_names_7_ao, .num_parents = 4, .ops = &clk_rcg2_ops, }, @@ -245,7 +251,7 @@ static struct clk_rcg2 gcc_cpuss_rbcpr_clk_src = { .freq_tbl = ftbl_gcc_cpuss_rbcpr_clk_src, .clkr.hw.init = &(struct clk_init_data){ .name = "gcc_cpuss_rbcpr_clk_src", - .parent_names = gcc_parent_names_8, + .parent_names = gcc_parent_names_8_ao, .num_parents = 3, .ops = &clk_rcg2_ops, }, -- cgit v1.2.3 From f43e42f46aa87bc0507bde9a0cd6ead663ba6a30 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 29 Jan 2018 09:09:41 -0800 Subject: xtensa: SMP: fix ccount_timer_shutdown [ Upstream commit 4fe8713b873fc881284722ce4ac47995de7cf62c ] ccount_timer_shutdown is called from the atomic context in the secondary_start_kernel, resulting in the following BUG: BUG: sleeping function called from invalid context in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1 Preemption disabled at: secondary_start_kernel+0xa1/0x130 Call Trace: ___might_sleep+0xe7/0xfc __might_sleep+0x41/0x44 synchronize_irq+0x24/0x64 disable_irq+0x11/0x14 ccount_timer_shutdown+0x12/0x20 clockevents_switch_state+0x82/0xb4 clockevents_exchange_device+0x54/0x60 tick_check_new_device+0x46/0x70 clockevents_register_device+0x8c/0xc8 clockevents_config_and_register+0x1d/0x2c local_timer_setup+0x75/0x7c secondary_start_kernel+0xb4/0x130 should_never_return+0x32/0x35 Use disable_irq_nosync instead of disable_irq to avoid it. This is safe because the ccount timer IRQ is per-CPU, and once IRQ is masked the ISR will not be called. Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index fd524a54d2ab..378186b5eb40 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -89,7 +89,7 @@ static int ccount_timer_shutdown(struct clock_event_device *evt) container_of(evt, struct ccount_timer, evt); if (timer->irq_enabled) { - disable_irq(evt->irq); + disable_irq_nosync(evt->irq); timer->irq_enabled = 0; } return 0; -- cgit v1.2.3 From dc04a00b3cbb89c96e3b5d5ef7b7cb75d1d747ec Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 10 Dec 2018 06:21:46 +0000 Subject: riscv: Adjust mmap base address at a third of task size [ Upstream commit ae662eec8a515ab550524e04c793b5ddf1aae3a1 ] This ratio is the most used among all other architectures and make icache_hygiene libhugetlbfs test pass: this test mmap lots of hugepages whose addresses, without this patch, reach the end of the process user address space. Signed-off-by: Alexandre Ghiti Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 3fe4af8147d2..c23578a37b44 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -22,7 +22,7 @@ * This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE >> 1) +#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX STACK_TOP -- cgit v1.2.3 From 1ee82160e2eff77606a70e72a2017c16d88876a7 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Thu, 24 Jan 2019 14:33:19 +0200 Subject: IB/ipoib: Fix for use-after-free in ipoib_cm_tx_start [ Upstream commit 6ab4aba00f811a5265acc4d3eb1863bb3ca60562 ] The following BUG was reported by kasan: BUG: KASAN: use-after-free in ipoib_cm_tx_start+0x430/0x1390 [ib_ipoib] Read of size 80 at addr ffff88034c30bcd0 by task kworker/u16:1/24020 Workqueue: ipoib_wq ipoib_cm_tx_start [ib_ipoib] Call Trace: dump_stack+0x9a/0xeb print_address_description+0xe3/0x2e0 kasan_report+0x18a/0x2e0 ? ipoib_cm_tx_start+0x430/0x1390 [ib_ipoib] memcpy+0x1f/0x50 ipoib_cm_tx_start+0x430/0x1390 [ib_ipoib] ? kvm_clock_read+0x1f/0x30 ? ipoib_cm_skb_reap+0x610/0x610 [ib_ipoib] ? __lock_is_held+0xc2/0x170 ? process_one_work+0x880/0x1960 ? process_one_work+0x912/0x1960 process_one_work+0x912/0x1960 ? wq_pool_ids_show+0x310/0x310 ? lock_acquire+0x145/0x440 worker_thread+0x87/0xbb0 ? process_one_work+0x1960/0x1960 kthread+0x314/0x3d0 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x3a/0x50 Allocated by task 0: kasan_kmalloc+0xa0/0xd0 kmem_cache_alloc_trace+0x168/0x3e0 path_rec_create+0xa2/0x1f0 [ib_ipoib] ipoib_start_xmit+0xa98/0x19e0 [ib_ipoib] dev_hard_start_xmit+0x159/0x8d0 sch_direct_xmit+0x226/0xb40 __dev_queue_xmit+0x1d63/0x2950 neigh_update+0x889/0x1770 arp_process+0xc47/0x21f0 arp_rcv+0x462/0x760 __netif_receive_skb_core+0x1546/0x2da0 netif_receive_skb_internal+0xf2/0x590 napi_gro_receive+0x28e/0x390 ipoib_ib_handle_rx_wc_rss+0x873/0x1b60 [ib_ipoib] ipoib_rx_poll_rss+0x17d/0x320 [ib_ipoib] net_rx_action+0x427/0xe30 __do_softirq+0x28e/0xc42 Freed by task 26680: __kasan_slab_free+0x11d/0x160 kfree+0xf5/0x360 ipoib_flush_paths+0x532/0x9d0 [ib_ipoib] ipoib_set_mode_rss+0x1ad/0x560 [ib_ipoib] set_mode+0xc8/0x150 [ib_ipoib] kernfs_fop_write+0x279/0x440 __vfs_write+0xd8/0x5c0 vfs_write+0x15e/0x470 ksys_write+0xb8/0x180 do_syscall_64+0x9b/0x420 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff88034c30bcc8 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 8 bytes inside of 512-byte region [ffff88034c30bcc8, ffff88034c30bec8) The buggy address belongs to the page: The following race between change mode and xmit flow is the reason for this use-after-free: Change mode Send packet 1 to GID XX Send packet 2 to GID XX | | | start | | | | | | | | | Create new path for GID XX | | and update neigh path | | | | | | | | | | flush_paths | | | | queue_work(cm.start_task) | | Path for GID XX not found | create new path | | start_task runs with old released path There is no locking to protect the lifetime of the path through the ipoib_cm_tx struct, so delete it entirely and always use the newly looked up path under the priv->lock. Fixes: 546481c2816e ("IB/ipoib: Fix memory corruption in ipoib cm mode connect flow") Signed-off-by: Feras Daoud Reviewed-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_cm.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 1abe3c62f106..b22d02c9de90 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -248,7 +248,6 @@ struct ipoib_cm_tx { struct list_head list; struct net_device *dev; struct ipoib_neigh *neigh; - struct ipoib_path *path; struct ipoib_tx_buf *tx_ring; unsigned int tx_head; unsigned int tx_tail; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 0428e01e8f69..aa9dcfc36cd3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1312,7 +1312,6 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path neigh->cm = tx; tx->neigh = neigh; - tx->path = path; tx->dev = dev; list_add(&tx->list, &priv->cm.start_list); set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); @@ -1371,7 +1370,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) neigh->daddr + QPN_AND_OPTIONS_OFFSET); goto free_neigh; } - memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec)); + memcpy(&pathrec, &path->pathrec, sizeof(pathrec)); spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); -- cgit v1.2.3 From 0165df14095b3d48c5568373e18da16784b58435 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 10 Jan 2019 12:38:02 +0000 Subject: selftests: cpu-hotplug: fix case where CPUs offline > CPUs present [ Upstream commit 2b531b6137834a55857a337ac17510d6436b6fbb ] The cpu-hotplug test assumes that we can offline the maximum CPU as described by /sys/devices/system/cpu/offline. However, in the case where the number of CPUs exceeds like kernel configuration then the offline count can be greater than the present count and we end up trying to test the offlining of a CPU that is not available to offline. Fix this by testing the maximum present CPU instead. Also, the test currently offlines the CPU and does not online it, so fix this by onlining the CPU after the test. Fixes: d89dffa976bc ("fault-injection: add selftests for cpu and memory hotplug") Signed-off-by: Colin Ian King Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index bab13dd025a6..0d26b5e3f966 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -37,6 +37,10 @@ prerequisite() exit $ksft_skip fi + present_cpus=`cat $SYSFS/devices/system/cpu/present` + present_max=${present_cpus##*-} + echo "present_cpus = $present_cpus present_max = $present_max" + echo -e "\t Cpus in online state: $online_cpus" offline_cpus=`cat $SYSFS/devices/system/cpu/offline` @@ -151,6 +155,8 @@ online_cpus=0 online_max=0 offline_cpus=0 offline_max=0 +present_cpus=0 +present_max=0 while getopts e:ahp: opt; do case $opt in @@ -190,9 +196,10 @@ if [ $allcpus -eq 0 ]; then online_cpu_expect_success $online_max if [[ $offline_cpus -gt 0 ]]; then - echo -e "\t offline to online to offline: cpu $offline_max" - online_cpu_expect_success $offline_max - offline_cpu_expect_success $offline_max + echo -e "\t offline to online to offline: cpu $present_max" + online_cpu_expect_success $present_max + offline_cpu_expect_success $present_max + online_cpu $present_max fi exit 0 else -- cgit v1.2.3 From 8655802aa1556962562179536eabcfc17f413053 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 21 Dec 2018 08:26:20 -0800 Subject: xtensa: SMP: fix secondary CPU initialization [ Upstream commit 32a7726c4f4aadfabdb82440d84f88a5a2c8fe13 ] - add missing memory barriers to the secondary CPU synchronization spin loops; add comment to the matching memory barrier in the boot_secondary and __cpu_die functions; - use READ_ONCE/WRITE_ONCE to access cpu_start_id/cpu_start_ccount instead of reading/writing them directly; - re-initialize cpu_running every time before starting secondary CPU to flush possible previous CPU startup results. Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/kernel/head.S | 5 ++++- arch/xtensa/kernel/smp.c | 34 +++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 9053a5622d2c..5bd38ea2da38 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -280,12 +280,13 @@ should_never_return: movi a2, cpu_start_ccount 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b movi a3, 0 s32i a3, a2, 0 - memw 1: + memw l32i a3, a2, 0 beqi a3, 0, 1b wsr a3, ccount @@ -321,11 +322,13 @@ ENTRY(cpu_restart) rsr a0, prid neg a2, a0 movi a3, cpu_start_id + memw s32i a2, a3, 0 #if XCHAL_DCACHE_IS_WRITEBACK dhwbi a3, 0 #endif 1: + memw l32i a2, a3, 0 dhi a3, 0 bne a2, a0, 1b diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 932d64689bac..c9fc2c4f71b3 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -195,9 +195,11 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) int i; #ifdef CONFIG_HOTPLUG_CPU - cpu_start_id = cpu; - system_flush_invalidate_dcache_range( - (unsigned long)&cpu_start_id, sizeof(cpu_start_id)); + WRITE_ONCE(cpu_start_id, cpu); + /* Pairs with the third memw in the cpu_restart */ + mb(); + system_flush_invalidate_dcache_range((unsigned long)&cpu_start_id, + sizeof(cpu_start_id)); #endif smp_call_function_single(0, mx_cpu_start, (void *)cpu, 1); @@ -206,18 +208,21 @@ static int boot_secondary(unsigned int cpu, struct task_struct *ts) ccount = get_ccount(); while (!ccount); - cpu_start_ccount = ccount; + WRITE_ONCE(cpu_start_ccount, ccount); - while (time_before(jiffies, timeout)) { + do { + /* + * Pairs with the first two memws in the + * .Lboot_secondary. + */ mb(); - if (!cpu_start_ccount) - break; - } + ccount = READ_ONCE(cpu_start_ccount); + } while (ccount && time_before(jiffies, timeout)); - if (cpu_start_ccount) { + if (ccount) { smp_call_function_single(0, mx_cpu_stop, - (void *)cpu, 1); - cpu_start_ccount = 0; + (void *)cpu, 1); + WRITE_ONCE(cpu_start_ccount, 0); return -EIO; } } @@ -237,6 +242,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_debug("%s: Calling wakeup_secondary(cpu:%d, idle:%p, sp: %08lx)\n", __func__, cpu, idle, start_info.stack); + init_completion(&cpu_running); ret = boot_secondary(cpu, idle); if (ret == 0) { wait_for_completion_timeout(&cpu_running, @@ -298,8 +304,10 @@ void __cpu_die(unsigned int cpu) unsigned long timeout = jiffies + msecs_to_jiffies(1000); while (time_before(jiffies, timeout)) { system_invalidate_dcache_range((unsigned long)&cpu_start_id, - sizeof(cpu_start_id)); - if (cpu_start_id == -cpu) { + sizeof(cpu_start_id)); + /* Pairs with the second memw in the cpu_restart */ + mb(); + if (READ_ONCE(cpu_start_id) == -cpu) { platform_cpu_kill(cpu); return; } -- cgit v1.2.3 From c1327f9a12481ee40116dbfec6a466f439990dc5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 24 Jan 2019 17:16:11 -0800 Subject: xtensa: smp_lx200_defconfig: fix vectors clash [ Upstream commit 306b38305c0f86de7f17c5b091a95451dcc93d7d ] Secondary CPU reset vector overlaps part of the double exception handler code, resulting in weird crashes and hangups when running user code. Move exception vectors one page up so that they don't clash with the secondary CPU reset vector. Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/configs/smp_lx200_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 11fed6c06a7c..b5938160fb3d 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -33,6 +33,7 @@ CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y # CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX is not set # CONFIG_PCI is not set +CONFIG_VECTORS_OFFSET=0x00002000 CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0xfd050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=96M@0" -- cgit v1.2.3 From 38f47557ae49582bc99775c3172601acc773a7a5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 19 Jan 2019 00:26:48 -0800 Subject: xtensa: SMP: mark each possible CPU as present [ Upstream commit 8b1c42cdd7181200dc1fff39dcb6ac1a3fac2c25 ] Otherwise it is impossible to enable CPUs after booting with 'maxcpus' parameter. Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index c9fc2c4f71b3..80be6449c497 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -83,7 +83,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned i; - for (i = 0; i < max_cpus; ++i) + for_each_possible_cpu(i) set_cpu_present(i, true); } -- cgit v1.2.3 From d23792f53f0de5706520f1302a8a4143e13a1680 Mon Sep 17 00:00:00 2001 From: Piotr Jaroszynski Date: Sun, 27 Jan 2019 08:46:45 -0800 Subject: iomap: get/put the page in iomap_page_create/release() [ Upstream commit 8e47a457321ca1a74ad194ab5dcbca764bc70731 ] migrate_page_move_mapping() expects pages with private data set to have a page_count elevated by 1. This is what used to happen for xfs through the buffer_heads code before the switch to iomap in commit 82cb14175e7d ("xfs: add support for sub-pagesize writeback without buffer_heads"). Not having the count elevated causes move_pages() to fail on memory mapped files coming from xfs. Make iomap compatible with the migrate_page_move_mapping() assumption by elevating the page count as part of iomap_page_create() and lowering it in iomap_page_release(). It causes the move_pages() syscall to misbehave on memory mapped files from xfs. It does not not move any pages, which I suppose is "just" a perf issue, but it also ends up returning a positive number which is out of spec for the syscall. Talking to Michal Hocko, it sounds like returning positive numbers might be a necessary update to move_pages() anyway though. Fixes: 82cb14175e7d ("xfs: add support for sub-pagesize writeback without buffer_heads") Signed-off-by: Piotr Jaroszynski [hch: actually get/put the page iomap_migrate_page() to make it work properly] Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin --- fs/iomap.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/iomap.c b/fs/iomap.c index e57fb1e534c5..2e3e64012db7 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -117,6 +117,12 @@ iomap_page_create(struct inode *inode, struct page *page) atomic_set(&iop->read_count, 0); atomic_set(&iop->write_count, 0); bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE); + + /* + * migrate_page_move_mapping() assumes that pages with private data have + * their count elevated by 1. + */ + get_page(page); set_page_private(page, (unsigned long)iop); SetPagePrivate(page); return iop; @@ -133,6 +139,7 @@ iomap_page_release(struct page *page) WARN_ON_ONCE(atomic_read(&iop->write_count)); ClearPagePrivate(page); set_page_private(page, 0); + put_page(page); kfree(iop); } @@ -565,8 +572,10 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage, if (page_has_private(page)) { ClearPagePrivate(page); + get_page(newpage); set_page_private(newpage, page_private(page)); set_page_private(page, 0); + put_page(page); SetPagePrivate(newpage); } -- cgit v1.2.3 From d9ba842efdf068c53095b15f68377327259c3d16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Jan 2019 08:58:58 -0800 Subject: iomap: fix a use after free in iomap_dio_rw [ Upstream commit 4ea899ead2786a30aaa8181fefa81a3df4ad28f6 ] Introduce a local wait_for_completion variable to avoid an access to the potentially freed dio struture after dropping the last reference count. Also use the chance to document the completion behavior to make the refcounting clear to the reader of the code. Fixes: ff6a9292e6 ("iomap: implement direct I/O") Reported-by: Chandan Rajendra Reported-by: Darrick J. Wong Signed-off-by: Christoph Hellwig Tested-by: Chandan Rajendra Tested-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin --- fs/iomap.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 2e3e64012db7..fac45206418a 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1787,6 +1787,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, loff_t pos = iocb->ki_pos, start = pos; loff_t end = iocb->ki_pos + count - 1, ret = 0; unsigned int flags = IOMAP_DIRECT; + bool wait_for_completion = is_sync_kiocb(iocb); struct blk_plug plug; struct iomap_dio *dio; @@ -1806,7 +1807,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->end_io = end_io; dio->error = 0; dio->flags = 0; - dio->wait_for_completion = is_sync_kiocb(iocb); dio->submit.iter = iter; dio->submit.waiter = current; @@ -1861,7 +1861,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio_warn_stale_pagecache(iocb->ki_filp); ret = 0; - if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion && + if (iov_iter_rw(iter) == WRITE && !wait_for_completion && !inode->i_sb->s_dio_done_wq) { ret = sb_init_dio_done_wq(inode->i_sb); if (ret < 0) @@ -1877,7 +1877,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (ret <= 0) { /* magic error code to fall back to buffered I/O */ if (ret == -ENOTBLK) { - dio->wait_for_completion = true; + wait_for_completion = true; ret = 0; } break; @@ -1899,8 +1899,24 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (dio->flags & IOMAP_DIO_WRITE_FUA) dio->flags &= ~IOMAP_DIO_NEED_SYNC; + /* + * We are about to drop our additional submission reference, which + * might be the last reference to the dio. There are three three + * different ways we can progress here: + * + * (a) If this is the last reference we will always complete and free + * the dio ourselves. + * (b) If this is not the last reference, and we serve an asynchronous + * iocb, we must never touch the dio after the decrement, the + * I/O completion handler will complete and free it. + * (c) If this is not the last reference, but we serve a synchronous + * iocb, the I/O completion handler will wake us up on the drop + * of the final reference, and we will complete and free it here + * after we got woken by the I/O completion handler. + */ + dio->wait_for_completion = wait_for_completion; if (!atomic_dec_and_test(&dio->ref)) { - if (!dio->wait_for_completion) + if (!wait_for_completion) return -EIOCBQUEUED; for (;;) { @@ -1917,9 +1933,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, __set_current_state(TASK_RUNNING); } - ret = iomap_dio_complete(dio); - - return ret; + return iomap_dio_complete(dio); out_free_dio: kfree(dio); -- cgit v1.2.3 From 419bb616e9eadbcc9faf1dd58e12e2bb13fcf500 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 26 Jan 2019 20:35:18 -0800 Subject: xtensa: SMP: limit number of possible CPUs by NR_CPUS [ Upstream commit 25384ce5f9530def39421597b1457d9462df6455 ] This fixes the following warning at boot when the kernel is booted on a board with more CPU cores than was configured in NR_CPUS: smp_init_cpus: Core Count = 8 smp_init_cpus: Core Id = 0 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at include/linux/cpumask.h:121 smp_init_cpus+0x54/0x74 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.0.0-rc3-00015-g1459333f88a0 #124 Call Trace: __warn$part$3+0x6a/0x7c warn_slowpath_null+0x35/0x3c smp_init_cpus+0x54/0x74 setup_arch+0x1c0/0x1d0 start_kernel+0x44/0x310 _startup+0x107/0x107 Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/kernel/smp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 80be6449c497..be1f280c322c 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -96,6 +96,11 @@ void __init smp_init_cpus(void) pr_info("%s: Core Count = %d\n", __func__, ncpus); pr_info("%s: Core Id = %d\n", __func__, core_id); + if (ncpus > NR_CPUS) { + ncpus = NR_CPUS; + pr_info("%s: limiting core count by %d\n", __func__, ncpus); + } + for (i = 0; i < ncpus; ++i) set_cpu_possible(i, true); } -- cgit v1.2.3 From c91f641625a74aa0d978f2f0c620f98b87784a03 Mon Sep 17 00:00:00 2001 From: Tomonori Sakita Date: Fri, 25 Jan 2019 11:02:22 +0900 Subject: net: altera_tse: fix msgdma_tx_completion on non-zero fill_level case [ Upstream commit 6571ebce112a21ec9be68ef2f53b96fcd41fd81b ] If fill_level was not zero and status was not BUSY, result of "tx_prod - tx_cons - inuse" might be zero. Subtracting 1 unconditionally results invalid negative return value on this case. Make sure not to return an negative value. Signed-off-by: Tomonori Sakita Signed-off-by: Atsushi Nemoto Reviewed-by: Dalon L Westergreen Acked-by: Thor Thayer Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/altera/altera_msgdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/altera/altera_msgdma.c b/drivers/net/ethernet/altera/altera_msgdma.c index 0fb986ba3290..0ae723f75341 100644 --- a/drivers/net/ethernet/altera/altera_msgdma.c +++ b/drivers/net/ethernet/altera/altera_msgdma.c @@ -145,7 +145,8 @@ u32 msgdma_tx_completions(struct altera_tse_private *priv) & 0xffff; if (inuse) { /* Tx FIFO is not empty */ - ready = priv->tx_prod - priv->tx_cons - inuse - 1; + ready = max_t(int, + priv->tx_prod - priv->tx_cons - inuse - 1, 0); } else { /* Check for buffered last packet */ status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status)); -- cgit v1.2.3 From 6b7d3544095519ae850989e238df17ae6bc38a0b Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 26 Jan 2019 17:18:25 +0800 Subject: net: hns: Fix for missing of_node_put() after of_parse_phandle() [ Upstream commit 263c6d75f9a544a3c2f8f6a26de4f4808d8f59cf ] In hns enet driver, we use of_parse_handle() to get hold of the device node related to "ae-handle" but we have missed to put the node reference using of_node_put() after we are done using the node. This patch fixes it. Note: This problem is stated in Link: https://lkml.org/lkml/2018/12/22/217 Fixes: 48189d6aaf1e ("net: hns: enet specifies a reference to dsaf") Reported-by: Alexey Khoroshilov Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 6242249c9f4c..b043370c2685 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -2419,6 +2419,8 @@ static int hns_nic_dev_probe(struct platform_device *pdev) out_notify_fail: (void)cancel_work_sync(&priv->service_task); out_read_prop_fail: + /* safe for ACPI FW */ + of_node_put(to_of_node(priv->fwnode)); free_netdev(ndev); return ret; } @@ -2448,6 +2450,9 @@ static int hns_nic_dev_remove(struct platform_device *pdev) set_bit(NIC_STATE_REMOVING, &priv->state); (void)cancel_work_sync(&priv->service_task); + /* safe for ACPI FW */ + of_node_put(to_of_node(priv->fwnode)); + free_netdev(ndev); return 0; } -- cgit v1.2.3 From 3e640b2cdc6ee44b9057fbcc14951b41da82eb9d Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 26 Jan 2019 17:18:26 +0800 Subject: net: hns: Restart autoneg need return failed when autoneg off [ Upstream commit ed29ca8b9592562559c64d027fb5eb126e463e2c ] The hns driver of earlier devices, when autoneg off, restart autoneg will return -EINVAL, so make the hns driver for the latest devices do the same. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 774beda040a1..e2710ff48fb0 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1157,16 +1157,18 @@ static int hns_get_regs_len(struct net_device *net_dev) */ static int hns_nic_nway_reset(struct net_device *netdev) { - int ret = 0; struct phy_device *phy = netdev->phydev; - if (netif_running(netdev)) { - /* if autoneg is disabled, don't restart auto-negotiation */ - if (phy && phy->autoneg == AUTONEG_ENABLE) - ret = genphy_restart_aneg(phy); - } + if (!netif_running(netdev)) + return 0; - return ret; + if (!phy) + return -EOPNOTSUPP; + + if (phy->autoneg != AUTONEG_ENABLE) + return -EINVAL; + + return genphy_restart_aneg(phy); } static u32 -- cgit v1.2.3 From 4a22084f6bbc1c43910917d8162d674ae6e2f2b1 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Sat, 26 Jan 2019 17:18:27 +0800 Subject: net: hns: Fix wrong read accesses via Clause 45 MDIO protocol [ Upstream commit cec8abba13e6a26729dfed41019720068eeeff2b ] When reading phy registers via Clause 45 MDIO protocol, after write address operation, the driver use another write address operation, so can not read the right value of any phy registers. This patch fixes it. Signed-off-by: Yonglong Liu Signed-off-by: Peng Li Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 017e08452d8c..baf5cc251f32 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -321,7 +321,7 @@ static int hns_mdio_read(struct mii_bus *bus, int phy_id, int regnum) } hns_mdio_cmd_write(mdio_dev, is_c45, - MDIO_C45_WRITE_ADDR, phy_id, devad); + MDIO_C45_READ, phy_id, devad); } /* Step 5: waitting for MDIO_COMMAND_REG 's mdio_start==0,*/ -- cgit v1.2.3 From 1f4ccda30391284201d732d0d360138efcfe01eb Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 26 Jan 2019 22:48:57 +0300 Subject: net: stmmac: dwmac-rk: fix error handling in rk_gmac_powerup() [ Upstream commit c69c29a1a0a8f68cd87e98ba4a5a79fb8ef2a58c ] If phy_power_on() fails in rk_gmac_powerup(), clocks are left enabled. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 7b923362ee55..3b174eae77c1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1342,8 +1342,10 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) } ret = phy_power_on(bsp_priv, true); - if (ret) + if (ret) { + gmac_clk_enable(bsp_priv, false); return ret; + } pm_runtime_enable(dev); pm_runtime_get_sync(dev); -- cgit v1.2.3 From 3355d641269f099906b995063fdadd6785488729 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Jan 2019 21:54:36 +0100 Subject: netfilter: ebtables: compat: un-break 32bit setsockopt when no rules are present [ Upstream commit 2035f3ff8eaa29cfb5c8e2160b0f6e85eeb21a95 ] Unlike ip(6)tables ebtables only counts user-defined chains. The effect is that a 32bit ebtables binary on a 64bit kernel can do 'ebtables -N FOO' only after adding at least one rule, else the request fails with -EINVAL. This is a similar fix as done in 3f1e53abff84 ("netfilter: ebtables: don't attempt to allocate 0-sized compat array"). Fixes: 7d7d7e02111e9 ("netfilter: compat: reject huge allocation requests") Reported-by: Francesco Ruggeri Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/netfilter/ebtables.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5e55cef0cec3..6693e209efe8 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2293,9 +2293,12 @@ static int compat_do_replace(struct net *net, void __user *user, xt_compat_lock(NFPROTO_BRIDGE); - ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); - if (ret < 0) - goto out_unlock; + if (tmp.nentries) { + ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); + if (ret < 0) + goto out_unlock; + } + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) goto out_unlock; -- cgit v1.2.3 From eda52fa18e4f8e8764cf27bf8398789a40704187 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 27 Jan 2019 22:58:00 +0100 Subject: gpio: vf610: Mask all GPIO interrupts [ Upstream commit 7ae710f9f8b2cf95297e7bbfe1c09789a7dc43d4 ] On SoC reset all GPIO interrupts are disable. However, if kexec is used to boot into a new kernel, the SoC does not experience a reset. Hence GPIO interrupts can be left enabled from the previous kernel. It is then possible for the interrupt to fire before an interrupt handler is registered, resulting in the kernel complaining of an "unexpected IRQ trap", the interrupt is never cleared, and so fires again, resulting in an interrupt storm. Disable all GPIO interrupts before registering the GPIO IRQ chip. Fixes: 7f2691a19627 ("gpio: vf610: add gpiolib/IRQ chip driver for Vybrid") Signed-off-by: Andrew Lunn Acked-by: Stefan Agner Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-vf610.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index d4ad6d0e02a2..7e09ce75ffb2 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -259,6 +259,7 @@ static int vf610_gpio_probe(struct platform_device *pdev) struct vf610_gpio_port *port; struct resource *iores; struct gpio_chip *gc; + int i; int ret; port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL); @@ -298,6 +299,10 @@ static int vf610_gpio_probe(struct platform_device *pdev) if (ret < 0) return ret; + /* Mask all GPIO interrupts */ + for (i = 0; i < gc->ngpio; i++) + vf610_gpio_writel(0, port->base + PORT_PCR(i)); + /* Clear the interrupt status register for all GPIO's */ vf610_gpio_writel(~0, port->base + PORT_ISFR); -- cgit v1.2.3 From c68cf0831b25b55b058bf180682c9bd7f476475c Mon Sep 17 00:00:00 2001 From: Fathi Boudra Date: Wed, 16 Jan 2019 11:43:18 -0600 Subject: selftests: net: use LDLIBS instead of LDFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 870f193d48c25a97d61a8e6c04e3c29a2c606850 ] reuseport_bpf_numa fails to build due to undefined reference errors: aarch64-linaro-linux-gcc --sysroot=/build/tmp-rpb-glibc/sysroots/hikey -Wall -Wl,--no-as-needed -O2 -g -I../../../../usr/include/ -Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed -lnuma reuseport_bpf_numa.c -o /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/net/reuseport_bpf_numa /tmp/ccfUuExT.o: In function `send_from_node': /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/net/reuseport_bpf_numa.c:138: undefined reference to `numa_run_on_node' /tmp/ccfUuExT.o: In function `main': /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/net/reuseport_bpf_numa.c:230: undefined reference to `numa_available' /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/net/reuseport_bpf_numa.c:233: undefined reference to `numa_max_node' It's GNU Make and linker specific. The default Makefile rule looks like: $(CC) $(CFLAGS) $(LDFLAGS) $@ $^ $(LDLIBS) When linking is done by gcc itself, no issue, but when it needs to be passed to proper ld, only LDLIBS follows and then ld cannot know what libs to link with. More detail: https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html LDFLAGS Extra flags to give to compilers when they are supposed to invoke the linker, ‘ld’, such as -L. Libraries (-lfoo) should be added to the LDLIBS variable instead. LDLIBS Library flags or names given to compilers when they are supposed to invoke the linker, ‘ld’. LOADLIBES is a deprecated (but still supported) alternative to LDLIBS. Non-library linker flags, such as -L, should go in the LDFLAGS variable. https://lkml.org/lkml/2010/2/10/362 tools/perf: libraries must come after objects Link order matters, use LDLIBS instead of LDFLAGS to properly link against libnuma. Signed-off-by: Fathi Boudra Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/net/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 919aa2ac00af..9a3764a1084e 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -18,6 +18,6 @@ TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls KSFT_KHDR_INSTALL := 1 include ../lib.mk -$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma +$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread $(OUTPUT)/tcp_inq: LDFLAGS += -lpthread -- cgit v1.2.3 From daf04674d08ad4962ac3e04d4a46e971c6f9c2a7 Mon Sep 17 00:00:00 2001 From: Fathi Boudra Date: Wed, 16 Jan 2019 11:43:20 -0600 Subject: selftests: timers: use LDLIBS instead of LDFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7d4e591bc051d3382c45caaa2530969fb42ed23d ] posix_timers fails to build due to undefined reference errors: aarch64-linaro-linux-gcc --sysroot=/build/tmp-rpb-glibc/sysroots/hikey -O2 -pipe -g -feliminate-unused-debug-types -O3 -Wl,-no-as-needed -Wall -DKTEST -Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed -lrt -lpthread posix_timers.c -o /build/tmp-rpb-glibc/work/hikey-linaro-linux/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers /tmp/cc1FTZzT.o: In function `check_timer_create': /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers.c:157: undefined reference to `timer_create' /usr/src/debug/kselftests/4.12-r0/linux-4.12-rc7/tools/testing/selftests/timers/posix_timers.c:170: undefined reference to `timer_settime' collect2: error: ld returned 1 exit status It's GNU Make and linker specific. The default Makefile rule looks like: $(CC) $(CFLAGS) $(LDFLAGS) $@ $^ $(LDLIBS) When linking is done by gcc itself, no issue, but when it needs to be passed to proper ld, only LDLIBS follows and then ld cannot know what libs to link with. More detail: https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html LDFLAGS Extra flags to give to compilers when they are supposed to invoke the linker, ‘ld’, such as -L. Libraries (-lfoo) should be added to the LDLIBS variable instead. LDLIBS Library flags or names given to compilers when they are supposed to invoke the linker, ‘ld’. LOADLIBES is a deprecated (but still supported) alternative to LDLIBS. Non-library linker flags, such as -L, should go in the LDFLAGS variable. https://lkml.org/lkml/2010/2/10/362 tools/perf: libraries must come after objects Link order matters, use LDLIBS instead of LDFLAGS to properly link against libpthread. Signed-off-by: Denys Dmytriyenko Signed-off-by: Fathi Boudra Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/timers/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index c02683cfb6c9..7656c7ce79d9 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -O3 -Wl,-no-as-needed -Wall -LDFLAGS += -lrt -lpthread -lm +LDLIBS += -lrt -lpthread -lm # these are all "safe" tests that don't modify # system time or require escalated privileges -- cgit v1.2.3 From 5c72ca3bf62502ace23aa6281b6dd6867417d642 Mon Sep 17 00:00:00 2001 From: Yao Liu Date: Mon, 28 Jan 2019 19:44:14 +0800 Subject: nfs: Fix NULL pointer dereference of dev_name [ Upstream commit 80ff00172407e0aad4b10b94ef0816fc3e7813cb ] There is a NULL pointer dereference of dev_name in nfs_parse_devname() The oops looks something like: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 ... RIP: 0010:nfs_fs_mount+0x3b6/0xc20 [nfs] ... Call Trace: ? ida_alloc_range+0x34b/0x3d0 ? nfs_clone_super+0x80/0x80 [nfs] ? nfs_free_parsed_mount_data+0x60/0x60 [nfs] mount_fs+0x52/0x170 ? __init_waitqueue_head+0x3b/0x50 vfs_kern_mount+0x6b/0x170 do_mount+0x216/0xdc0 ksys_mount+0x83/0xd0 __x64_sys_mount+0x25/0x30 do_syscall_64+0x65/0x220 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fix this by adding a NULL check on dev_name Signed-off-by: Yao Liu Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5ef2c71348bd..6b666d187907 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1906,6 +1906,11 @@ static int nfs_parse_devname(const char *dev_name, size_t len; char *end; + if (unlikely(!dev_name || !*dev_name)) { + dfprintk(MOUNT, "NFS: device name not specified\n"); + return -EINVAL; + } + /* Is the host name protected with square brakcets? */ if (*dev_name == '[') { end = strchr(++dev_name, ']'); -- cgit v1.2.3 From 88bce339de23a46999ec9e716887eb9be7a1516d Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 28 Jan 2019 10:05:04 -0800 Subject: qed: Fix bug in tx promiscuous mode settings [ Upstream commit 9e71a15d8b5bbce25c637f7f8833cd3f45b65646 ] When running tx switched traffic between VNICs created via a bridge(to which VFs are added), adapter drops the unicast packets in tx flow due to VNIC's ucast mac being unknown to it. But VF interfaces being in promiscuous mode should have caused adapter to accept all the unknown ucast packets. Later, it was found that driver doesn't really configure tx promiscuous mode settings to accept all unknown unicast macs. This patch fixes tx promiscuous mode settings to accept all unknown/unmatched unicast macs and works out the scenario. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 67c02ea93906..b8baa6fcef8e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -609,6 +609,10 @@ qed_sp_update_accept_mode(struct qed_hwfn *p_hwfn, (!!(accept_filter & QED_ACCEPT_MCAST_MATCHED) && !!(accept_filter & QED_ACCEPT_MCAST_UNMATCHED))); + SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL, + (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) && + !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED))); + SET_FIELD(state, ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL, !!(accept_filter & QED_ACCEPT_BCAST)); @@ -2688,7 +2692,8 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev, if (type == QED_FILTER_RX_MODE_TYPE_PROMISC) { accept_flags.rx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | QED_ACCEPT_MCAST_UNMATCHED; - accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; + accept_flags.tx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | + QED_ACCEPT_MCAST_UNMATCHED; } else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC) { accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; -- cgit v1.2.3 From 1ba3511006f1f24a3b40db8257bf6ab9d34028cf Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 28 Jan 2019 10:05:05 -0800 Subject: qed: Fix LACP pdu drops for VFs [ Upstream commit ff9296966e5e00b0d0d00477b2365a178f0f06a3 ] VF is always configured to drop control frames (with reserved mac addresses) but to work LACP on the VFs, it would require LACP control frames to be forwarded or transmitted successfully. This patch fixes this in such a way that trusted VFs (marked through ndo_set_vf_trust) would be allowed to pass the control frames such as LACP pdus. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 5 +++++ drivers/net/ethernet/qlogic/qed/qed_l2.h | 3 +++ drivers/net/ethernet/qlogic/qed/qed_sriov.c | 10 ++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index b8baa6fcef8e..e68ca83ae915 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -748,6 +748,11 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn, return rc; } + if (p_params->update_ctl_frame_check) { + p_cmn->ctl_frame_mac_check_en = p_params->mac_chk_en; + p_cmn->ctl_frame_ethtype_check_en = p_params->ethtype_chk_en; + } + /* Update mcast bins for VFs, PF doesn't use this functionality */ qed_sp_update_mcast_bin(p_hwfn, p_ramrod, p_params); diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index 8d80f1095d17..7127d5aaac42 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -219,6 +219,9 @@ struct qed_sp_vport_update_params { struct qed_rss_params *rss_params; struct qed_filter_accept_flags accept_flags; struct qed_sge_tpa_params *sge_tpa_params; + u8 update_ctl_frame_check; + u8 mac_chk_en; + u8 ethtype_chk_en; }; int qed_sp_vport_update(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index ca6290fa0f30..71a7af134dd8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1969,7 +1969,9 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn, params.vport_id = vf->vport_id; params.max_buffers_per_cqe = start->max_buffers_per_cqe; params.mtu = vf->mtu; - params.check_mac = true; + + /* Non trusted VFs should enable control frame filtering */ + params.check_mac = !vf->p_vf_info.is_trusted_configured; rc = qed_sp_eth_vport_start(p_hwfn, ¶ms); if (rc) { @@ -5130,6 +5132,9 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) params.opaque_fid = vf->opaque_fid; params.vport_id = vf->vport_id; + params.update_ctl_frame_check = 1; + params.mac_chk_en = !vf_info->is_trusted_configured; + if (vf_info->rx_accept_mode & mask) { flags->update_rx_mode_config = 1; flags->rx_accept_filter = vf_info->rx_accept_mode; @@ -5147,7 +5152,8 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) } if (flags->update_rx_mode_config || - flags->update_tx_mode_config) + flags->update_tx_mode_config || + params.update_ctl_frame_check) qed_sp_vport_update(hwfn, ¶ms, QED_SPQ_MODE_EBLOCK, NULL); } -- cgit v1.2.3 From fac238779b1426c36b21e45489a47211e2236280 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 28 Jan 2019 10:05:06 -0800 Subject: qed: Fix VF probe failure while FLR [ Upstream commit 327852ec64205bb651be391a069784872098a3b2 ] VFs may hit VF-PF channel timeout while probing, as in some cases it was observed that VF FLR and VF "acquire" message transaction (i.e first message from VF to PF in VF's probe flow) could occur simultaneously which could lead VF to fail sending "acquire" message to PF as VF is marked disabled from HW perspective due to FLR, which will result into channel timeout and VF probe failure. In such cases, try retrying VF "acquire" message so that in later attempts it could be successful to pass message to PF after the VF FLR is completed and can be probed successfully. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index be118d057b92..6ab3fb008139 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -261,6 +261,7 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) struct pfvf_acquire_resp_tlv *resp = &p_iov->pf2vf_reply->acquire_resp; struct pf_vf_pfdev_info *pfdev_info = &resp->pfdev_info; struct vf_pf_resc_request *p_resc; + u8 retry_cnt = VF_ACQUIRE_THRESH; bool resources_acquired = false; struct vfpf_acquire_tlv *req; int rc = 0, attempts = 0; @@ -314,6 +315,15 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) /* send acquire request */ rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); + + /* Re-try acquire in case of vf-pf hw channel timeout */ + if (retry_cnt && rc == -EBUSY) { + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF retrying to acquire due to VPC timeout\n"); + retry_cnt--; + continue; + } + if (rc) goto exit; -- cgit v1.2.3 From 9414e0854bbcaf42d20c4bfd5c50822f41985e00 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 28 Jan 2019 10:05:07 -0800 Subject: qed: Fix system crash in ll2 xmit [ Upstream commit 7c81626a3c37e4ac320b8ad785694ba498f24794 ] Cache number of fragments in the skb locally as in case of linear skb (with zero fragments), tx completion (or freeing of skb) may happen before driver tries to get number of frgaments from the skb which could lead to stale access to an already freed skb. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 92cd8abeb41d..015de1e0addd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -2430,19 +2430,24 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, { struct qed_ll2_tx_pkt_info pkt; const skb_frag_t *frag; + u8 flags = 0, nr_frags; int rc = -EINVAL, i; dma_addr_t mapping; u16 vlan = 0; - u8 flags = 0; if (unlikely(skb->ip_summed != CHECKSUM_NONE)) { DP_INFO(cdev, "Cannot transmit a checksummed packet\n"); return -EINVAL; } - if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { + /* Cache number of fragments from SKB since SKB may be freed by + * the completion routine after calling qed_ll2_prepare_tx_packet() + */ + nr_frags = skb_shinfo(skb)->nr_frags; + + if (1 + nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n", - 1 + skb_shinfo(skb)->nr_frags); + 1 + nr_frags); return -EINVAL; } @@ -2464,7 +2469,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, } memset(&pkt, 0, sizeof(pkt)); - pkt.num_of_bds = 1 + skb_shinfo(skb)->nr_frags; + pkt.num_of_bds = 1 + nr_frags; pkt.vlan = vlan; pkt.bd_flags = flags; pkt.tx_dest = QED_LL2_TX_DEST_NW; @@ -2475,12 +2480,17 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, test_bit(QED_LL2_XMIT_FLAGS_FIP_DISCOVERY, &xmit_flags)) pkt.remove_stag = true; + /* qed_ll2_prepare_tx_packet() may actually send the packet if + * there are no fragments in the skb and subsequently the completion + * routine may run and free the SKB, so no dereferencing the SKB + * beyond this point unless skb has any fragments. + */ rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle, &pkt, 1); if (rc) goto err; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + for (i = 0; i < nr_frags; i++) { frag = &skb_shinfo(skb)->frags[i]; mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0, -- cgit v1.2.3 From 40e35210abeb7accf15a3463910a0cde709e62e4 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 28 Jan 2019 10:05:08 -0800 Subject: qed: Fix stack out of bounds bug [ Upstream commit ffb057f98928aa099b08e419bbe5afc26ec9f448 ] KASAN reported following bug in qed_init_qm_get_idx_from_flags due to inappropriate casting of "pq_flags". Fix the type of "pq_flags". [ 196.624707] BUG: KASAN: stack-out-of-bounds in qed_init_qm_get_idx_from_flags+0x1a4/0x1b8 [qed] [ 196.624712] Read of size 8 at addr ffff809b00bc7360 by task kworker/0:9/1712 [ 196.624714] [ 196.624720] CPU: 0 PID: 1712 Comm: kworker/0:9 Not tainted 4.18.0-60.el8.aarch64+debug #1 [ 196.624723] Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL024 09/26/2018 [ 196.624733] Workqueue: events work_for_cpu_fn [ 196.624738] Call trace: [ 196.624742] dump_backtrace+0x0/0x2f8 [ 196.624745] show_stack+0x24/0x30 [ 196.624749] dump_stack+0xe0/0x11c [ 196.624755] print_address_description+0x68/0x260 [ 196.624759] kasan_report+0x178/0x340 [ 196.624762] __asan_report_load_n_noabort+0x38/0x48 [ 196.624786] qed_init_qm_get_idx_from_flags+0x1a4/0x1b8 [qed] [ 196.624808] qed_init_qm_info+0xec0/0x2200 [qed] [ 196.624830] qed_resc_alloc+0x284/0x7e8 [qed] [ 196.624853] qed_slowpath_start+0x6cc/0x1ae8 [qed] [ 196.624864] __qede_probe.isra.10+0x1cc/0x12c0 [qede] [ 196.624874] qede_probe+0x78/0xf0 [qede] [ 196.624879] local_pci_probe+0xc4/0x180 [ 196.624882] work_for_cpu_fn+0x54/0x98 [ 196.624885] process_one_work+0x758/0x1900 [ 196.624888] worker_thread+0x4e0/0xd18 [ 196.624892] kthread+0x2c8/0x350 [ 196.624897] ret_from_fork+0x10/0x18 [ 196.624899] [ 196.624902] Allocated by task 2: [ 196.624906] kasan_kmalloc.part.1+0x40/0x108 [ 196.624909] kasan_kmalloc+0xb4/0xc8 [ 196.624913] kasan_slab_alloc+0x14/0x20 [ 196.624916] kmem_cache_alloc_node+0x1dc/0x480 [ 196.624921] copy_process.isra.1.part.2+0x1d8/0x4a98 [ 196.624924] _do_fork+0x150/0xfa0 [ 196.624926] kernel_thread+0x48/0x58 [ 196.624930] kthreadd+0x3a4/0x5a0 [ 196.624932] ret_from_fork+0x10/0x18 [ 196.624934] [ 196.624937] Freed by task 0: [ 196.624938] (stack is not available) [ 196.624940] [ 196.624943] The buggy address belongs to the object at ffff809b00bc0000 [ 196.624943] which belongs to the cache thread_stack of size 32768 [ 196.624946] The buggy address is located 29536 bytes inside of [ 196.624946] 32768-byte region [ffff809b00bc0000, ffff809b00bc8000) [ 196.624948] The buggy address belongs to the page: [ 196.624952] page:ffff7fe026c02e00 count:1 mapcount:0 mapping:ffff809b4001c000 index:0x0 compound_mapcount: 0 [ 196.624960] flags: 0xfffff8000008100(slab|head) [ 196.624967] raw: 0fffff8000008100 dead000000000100 dead000000000200 ffff809b4001c000 [ 196.624970] raw: 0000000000000000 0000000000080008 00000001ffffffff 0000000000000000 [ 196.624973] page dumped because: kasan: bad access detected [ 196.624974] [ 196.624976] Memory state around the buggy address: [ 196.624980] ffff809b00bc7200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624983] ffff809b00bc7280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624985] >ffff809b00bc7300: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 04 f2 f2 f2 [ 196.624988] ^ [ 196.624990] ffff809b00bc7380: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624993] ffff809b00bc7400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 196.624995] ================================================================== Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 2f69ee9221c6..4dd82a1612aa 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -473,19 +473,19 @@ static void qed_init_qm_pq(struct qed_hwfn *p_hwfn, /* get pq index according to PQ_FLAGS */ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, - u32 pq_flags) + unsigned long pq_flags) { struct qed_qm_info *qm_info = &p_hwfn->qm_info; /* Can't have multiple flags set here */ - if (bitmap_weight((unsigned long *)&pq_flags, + if (bitmap_weight(&pq_flags, sizeof(pq_flags) * BITS_PER_BYTE) > 1) { - DP_ERR(p_hwfn, "requested multiple pq flags 0x%x\n", pq_flags); + DP_ERR(p_hwfn, "requested multiple pq flags 0x%lx\n", pq_flags); goto err; } if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) { - DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags); + DP_ERR(p_hwfn, "pq flag 0x%lx is not set\n", pq_flags); goto err; } -- cgit v1.2.3 From e546c8787e7b3d400dbb2aacb95b66438bcaa86e Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Thu, 24 Jan 2019 13:25:42 +0800 Subject: scsi: libfc: free skb when receiving invalid flogi resp [ Upstream commit 5d8fc4a9f0eec20b6c07895022a6bea3fb6dfb38 ] The issue to be fixed in this commit is when libfc found it received a invalid FLOGI response from FC switch, it would return without freeing the fc frame, which is just the skb data. This would cause memory leak if FC switch keeps sending invalid FLOGI responses. This fix is just to make it execute `fc_frame_free(fp)` before returning from function `fc_lport_flogi_resp`. Signed-off-by: Ming Lu Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libfc/fc_lport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index be83590ed955..ff943f477d6f 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1726,14 +1726,14 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, fc_frame_payload_op(fp) != ELS_LS_ACC) { FC_LPORT_DBG(lport, "FLOGI not accepted or bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; } flp = fc_frame_payload_get(fp, sizeof(*flp)); if (!flp) { FC_LPORT_DBG(lport, "FLOGI bad response\n"); fc_lport_error(lport, fp); - goto err; + goto out; } mfs = ntohs(flp->fl_csp.sp_bb_data) & @@ -1743,7 +1743,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp, FC_LPORT_DBG(lport, "FLOGI bad mfs:%hu response, " "lport->mfs:%hu\n", mfs, lport->mfs); fc_lport_error(lport, fp); - goto err; + goto out; } if (mfs <= lport->mfs) { -- cgit v1.2.3 From 8d27f830b6159c687e704e98a96133e746a9ed22 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Fri, 25 Jan 2019 12:46:09 -0500 Subject: scsi: scsi_debug: fix write_same with virtual_gb problem [ Upstream commit 40d07b523cf434f252b134c86b1f8f2d907ffb0b ] The WRITE SAME(10) and (16) implementations didn't take account of the buffer wrap required when the virtual_gb parameter is greater than 0. Fix that and rename the fake_store() function to lba2fake_store() to lessen confusion with the global fake_storep pointer. Bump version date. Signed-off-by: Douglas Gilbert Reported-by: Bart Van Assche Tested by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_debug.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 60bcc6df97a9..65305b3848bc 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -62,7 +62,7 @@ /* make sure inq_product_rev string corresponds to this version */ #define SDEBUG_VERSION "0188" /* format to fit INQUIRY revision field */ -static const char *sdebug_version_date = "20180128"; +static const char *sdebug_version_date = "20190125"; #define MY_NAME "scsi_debug" @@ -735,7 +735,7 @@ static inline bool scsi_debug_lbp(void) (sdebug_lbpu || sdebug_lbpws || sdebug_lbpws10); } -static void *fake_store(unsigned long long lba) +static void *lba2fake_store(unsigned long long lba) { lba = do_div(lba, sdebug_store_sectors); @@ -2514,8 +2514,8 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba, return ret; } -/* If fake_store(lba,num) compares equal to arr(num), then copy top half of - * arr into fake_store(lba,num) and return true. If comparison fails then +/* If lba2fake_store(lba,num) compares equal to arr(num), then copy top half of + * arr into lba2fake_store(lba,num) and return true. If comparison fails then * return false. */ static bool comp_write_worker(u64 lba, u32 num, const u8 *arr) { @@ -2643,7 +2643,7 @@ static int prot_verify_read(struct scsi_cmnd *SCpnt, sector_t start_sec, if (sdt->app_tag == cpu_to_be16(0xffff)) continue; - ret = dif_verify(sdt, fake_store(sector), sector, ei_lba); + ret = dif_verify(sdt, lba2fake_store(sector), sector, ei_lba); if (ret) { dif_errors++; return ret; @@ -3261,10 +3261,12 @@ err_out: static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, u32 ei_lba, bool unmap, bool ndob) { + int ret; unsigned long iflags; unsigned long long i; - int ret; - u64 lba_off; + u32 lb_size = sdebug_sector_size; + u64 block, lbaa; + u8 *fs1p; ret = check_device_access_params(scp, lba, num); if (ret) @@ -3276,31 +3278,30 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num, unmap_region(lba, num); goto out; } - - lba_off = lba * sdebug_sector_size; + lbaa = lba; + block = do_div(lbaa, sdebug_store_sectors); /* if ndob then zero 1 logical block, else fetch 1 logical block */ + fs1p = fake_storep + (block * lb_size); if (ndob) { - memset(fake_storep + lba_off, 0, sdebug_sector_size); + memset(fs1p, 0, lb_size); ret = 0; } else - ret = fetch_to_dev_buffer(scp, fake_storep + lba_off, - sdebug_sector_size); + ret = fetch_to_dev_buffer(scp, fs1p, lb_size); if (-1 == ret) { write_unlock_irqrestore(&atomic_rw, iflags); return DID_ERROR << 16; - } else if (sdebug_verbose && !ndob && (ret < sdebug_sector_size)) + } else if (sdebug_verbose && !ndob && (ret < lb_size)) sdev_printk(KERN_INFO, scp->device, "%s: %s: lb size=%u, IO sent=%d bytes\n", - my_name, "write same", - sdebug_sector_size, ret); + my_name, "write same", lb_size, ret); /* Copy first sector to remaining blocks */ - for (i = 1 ; i < num ; i++) - memcpy(fake_storep + ((lba + i) * sdebug_sector_size), - fake_storep + lba_off, - sdebug_sector_size); - + for (i = 1 ; i < num ; i++) { + lbaa = lba + i; + block = do_div(lbaa, sdebug_store_sectors); + memmove(fake_storep + (block * lb_size), fs1p, lb_size); + } if (scsi_debug_lbp()) map_region(lba, num); out: -- cgit v1.2.3 From 59411d98fb10c6897f9170bd17ad4e34d01209b9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 24 Jan 2019 13:29:40 +0300 Subject: scsi: bnx2fc: Fix error handling in probe() [ Upstream commit b2d3492fc591b1fb46b81d79ca1fc44cac6ae0ae ] There are two issues here. First if cmgr->hba is not set early enough then it leads to a NULL dereference. Second if we don't completely initialize cmgr->io_bdt_pool[] then we end up dereferencing uninitialized pointers. Fixes: 853e2bd2103a ("[SCSI] bnx2fc: Broadcom FCoE offload driver") Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/bnx2fc/bnx2fc_io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 350257c13a5b..bc9f2a2365f4 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -240,6 +240,7 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba) return NULL; } + cmgr->hba = hba; cmgr->free_list = kcalloc(arr_sz, sizeof(*cmgr->free_list), GFP_KERNEL); if (!cmgr->free_list) { @@ -256,7 +257,6 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba) goto mem_err; } - cmgr->hba = hba; cmgr->cmds = (struct bnx2fc_cmd **)(cmgr + 1); for (i = 0; i < arr_sz; i++) { @@ -295,7 +295,7 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba) /* Allocate pool of io_bdts - one for each bnx2fc_cmd */ mem_size = num_ios * sizeof(struct io_bdt *); - cmgr->io_bdt_pool = kmalloc(mem_size, GFP_KERNEL); + cmgr->io_bdt_pool = kzalloc(mem_size, GFP_KERNEL); if (!cmgr->io_bdt_pool) { printk(KERN_ERR PFX "failed to alloc io_bdt_pool\n"); goto mem_err; -- cgit v1.2.3 From bdf1bd01327d19956addc81c380ceb711d5c2fe9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 24 Jan 2019 13:33:27 +0300 Subject: scsi: 53c700: pass correct "dev" to dma_alloc_attrs() [ Upstream commit 8437fcf14deed67e5ad90b5e8abf62fb20f30881 ] The "hostdata->dev" pointer is NULL here. We set "hostdata->dev = dev;" later in the function and we also use "hostdata->dev" when we call dma_free_attrs() in NCR_700_release(). This bug predates git version control. Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/53c700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 6be77b3aa8a5..ac79f2088b31 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -295,7 +295,7 @@ NCR_700_detect(struct scsi_host_template *tpnt, if(tpnt->sdev_attrs == NULL) tpnt->sdev_attrs = NCR_700_dev_attrs; - memory = dma_alloc_attrs(hostdata->dev, TOTAL_MEM_SIZE, &pScript, + memory = dma_alloc_attrs(dev, TOTAL_MEM_SIZE, &pScript, GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); if(memory == NULL) { printk(KERN_ERR "53c700: Failed to allocate memory for driver, detaching\n"); -- cgit v1.2.3 From 25f37d15f080a3562c651f82c256c136de21e610 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Thu, 24 Jan 2019 19:31:00 +0000 Subject: platform/x86: Fix unmet dependency warning for ACPI_CMPC [ Upstream commit d58bf90a32a33becec78c3034e781735049fcd25 ] Add BACKLIGHT_LCD_SUPPORT for ACPI_CMPC to fix the warning: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE. ACPI_CMPC selects BACKLIGHT_CLASS_DEVICE but BACKLIGHT_CLASS_DEVICE depends on BACKLIGHT_LCD_SUPPORT. Copy BACKLIGHT_LCD_SUPPORT dependency into ACPI_CMPC to fix WARNING: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE Depends on [n]: HAS_IOMEM [=y] && BACKLIGHT_LCD_SUPPORT [=n] Selected by [y]: - ACPI_CMPC [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y] && INPUT [=y] && (RFKILL [=n] || RFKILL [=n]=n) Signed-off-by: Sinan Kaya Acked-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 0c1aa6c314f5..96afca4648d5 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -856,6 +856,7 @@ config TOSHIBA_WMI config ACPI_CMPC tristate "CMPC Laptop Extras" depends on ACPI && INPUT + depends on BACKLIGHT_LCD_SUPPORT depends on RFKILL || RFKILL=n select BACKLIGHT_CLASS_DEVICE help -- cgit v1.2.3 From 38be5d445c5b5b2bfc84a1fa1c4a6d26860490db Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Thu, 24 Jan 2019 19:31:01 +0000 Subject: platform/x86: Fix unmet dependency warning for SAMSUNG_Q10 [ Upstream commit 0ee4b5f801b73b83a9fb3921d725f2162fd4a2e5 ] Add BACKLIGHT_LCD_SUPPORT for SAMSUNG_Q10 to fix the warning: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE. SAMSUNG_Q10 selects BACKLIGHT_CLASS_DEVICE but BACKLIGHT_CLASS_DEVICE depends on BACKLIGHT_LCD_SUPPORT. Copy BACKLIGHT_LCD_SUPPORT dependency into SAMSUNG_Q10 to fix: WARNING: unmet direct dependencies detected for BACKLIGHT_CLASS_DEVICE Depends on [n]: HAS_IOMEM [=y] && BACKLIGHT_LCD_SUPPORT [=n] Selected by [y]: - SAMSUNG_Q10 [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y] Signed-off-by: Sinan Kaya Acked-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 96afca4648d5..7563c07e14e4 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1078,6 +1078,7 @@ config INTEL_OAKTRAIL config SAMSUNG_Q10 tristate "Samsung Q10 Extras" depends on ACPI + depends on BACKLIGHT_LCD_SUPPORT select BACKLIGHT_CLASS_DEVICE ---help--- This driver provides support for backlight control on Samsung Q10 -- cgit v1.2.3 From bd13a72a537af1ddf2ae744df8a9193e64f18495 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Tue, 29 Jan 2019 15:20:03 +0530 Subject: net: macb: Apply RXUBR workaround only to versions with errata [ Upstream commit e501070e4db0b67a4c17a5557d1e9d098f3db310 ] The interrupt handler contains a workaround for RX hang applicable to Zynq and AT91RM9200 only. Subsequent versions do not need this workaround. This workaround unnecessarily resets RX whenever RX used bit read is observed, which can be often under heavy traffic. There is no other action performed on RX UBR interrupt. Hence introduce a CAPS mask; enable this interrupt and workaround only on affected versions. Signed-off-by: Harini Katakam Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cadence/macb.h | 3 +++ drivers/net/ethernet/cadence/macb_main.c | 28 +++++++++++++++++----------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 3d45f4c92cf6..9bbaad9f3d63 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -643,6 +643,7 @@ #define MACB_CAPS_JUMBO 0x00000020 #define MACB_CAPS_GEM_HAS_PTP 0x00000040 #define MACB_CAPS_BD_RD_PREFETCH 0x00000080 +#define MACB_CAPS_NEEDS_RSTONUBR 0x00000100 #define MACB_CAPS_FIFO_MODE 0x10000000 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 @@ -1214,6 +1215,8 @@ struct macb { int rx_bd_rd_prefetch; int tx_bd_rd_prefetch; + + u32 rx_intr_mask; }; #ifdef CONFIG_MACB_USE_HWSTAMP diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 8f4b2f9a8e07..8abea1c3844f 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -56,8 +56,7 @@ /* level of occupied TX descriptors under which we wake up TX process */ #define MACB_TX_WAKEUP_THRESH(bp) (3 * (bp)->tx_ring_size / 4) -#define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(RXUBR) \ - | MACB_BIT(ISR_ROVR)) +#define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(ISR_ROVR)) #define MACB_TX_ERR_FLAGS (MACB_BIT(ISR_TUND) \ | MACB_BIT(ISR_RLE) \ | MACB_BIT(TXERR)) @@ -1271,7 +1270,7 @@ static int macb_poll(struct napi_struct *napi, int budget) queue_writel(queue, ISR, MACB_BIT(RCOMP)); napi_reschedule(napi); } else { - queue_writel(queue, IER, MACB_RX_INT_FLAGS); + queue_writel(queue, IER, bp->rx_intr_mask); } } @@ -1289,7 +1288,7 @@ static void macb_hresp_error_task(unsigned long data) u32 ctrl; for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - queue_writel(queue, IDR, MACB_RX_INT_FLAGS | + queue_writel(queue, IDR, bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); } @@ -1319,7 +1318,7 @@ static void macb_hresp_error_task(unsigned long data) /* Enable interrupts */ queue_writel(queue, IER, - MACB_RX_INT_FLAGS | + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); } @@ -1373,14 +1372,14 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) (unsigned int)(queue - bp->queues), (unsigned long)status); - if (status & MACB_RX_INT_FLAGS) { + if (status & bp->rx_intr_mask) { /* There's no point taking any more interrupts * until we have processed the buffers. The * scheduling call may fail if the poll routine * is already scheduled, so disable interrupts * now. */ - queue_writel(queue, IDR, MACB_RX_INT_FLAGS); + queue_writel(queue, IDR, bp->rx_intr_mask); if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(RCOMP)); @@ -1413,8 +1412,9 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) /* There is a hardware issue under heavy load where DMA can * stop, this causes endless "used buffer descriptor read" * interrupts but it can be cleared by re-enabling RX. See - * the at91 manual, section 41.3.1 or the Zynq manual - * section 16.7.4 for details. + * the at91rm9200 manual, section 41.3.1 or the Zynq manual + * section 16.7.4 for details. RXUBR is only enabled for + * these two versions. */ if (status & MACB_BIT(RXUBR)) { ctrl = macb_readl(bp, NCR); @@ -2264,7 +2264,7 @@ static void macb_init_hw(struct macb *bp) /* Enable interrupts */ queue_writel(queue, IER, - MACB_RX_INT_FLAGS | + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); } @@ -3912,6 +3912,7 @@ static const struct macb_config sama5d4_config = { }; static const struct macb_config emac_config = { + .caps = MACB_CAPS_NEEDS_RSTONUBR, .clk_init = at91ether_clk_init, .init = at91ether_init, }; @@ -3933,7 +3934,8 @@ static const struct macb_config zynqmp_config = { }; static const struct macb_config zynq_config = { - .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF, + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF | + MACB_CAPS_NEEDS_RSTONUBR, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, @@ -4088,6 +4090,10 @@ static int macb_probe(struct platform_device *pdev) macb_dma_desc_get_size(bp); } + bp->rx_intr_mask = MACB_RX_INT_FLAGS; + if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR) + bp->rx_intr_mask |= MACB_BIT(RXUBR); + mac = of_get_mac_address(np); if (mac) { ether_addr_copy(bp->dev->dev_addr, mac); -- cgit v1.2.3 From 15fb5d73e47e89171a38962a23cc39c213e40500 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Thu, 3 Jan 2019 23:44:11 -0600 Subject: x86/boot/compressed/64: Set EFER.LME=1 in 32-bit trampoline before returning to long mode [ Upstream commit b677dfae5aa197afc5191755a76a8727ffca538a ] In some old AMD KVM implementation, guest's EFER.LME bit is cleared by KVM when the hypervsior detects that the guest sets CR0.PG to 0. This causes the guest OS to reboot when it tries to return from 32-bit trampoline code because the CPU is in incorrect state: CR4.PAE=1, CR0.PG=1, CS.L=1, but EFER.LME=0. As a precaution, set EFER.LME=1 as part of long mode activation procedure. This extra step won't cause any harm when Linux is booted on a bare-metal machine. Signed-off-by: Wei Huang Signed-off-by: Thomas Gleixner Acked-by: Kirill A. Shutemov Cc: bp@alien8.de Cc: hpa@zytor.com Link: https://lkml.kernel.org/r/20190104054411.12489-1-wei@redhat.com Signed-off-by: Sasha Levin --- arch/x86/boot/compressed/head_64.S | 8 ++++++++ arch/x86/boot/compressed/pgtable.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 64037895b085..f105ae8651c9 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -600,6 +600,14 @@ ENTRY(trampoline_32bit_src) leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax movl %eax, %cr3 3: + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ + pushl %ecx + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + popl %ecx + /* Enable PAE and LA57 (if required) paging modes */ movl $X86_CR4_PAE, %eax cmpl $0, %edx diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index 91f75638f6e6..6ff7e81b5628 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -6,7 +6,7 @@ #define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE -#define TRAMPOLINE_32BIT_CODE_SIZE 0x60 +#define TRAMPOLINE_32BIT_CODE_SIZE 0x70 #define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE -- cgit v1.2.3 From 6efd69d63339904fce3095723e470fd8f86a49a8 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 29 Jan 2019 12:46:16 +1000 Subject: cifs: fix computation for MAX_SMB2_HDR_SIZE [ Upstream commit 58d15ed1203f4d858c339ea4d7dafa94bd2a56d3 ] The size of the fixed part of the create response is 88 bytes not 56. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Signed-off-by: Sasha Levin --- fs/cifs/smb2pdu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 8fb7887f2b3d..437257d1116f 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -84,8 +84,8 @@ #define NUMBER_OF_SMB2_COMMANDS 0x0013 -/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ -#define MAX_SMB2_HDR_SIZE 0x00b0 +/* 52 transform hdr + 64 hdr + 88 create rsp */ +#define MAX_SMB2_HDR_SIZE 204 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) #define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) -- cgit v1.2.3 From f964a4d2bb68662c1e3f60f80ede0c90455c1c3c Mon Sep 17 00:00:00 2001 From: Thomas Lendacky Date: Thu, 31 Jan 2019 14:33:06 +0000 Subject: x86/microcode/amd: Don't falsely trick the late loading mechanism [ Upstream commit 912139cfbfa6a2bc1da052314d2c29338dae1f6a ] The load_microcode_amd() function searches for microcode patches and attempts to apply a microcode patch if it is of different level than the currently installed level. While the processor won't actually load a level that is less than what is already installed, the logic wrongly returns UCODE_NEW thus signaling to its caller reload_store() that a late loading should be attempted. If the file-system contains an older microcode revision than what is currently running, such a late microcode reload can result in these misleading messages: x86/CPU: CPU features have changed after loading microcode, but might not take effect. x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update. These messages were issued on a system where SME/SEV are not enabled by the BIOS (MSR C001_0010[23] = 0b) because during boot, early_detect_mem_encrypt() is called and cleared the SME and SEV features in this case. However, after the wrong late load attempt, get_cpu_cap() is called and reloads the SME and SEV feature bits, resulting in the messages. Update the microcode level check to not attempt microcode loading if the current level is greater than(!) and not only equal to the current patch level. [ bp: massage commit message. ] Fixes: 2613f36ed965 ("x86/microcode: Attempt late loading only when new microcode is present") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/154894518427.9406.8246222496874202773.stgit@tlendack-t1.amdoffice.net Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 07b5fc00b188..a4e7e100ed26 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -707,7 +707,7 @@ load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) if (!p) { return ret; } else { - if (boot_cpu_data.microcode == p->patch_id) + if (boot_cpu_data.microcode >= p->patch_id) return ret; ret = UCODE_NEW; -- cgit v1.2.3 From d92d3caf818dd9b78bdbef179d68647305a01678 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 24 Jan 2019 16:32:55 +0000 Subject: arm64: kprobe: Always blacklist the KVM world-switch code [ Upstream commit f2b3d8566d81deaca31f4e3163def0bea7746e11 ] On systems with VHE the kernel and KVM's world-switch code run at the same exception level. Code that is only used on a VHE system does not need to be annotated as __hyp_text as it can reside anywhere in the kernel text. __hyp_text was also used to prevent kprobes from patching breakpoint instructions into this region, as this code runs at a different exception level. While this is no longer true with VHE, KVM still switches VBAR_EL1, meaning a kprobe's breakpoint executed in the world-switch code will cause a hyp-panic. Move the __hyp_text check in the kprobes blacklist so it applies on VHE systems too, to cover the common code and guest enter/exit assembly. Fixes: 888b3c8720e0 ("arm64: Treat all entry code as non-kprobe-able") Reviewed-by: Christoffer Dall Signed-off-by: James Morse Acked-by: Masami Hiramatsu Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/probes/kprobes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index b5a367d4bba6..30bb13797034 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -478,13 +478,13 @@ bool arch_within_kprobe_blacklist(unsigned long addr) addr < (unsigned long)__entry_text_end) || (addr >= (unsigned long)__idmap_text_start && addr < (unsigned long)__idmap_text_end) || + (addr >= (unsigned long)__hyp_text_start && + addr < (unsigned long)__hyp_text_end) || !!search_exception_tables(addr)) return true; if (!is_kernel_in_hyp_mode()) { - if ((addr >= (unsigned long)__hyp_text_start && - addr < (unsigned long)__hyp_text_end) || - (addr >= (unsigned long)__hyp_idmap_text_start && + if ((addr >= (unsigned long)__hyp_idmap_text_start && addr < (unsigned long)__hyp_idmap_text_end)) return true; } -- cgit v1.2.3 From d7807b62276efdb49d9ade2af39fcc409c568f7b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 24 Jan 2019 13:53:05 -0800 Subject: apparmor: Fix aa_label_build() error handling for failed merges [ Upstream commit d6d478aee003e19ef90321176552a8ad2929a47f ] aa_label_merge() can return NULL for memory allocations failures make sure to handle and set the correct error in this case. Reported-by: Peng Hao Signed-off-by: John Johansen Signed-off-by: Sasha Levin --- security/apparmor/domain.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 08c88de0ffda..11975ec8d566 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -1444,7 +1444,10 @@ check: new = aa_label_merge(label, target, GFP_KERNEL); if (IS_ERR_OR_NULL(new)) { info = "failed to build target label"; - error = PTR_ERR(new); + if (!new) + error = -ENOMEM; + else + error = PTR_ERR(new); new = NULL; perms.allow = 0; goto audit; -- cgit v1.2.3 From e2b45446c34f3f8ae31575235a09781e67ad4f03 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 18 Jan 2019 19:13:08 +0800 Subject: x86/kexec: Don't setup EFI info if EFI runtime is not enabled [ Upstream commit 2aa958c99c7fd3162b089a1a56a34a0cdb778de1 ] Kexec-ing a kernel with "efi=noruntime" on the first kernel's command line causes the following null pointer dereference: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] Call Trace: efi_runtime_map_copy+0x28/0x30 bzImage64_load+0x688/0x872 arch_kexec_kernel_image_load+0x6d/0x70 kimage_file_alloc_init+0x13e/0x220 __x64_sys_kexec_file_load+0x144/0x290 do_syscall_64+0x55/0x1a0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Just skip the EFI info setup if EFI runtime services are not enabled. [ bp: Massage commit message. ] Suggested-by: Dave Young Signed-off-by: Kairui Song Signed-off-by: Borislav Petkov Acked-by: Dave Young Cc: AKASHI Takahiro Cc: Andrew Morton Cc: Ard Biesheuvel Cc: bhe@redhat.com Cc: David Howells Cc: erik.schmauss@intel.com Cc: fanc.fnst@cn.fujitsu.com Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: kexec@lists.infradead.org Cc: lenb@kernel.org Cc: linux-acpi@vger.kernel.org Cc: Philipp Rudo Cc: rafael.j.wysocki@intel.com Cc: robert.moore@intel.com Cc: Thomas Gleixner Cc: x86-ml Cc: Yannik Sembritzki Link: https://lkml.kernel.org/r/20190118111310.29589-2-kasong@redhat.com Signed-off-by: Sasha Levin --- arch/x86/kernel/kexec-bzimage64.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 278cd07228dd..9490a2845f14 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -167,6 +167,9 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, struct efi_info *current_ei = &boot_params.efi_info; struct efi_info *ei = ¶ms->efi_info; + if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return 0; + if (!current_ei->efi_memmap_size) return 0; -- cgit v1.2.3 From 845d73be1b4cba6c353e99f79eff0f17e0d7c1d8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 1 Feb 2019 14:20:01 -0800 Subject: proc: fix /proc/net/* after setns(2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1fde6f21d90f8ba5da3cb9c54ca991ed72696c43 ] /proc entries under /proc/net/* can't be cached into dcache because setns(2) can change current net namespace. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: avoid vim miscolorization] [adobriyan@gmail.com: write test, add dummy ->d_revalidate hook: necessary if /proc/net/* is pinned at setns time] Link: http://lkml.kernel.org/r/20190108192350.GA12034@avx2 Link: http://lkml.kernel.org/r/20190107162336.GA9239@avx2 Fixes: 1da4d377f943fe4194ffb9fb9c26cc58fad4dd24 ("proc: revalidate misc dentries") Signed-off-by: Alexey Dobriyan Reported-by: Mateusz Stępień Reported-by: Ahmad Fatoum Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/proc/generic.c | 4 +- fs/proc/internal.h | 1 + fs/proc/proc_net.c | 20 +++++ tools/testing/selftests/proc/.gitignore | 1 + tools/testing/selftests/proc/Makefile | 1 + tools/testing/selftests/proc/setns-dcache.c | 129 ++++++++++++++++++++++++++++ 6 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/proc/setns-dcache.c diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8ae109429a88..e39bac94dead 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -256,7 +256,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &proc_misc_dentry_ops); + d_set_d_op(dentry, de->proc_dops); return d_splice_alias(inode, dentry); } read_unlock(&proc_subdir_lock); @@ -429,6 +429,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, INIT_LIST_HEAD(&ent->pde_openers); proc_set_user(ent, (*parent)->uid, (*parent)->gid); + ent->proc_dops = &proc_misc_dentry_ops; + out: return ent; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5185d7f6a51e..95b14196f284 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -44,6 +44,7 @@ struct proc_dir_entry { struct completion *pde_unload_completion; const struct inode_operations *proc_iops; const struct file_operations *proc_fops; + const struct dentry_operations *proc_dops; union { const struct seq_operations *seq_ops; int (*single_show)(struct seq_file *, void *); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index d5e0fcb3439e..a7b12435519e 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -38,6 +38,22 @@ static struct net *get_proc_net(const struct inode *inode) return maybe_get_net(PDE_NET(PDE(inode))); } +static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 0; +} + +static const struct dentry_operations proc_net_dentry_ops = { + .d_revalidate = proc_net_d_revalidate, + .d_delete = always_delete_dentry, +}; + +static void pde_force_lookup(struct proc_dir_entry *pde) +{ + /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ + pde->proc_dops = &proc_net_dentry_ops; +} + static int seq_open_net(struct inode *inode, struct file *file) { unsigned int state_size = PDE(inode)->state_size; @@ -90,6 +106,7 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_seq_fops; p->seq_ops = ops; p->state_size = state_size; @@ -133,6 +150,7 @@ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_seq_fops; p->seq_ops = ops; p->state_size = state_size; @@ -181,6 +199,7 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_single_fops; p->single_show = show; return proc_register(parent, p); @@ -223,6 +242,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo p = proc_create_reg(name, mode, &parent, data); if (!p) return NULL; + pde_force_lookup(p); p->proc_fops = &proc_net_single_fops; p->single_show = show; p->write = write; diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 82121a81681f..29bac5ef9a93 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -10,4 +10,5 @@ /proc-uptime-002 /read /self +/setns-dcache /thread-self diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 1c12c34cf85d..434d033ee067 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -14,6 +14,7 @@ TEST_GEN_PROGS += proc-uptime-001 TEST_GEN_PROGS += proc-uptime-002 TEST_GEN_PROGS += read TEST_GEN_PROGS += self +TEST_GEN_PROGS += setns-dcache TEST_GEN_PROGS += thread-self include ../lib.mk diff --git a/tools/testing/selftests/proc/setns-dcache.c b/tools/testing/selftests/proc/setns-dcache.c new file mode 100644 index 000000000000..60ab197a73fc --- /dev/null +++ b/tools/testing/selftests/proc/setns-dcache.c @@ -0,0 +1,129 @@ +/* + * Copyright © 2019 Alexey Dobriyan + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Test that setns(CLONE_NEWNET) points to new /proc/net content even + * if old one is in dcache. + * + * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled. + */ +#undef NDEBUG +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static pid_t pid = -1; + +static void f(void) +{ + if (pid > 0) { + kill(pid, SIGTERM); + } +} + +int main(void) +{ + int fd[2]; + char _ = 0; + int nsfd; + + atexit(f); + + /* Check for priviledges and syscall availability straight away. */ + if (unshare(CLONE_NEWNET) == -1) { + if (errno == ENOSYS || errno == EPERM) { + return 4; + } + return 1; + } + /* Distinguisher between two otherwise empty net namespaces. */ + if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) { + return 1; + } + + if (pipe(fd) == -1) { + return 1; + } + + pid = fork(); + if (pid == -1) { + return 1; + } + + if (pid == 0) { + if (unshare(CLONE_NEWNET) == -1) { + return 1; + } + + if (write(fd[1], &_, 1) != 1) { + return 1; + } + + pause(); + + return 0; + } + + if (read(fd[0], &_, 1) != 1) { + return 1; + } + + { + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid); + nsfd = open(buf, O_RDONLY); + if (nsfd == -1) { + return 1; + } + } + + /* Reliably pin dentry into dcache. */ + (void)open("/proc/net/unix", O_RDONLY); + + if (setns(nsfd, CLONE_NEWNET) == -1) { + return 1; + } + + kill(pid, SIGTERM); + pid = 0; + + { + char buf[4096]; + ssize_t rv; + int fd; + + fd = open("/proc/net/unix", O_RDONLY); + if (fd == -1) { + return 1; + } + +#define S "Num RefCount Protocol Flags Type St Inode Path\n" + rv = read(fd, buf, sizeof(buf)); + + assert(rv == strlen(S)); + assert(memcmp(buf, S, strlen(S)) == 0); + } + + return 0; +} -- cgit v1.2.3 From 5edeae210cb8c6245cd42ef1c0b4d338c9661cdd Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 1 Feb 2019 14:20:20 -0800 Subject: x86_64: increase stack size for KASAN_EXTRA [ Upstream commit a8e911d13540487942d53137c156bd7707f66e5d ] If the kernel is configured with KASAN_EXTRA, the stack size is increasted significantly because this option sets "-fstack-reuse" to "none" in GCC [1]. As a result, it triggers stack overrun quite often with 32k stack size compiled using GCC 8. For example, this reproducer https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/madvise/madvise06.c triggers a "corrupted stack end detected inside scheduler" very reliably with CONFIG_SCHED_STACK_END_CHECK enabled. There are just too many functions that could have a large stack with KASAN_EXTRA due to large local variables that have been called over and over again without being able to reuse the stacks. Some noticiable ones are size 7648 shrink_page_list 3584 xfs_rmap_convert 3312 migrate_page_move_mapping 3312 dev_ethtool 3200 migrate_misplaced_transhuge_page 3168 copy_process There are other 49 functions are over 2k in size while compiling kernel with "-Wframe-larger-than=" even with a related minimal config on this machine. Hence, it is too much work to change Makefiles for each object to compile without "-fsanitize-address-use-after-scope" individually. [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715#c23 Although there is a patch in GCC 9 to help the situation, GCC 9 probably won't be released in a few months and then it probably take another 6-month to 1-year for all major distros to include it as a default. Hence, the stack usage with KASAN_EXTRA can be revisited again in 2020 when GCC 9 is everywhere. Until then, this patch will help users avoid stack overrun. This has already been fixed for arm64 for the same reason via 6e8830674ea ("arm64: kasan: Increase stack size for KASAN_EXTRA"). Link: http://lkml.kernel.org/r/20190109215209.2903-1-cai@lca.pw Signed-off-by: Qian Cai Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/x86/include/asm/page_64_types.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index b99d497e342d..0b6352aabbd3 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -7,7 +7,11 @@ #endif #ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_EXTRA +#define KASAN_STACK_ORDER 2 +#else #define KASAN_STACK_ORDER 1 +#endif #else #define KASAN_STACK_ORDER 0 #endif -- cgit v1.2.3 From 6027792d6aa888c6d5ae51df81388111a55a0ce1 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 1 Feb 2019 14:20:34 -0800 Subject: mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone [ Upstream commit efad4e475c312456edb3c789d0996d12ed744c13 ] Patch series "mm, memory_hotplug: fix uninitialized pages fallouts", v2. Mikhail Zaslonko has posted fixes for the two bugs quite some time ago [1]. I have pushed back on those fixes because I believed that it is much better to plug the problem at the initialization time rather than play whack-a-mole all over the hotplug code and find all the places which expect the full memory section to be initialized. We have ended up with commit 2830bf6f05fb ("mm, memory_hotplug: initialize struct pages for the full memory section") merged and cause a regression [2][3]. The reason is that there might be memory layouts when two NUMA nodes share the same memory section so the merged fix is simply incorrect. In order to plug this hole we really have to be zone range aware in those handlers. I have split up the original patch into two. One is unchanged (patch 2) and I took a different approach for `removable' crash. [1] http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com [2] https://bugzilla.redhat.com/show_bug.cgi?id=1666948 [3] http://lkml.kernel.org/r/20190125163938.GA20411@dhcp22.suse.cz This patch (of 2): Mikhail has reported the following VM_BUG_ON triggered when reading sysfs removable state of a memory block: page:000003d08300c000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: is_mem_section_removable+0xb4/0x190 show_mem_removable+0x9a/0xd8 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: is_mem_section_removable+0xb4/0x190 Kernel panic - not syncing: Fatal exception: panic_on_oops The reason is that the memory block spans the zone boundary and we are stumbling over an unitialized struct page. Fix this by enforcing zone range in is_mem_section_removable so that we never run away from a zone. Link: http://lkml.kernel.org/r/20190128144506.15603-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Mikhail Zaslonko Debugged-by: Mikhail Zaslonko Tested-by: Gerald Schaefer Tested-by: Mikhail Gavrilov Reviewed-by: Oscar Salvador Cc: Pavel Tatashin Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/memory_hotplug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c6119ad3561e..34cde04f346d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1258,7 +1258,8 @@ static bool is_pageblock_removable_nolock(struct page *page) bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) { struct page *page = pfn_to_page(start_pfn); - struct page *end_page = page + nr_pages; + unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page))); + struct page *end_page = pfn_to_page(end_pfn); /* Check the starting page of each pageblock within the range */ for (; page < end_page; page = next_active_pageblock(page)) { -- cgit v1.2.3 From 71df1c8bc771b4cf9d066ad0afdfb0e5a16e2c0a Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Fri, 1 Feb 2019 14:20:38 -0800 Subject: mm, memory_hotplug: test_pages_in_a_zone do not pass the end of zone [ Upstream commit 24feb47c5fa5b825efb0151f28906dfdad027e61 ] If memory end is not aligned with the sparse memory section boundary, the mapping of such a section is only partly initialized. This may lead to VM_BUG_ON due to uninitialized struct pages access from test_pages_in_a_zone() function triggered by memory_hotplug sysfs handlers. Here are the the panic examples: CONFIG_DEBUG_VM_PGFLAGS=y kernel parameter mem=2050M -------------------------- page:000003d082008000 is uninitialized and poisoned page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p)) Call Trace: test_pages_in_a_zone+0xde/0x160 show_valid_zones+0x5c/0x190 dev_attr_show+0x34/0x70 sysfs_kf_seq_show+0xc8/0x148 seq_read+0x204/0x480 __vfs_read+0x32/0x178 vfs_read+0x82/0x138 ksys_read+0x5a/0xb0 system_call+0xdc/0x2d8 Last Breaking-Event-Address: test_pages_in_a_zone+0xde/0x160 Kernel panic - not syncing: Fatal exception: panic_on_oops Fix this by checking whether the pfn to check is within the zone. [mhocko@suse.com: separated this change from http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com] Link: http://lkml.kernel.org/r/20190128144506.15603-3-mhocko@kernel.org [mhocko@suse.com: separated this change from http://lkml.kernel.org/r/20181105150401.97287-2-zaslonko@linux.ibm.com] Signed-off-by: Michal Hocko Signed-off-by: Mikhail Zaslonko Tested-by: Mikhail Gavrilov Reviewed-by: Oscar Salvador Tested-by: Gerald Schaefer Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Mikhail Gavrilov Cc: Pavel Tatashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/memory_hotplug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 34cde04f346d..ff93a57e1694 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1299,6 +1299,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, i++; if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn) continue; + /* Check if we got outside of the zone */ + if (zone && !zone_spans_pfn(zone, pfn + i)) + return 0; page = pfn_to_page(pfn + i); if (zone && page_zone(page) != zone) return 0; -- cgit v1.2.3 From f1524fd049dec225ca8ae5585fced14359f348e6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Feb 2019 14:20:58 -0800 Subject: lib/test_kmod.c: potential double free in error handling [ Upstream commit db7ddeab3ce5d64c9696e70d61f45ea9909cd196 ] There is a copy and paste bug so we set "config->test_driver" to NULL twice instead of setting "config->test_fs". Smatch complains that it leads to a double free: lib/test_kmod.c:840 __kmod_config_init() warn: 'config->test_fs' double freed Link: http://lkml.kernel.org/r/20190121140011.GA14283@kadam Fixes: d9c6a72d6fa2 ("kmod: add test driver to stress test the module loader") Signed-off-by: Dan Carpenter Acked-by: Luis Chamberlain Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/test_kmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index d82d022111e0..9cf77628fc91 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -632,7 +632,7 @@ static void __kmod_config_free(struct test_config *config) config->test_driver = NULL; kfree_const(config->test_fs); - config->test_driver = NULL; + config->test_fs = NULL; } static void kmod_config_free(struct kmod_test_device *test_dev) -- cgit v1.2.3 From 3d0acc076f5ffb40d28d1210a6b987034d17b46c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 1 Feb 2019 14:21:23 -0800 Subject: fs/drop_caches.c: avoid softlockups in drop_pagecache_sb() [ Upstream commit c27d82f52f75fc9d8d9d40d120d2a96fdeeada5e ] When superblock has lots of inodes without any pagecache (like is the case for /proc), drop_pagecache_sb() will iterate through all of them without dropping sb->s_inode_list_lock which can lead to softlockups (one of our customers hit this). Fix the problem by going to the slow path and doing cond_resched() in case the process needs rescheduling. Link: http://lkml.kernel.org/r/20190114085343.15011-1-jack@suse.cz Signed-off-by: Jan Kara Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/drop_caches.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 82377017130f..d31b6c72b476 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); + /* + * We must skip inodes in unusual state. We may also skip + * inodes without pages but we deliberately won't in case + * we need to reschedule to avoid softlockups. + */ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || - (inode->i_mapping->nrpages == 0)) { + (inode->i_mapping->nrpages == 0 && !need_resched())) { spin_unlock(&inode->i_lock); continue; } @@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); + cond_resched(); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; -- cgit v1.2.3 From 1efb234ec25193233d71997c769f6504d283c671 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Fri, 1 Feb 2019 14:21:26 -0800 Subject: autofs: drop dentry reference only when it is never used [ Upstream commit 63ce5f552beb9bdb41546b3a26c4374758b21815 ] autofs_expire_run() calls dput(dentry) to drop the reference count of dentry. However, dentry is read via autofs_dentry_ino(dentry) after that. This may result in a use-free-bug. The patch drops the reference count of dentry only when it is never used. Link: http://lkml.kernel.org/r/154725122396.11260.16053424107144453867.stgit@pluto-themaw-net Signed-off-by: Pan Bian Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/autofs/expire.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c index d441244b79df..28d9c2b1b3bb 100644 --- a/fs/autofs/expire.c +++ b/fs/autofs/expire.c @@ -596,7 +596,6 @@ int autofs_expire_run(struct super_block *sb, pkt.len = dentry->d_name.len; memcpy(pkt.name, dentry->d_name.name, pkt.len); pkt.name[pkt.len] = '\0'; - dput(dentry); if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire))) ret = -EFAULT; @@ -609,6 +608,8 @@ int autofs_expire_run(struct super_block *sb, complete_all(&ino->expire_complete); spin_unlock(&sbi->fs_lock); + dput(dentry); + return ret; } -- cgit v1.2.3 From 929278903367a8a9ca7b701b4d0af9a6ba5110a3 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Fri, 1 Feb 2019 14:21:29 -0800 Subject: autofs: fix error return in autofs_fill_super() [ Upstream commit f585b283e3f025754c45bbe7533fc6e5c4643700 ] In autofs_fill_super() on error of get inode/make root dentry the return should be ENOMEM as this is the only failure case of the called functions. Link: http://lkml.kernel.org/r/154725123240.11260.796773942606871359.stgit@pluto-themaw-net Signed-off-by: Ian Kent Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/autofs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 846c052569dd..3c14a8e45ffb 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -255,8 +255,10 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) } root_inode = autofs_get_inode(s, S_IFDIR | 0755); root = d_make_root(root_inode); - if (!root) + if (!root) { + ret = -ENOMEM; goto fail_ino; + } pipe = NULL; root->d_fsdata = ino; -- cgit v1.2.3 From 2cc84e2ea68d4c54f5a1058ec05303a616fe1b17 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 20 Feb 2019 22:20:46 -0800 Subject: mm, memory_hotplug: fix off-by-one in is_pageblock_removable [ Upstream commit 891cb2a72d821f930a39d5900cb7a3aa752c1d5b ] Rong Chen has reported the following boot crash: PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 1 PID: 239 Comm: udevd Not tainted 5.0.0-rc4-00149-gefad4e4 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 RIP: 0010:page_mapping+0x12/0x80 Code: 5d c3 48 89 df e8 0e ad 02 00 85 c0 75 da 89 e8 5b 5d c3 0f 1f 44 00 00 53 48 89 fb 48 8b 43 08 48 8d 50 ff a8 01 48 0f 45 da <48> 8b 53 08 48 8d 42 ff 83 e2 01 48 0f 44 c3 48 83 38 ff 74 2f 48 RSP: 0018:ffff88801fa87cd8 EFLAGS: 00010202 RAX: ffffffffffffffff RBX: fffffffffffffffe RCX: 000000000000000a RDX: fffffffffffffffe RSI: ffffffff820b9a20 RDI: ffff88801e5c0000 RBP: 6db6db6db6db6db7 R08: ffff88801e8bb000 R09: 0000000001b64d13 R10: ffff88801fa87cf8 R11: 0000000000000001 R12: ffff88801e640000 R13: ffffffff820b9a20 R14: ffff88801f145258 R15: 0000000000000001 FS: 00007fb2079817c0(0000) GS:ffff88801dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000006 CR3: 000000001fa82000 CR4: 00000000000006a0 Call Trace: __dump_page+0x14/0x2c0 is_mem_section_removable+0x24c/0x2c0 removable_show+0x87/0xa0 dev_attr_show+0x25/0x60 sysfs_kf_seq_show+0xba/0x110 seq_read+0x196/0x3f0 __vfs_read+0x34/0x180 vfs_read+0xa0/0x150 ksys_read+0x44/0xb0 do_syscall_64+0x5e/0x4a0 entry_SYSCALL_64_after_hwframe+0x49/0xbe and bisected it down to commit efad4e475c31 ("mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone"). The reason for the crash is that the mapping is garbage for poisoned (uninitialized) page. This shouldn't happen as all pages in the zone's boundary should be initialized. Later debugging revealed that the actual problem is an off-by-one when evaluating the end_page. 'start_pfn + nr_pages' resp 'zone_end_pfn' refers to a pfn after the range and as such it might belong to a differen memory section. This along with CONFIG_SPARSEMEM then makes the loop condition completely bogus because a pointer arithmetic doesn't work for pages from two different sections in that memory model. Fix the issue by reworking is_pageblock_removable to be pfn based and only use struct page where necessary. This makes the code slightly easier to follow and we will remove the problematic pointer arithmetic completely. Link: http://lkml.kernel.org/r/20190218181544.14616-1-mhocko@kernel.org Fixes: efad4e475c31 ("mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone") Signed-off-by: Michal Hocko Reported-by: Tested-by: Acked-by: Mike Rapoport Reviewed-by: Oscar Salvador Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- mm/memory_hotplug.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ff93a57e1694..156991edec2a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1213,11 +1213,13 @@ static inline int pageblock_free(struct page *page) return PageBuddy(page) && page_order(page) >= pageblock_order; } -/* Return the start of the next active pageblock after a given page */ -static struct page *next_active_pageblock(struct page *page) +/* Return the pfn of the start of the next active pageblock after a given pfn */ +static unsigned long next_active_pageblock(unsigned long pfn) { + struct page *page = pfn_to_page(pfn); + /* Ensure the starting page is pageblock-aligned */ - BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1)); + BUG_ON(pfn & (pageblock_nr_pages - 1)); /* If the entire pageblock is free, move to the end of free page */ if (pageblock_free(page)) { @@ -1225,16 +1227,16 @@ static struct page *next_active_pageblock(struct page *page) /* be careful. we don't have locks, page_order can be changed.*/ order = page_order(page); if ((order < MAX_ORDER) && (order >= pageblock_order)) - return page + (1 << order); + return pfn + (1 << order); } - return page + pageblock_nr_pages; + return pfn + pageblock_nr_pages; } -static bool is_pageblock_removable_nolock(struct page *page) +static bool is_pageblock_removable_nolock(unsigned long pfn) { + struct page *page = pfn_to_page(pfn); struct zone *zone; - unsigned long pfn; /* * We have to be careful here because we are iterating over memory @@ -1257,13 +1259,14 @@ static bool is_pageblock_removable_nolock(struct page *page) /* Checks if this range of memory is likely to be hot-removable. */ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) { - struct page *page = pfn_to_page(start_pfn); - unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page))); - struct page *end_page = pfn_to_page(end_pfn); + unsigned long end_pfn, pfn; + + end_pfn = min(start_pfn + nr_pages, + zone_end_pfn(page_zone(pfn_to_page(start_pfn)))); /* Check the starting page of each pageblock within the range */ - for (; page < end_page; page = next_active_pageblock(page)) { - if (!is_pageblock_removable_nolock(page)) + for (pfn = start_pfn; pfn < end_pfn; pfn = next_active_pageblock(pfn)) { + if (!is_pageblock_removable_nolock(pfn)) return false; cond_resched(); } -- cgit v1.2.3 From b510051c73a799e6d43a08c182f4f78b3bcac2bf Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 23 Dec 2018 20:24:13 +0200 Subject: ARM: OMAP: dts: N950/N9: fix onenand timings [ Upstream commit 8443e4843e1c2594bf5664e1d993a1be71d1befb ] Commit a758f50f10cf ("mtd: onenand: omap2: Configure driver from DT") started using DT specified timings for GPMC, and as a result the OneNAND stopped working on N950/N9 as we had wrong values in the DT. Fix by updating the values to bootloader timings that have been tested to be working on both Nokia N950 and N9. Fixes: a758f50f10cf ("mtd: onenand: omap2: Configure driver from DT") Signed-off-by: Aaro Koskinen Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap3-n950-n9.dtsi | 42 ++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index 0d9b85317529..e142e6c70a59 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -370,6 +370,19 @@ compatible = "ti,omap2-onenand"; reg = <0 0 0x20000>; /* CS0, offset 0, IO size 128K */ + /* + * These timings are based on CONFIG_OMAP_GPMC_DEBUG=y reported + * bootloader set values when booted with v4.19 using both N950 + * and N9 devices (OneNAND Manufacturer: Samsung): + * + * gpmc cs0 before gpmc_cs_program_settings: + * cs0 GPMC_CS_CONFIG1: 0xfd001202 + * cs0 GPMC_CS_CONFIG2: 0x00181800 + * cs0 GPMC_CS_CONFIG3: 0x00030300 + * cs0 GPMC_CS_CONFIG4: 0x18001804 + * cs0 GPMC_CS_CONFIG5: 0x03171d1d + * cs0 GPMC_CS_CONFIG6: 0x97080000 + */ gpmc,sync-read; gpmc,sync-write; gpmc,burst-length = <16>; @@ -379,26 +392,27 @@ gpmc,device-width = <2>; gpmc,mux-add-data = <2>; gpmc,cs-on-ns = <0>; - gpmc,cs-rd-off-ns = <87>; - gpmc,cs-wr-off-ns = <87>; + gpmc,cs-rd-off-ns = <122>; + gpmc,cs-wr-off-ns = <122>; gpmc,adv-on-ns = <0>; - gpmc,adv-rd-off-ns = <10>; - gpmc,adv-wr-off-ns = <10>; - gpmc,oe-on-ns = <15>; - gpmc,oe-off-ns = <87>; + gpmc,adv-rd-off-ns = <15>; + gpmc,adv-wr-off-ns = <15>; + gpmc,oe-on-ns = <20>; + gpmc,oe-off-ns = <122>; gpmc,we-on-ns = <0>; - gpmc,we-off-ns = <87>; - gpmc,rd-cycle-ns = <112>; - gpmc,wr-cycle-ns = <112>; - gpmc,access-ns = <81>; + gpmc,we-off-ns = <122>; + gpmc,rd-cycle-ns = <148>; + gpmc,wr-cycle-ns = <148>; + gpmc,access-ns = <117>; gpmc,page-burst-access-ns = <15>; gpmc,bus-turnaround-ns = <0>; gpmc,cycle2cycle-delay-ns = <0>; gpmc,wait-monitoring-ns = <0>; - gpmc,clk-activation-ns = <5>; - gpmc,wr-data-mux-bus-ns = <30>; - gpmc,wr-access-ns = <81>; - gpmc,sync-clk-ps = <15000>; + gpmc,clk-activation-ns = <10>; + gpmc,wr-data-mux-bus-ns = <40>; + gpmc,wr-access-ns = <117>; + + gpmc,sync-clk-ps = <15000>; /* TBC; Where this value came? */ /* * MTD partition table corresponding to Nokia's MeeGo 1.2 -- cgit v1.2.3 From ec27bcd3f688abff5b02769760763fe5b358ceae Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Jan 2019 09:52:43 -0800 Subject: ARM: dts: omap4-droid4: Fix typo in cpcap IRQ flags [ Upstream commit ef4a55b9197a8f844ea0663138e902dcce3e2f36 ] We're now getting the following error: genirq: Setting trigger mode 1 for irq 230 failed (regmap_irq_set_type+0x0/0x15c) cpcap-usb-phy cpcap-usb-phy.0: could not get irq dp: -524 Cc: Sebastian Reichel Reported-by: Pavel Machek Tested-by: Pavel Machek Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi index ddc7a7bb33c0..f57acf8f66b9 100644 --- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi +++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi @@ -105,7 +105,7 @@ interrupts-extended = < &cpcap 15 0 &cpcap 14 0 &cpcap 28 0 &cpcap 19 0 &cpcap 18 0 &cpcap 17 0 &cpcap 16 0 &cpcap 49 0 - &cpcap 48 1 + &cpcap 48 0 >; interrupt-names = "id_ground", "id_float", "se0conn", "vbusvld", -- cgit v1.2.3 From 094ed50726a28d4797e16742a1c1fd9c3f578d20 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Tue, 8 Jan 2019 20:18:40 +0100 Subject: ARM: dts: sun8i: h3: Add ethernet0 alias to Beelink X2 [ Upstream commit cc4bddade114b696ab27c1a77cfc7040151306da ] Because "ethernet0" alias is missing, U-Boot doesn't generate board specific MAC address. Effect of this is random MAC address every boot and thus new IP address is assigned to the board. Fix this by adding alias. Fixes: 7389172fc3ed ("ARM: dts: sun8i: h3: Enable dwmac-sun8i on the Beelink X2") Signed-off-by: Jernej Skrabec [Maxime: Removed unneeded comment] Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun8i-h3-beelink-x2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts index 5d23667dc2d2..25540b7694d5 100644 --- a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts +++ b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts @@ -53,7 +53,7 @@ aliases { serial0 = &uart0; - /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */ + ethernet0 = &emac; ethernet1 = &sdiowifi; }; -- cgit v1.2.3 From 94f31b4c9780593bb97433a335e53bd5f7b6bba6 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Fri, 7 Dec 2018 10:52:31 +0000 Subject: arm: dts: meson: Fix IRQ trigger type for macirq [ Upstream commit e35e26b26e955c53e61c154ba26b9bb15da6b858 ] A long running stress test on a custom board shipping an AXG SoCs and a Realtek RTL8211F PHY revealed that after a few hours the connection speed would drop drastically, from ~1000Mbps to ~3Mbps. At the same time the 'macirq' (eth0) IRQ would stop being triggered at all and as consequence the GMAC IRQs never ACKed. After a painful investigation the problem seemed to be due to a wrong defined IRQ type for the GMAC IRQ that should be LEVEL_HIGH instead of EDGE_RISING. The change in the macirq IRQ type also solved another long standing issue affecting this SoC/PHY where EEE was causing the network connection to die after stressing it with iperf3 (even though much sooner). It's now possible to remove the 'eee-broken-1000t' quirk as well. Fixes: 9c15795a4f96 ("ARM: dts: meson8b-odroidc1: ethernet support") Signed-off-by: Carlo Caione Reviewed-by: Martin Blumenstingl Tested-by: Martin Blumenstingl Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm/boot/dts/meson.dtsi | 2 +- arch/arm/boot/dts/meson8b-odroidc1.dts | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index 0d9faf1a51ea..a86b89086334 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -263,7 +263,7 @@ compatible = "amlogic,meson6-dwmac", "snps,dwmac"; reg = <0xc9410000 0x10000 0xc1108108 0x4>; - interrupts = ; + interrupts = ; interrupt-names = "macirq"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/meson8b-odroidc1.dts b/arch/arm/boot/dts/meson8b-odroidc1.dts index ef3177d3da3d..9e48936e2d8d 100644 --- a/arch/arm/boot/dts/meson8b-odroidc1.dts +++ b/arch/arm/boot/dts/meson8b-odroidc1.dts @@ -125,7 +125,6 @@ /* Realtek RTL8211F (0x001cc916) */ eth_phy: ethernet-phy@0 { reg = <0>; - eee-broken-1000t; interrupt-parent = <&gpio_intc>; /* GPIOH_3 */ interrupts = <17 IRQ_TYPE_LEVEL_LOW>; -- cgit v1.2.3 From ab2f85e5969379726c88a1dda24aab12f7cee5bb Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 29 Dec 2018 13:57:09 +0100 Subject: ARM: dts: meson8b: odroidc1: mark the SD card detection GPIO active-low [ Upstream commit 3fb348e030319f20ebbde082a449d4bf8a96f2fd ] After commit 89a5e15bcba87d ("gpio/mmc/of: Respect polarity in the device tree") SD cards are not detected anymore. The CD GPIO is "active low" on Odroid-C1. The MMC dt-bindings specify: "[...] using the "cd-inverted" property means, that the CD line is active high, i.e. it is high, when a card is inserted". Fix the description of the SD card by marking it as GPIO_ACTIVE_LOW and drop the "cd-inverted" property. This makes the definition consistent with the existing dt-bindings and fixes the check whether an SD card is inserted. Fixes: e03efbce6bebf5 ("ARM: dts: meson8b-odroidc1: add microSD support") Signed-off-by: Martin Blumenstingl Reviewed-by: Linus Walleij Tested-by: Anand Moon Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm/boot/dts/meson8b-odroidc1.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/meson8b-odroidc1.dts b/arch/arm/boot/dts/meson8b-odroidc1.dts index 9e48936e2d8d..8fdeeffecbdb 100644 --- a/arch/arm/boot/dts/meson8b-odroidc1.dts +++ b/arch/arm/boot/dts/meson8b-odroidc1.dts @@ -171,8 +171,7 @@ cap-sd-highspeed; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&tflash_vdd>; vqmmc-supply = <&tf_io>; -- cgit v1.2.3 From 3edbed0dd13f290083671631af6b39c10a3d1a7e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 29 Dec 2018 13:57:11 +0100 Subject: ARM: dts: meson8m2: mxiii-plus: mark the SD card detection GPIO active-low [ Upstream commit 8615f5596335db0978cea593dcd0070dc5f8b116 ] After commit 89a5e15bcba87d ("gpio/mmc/of: Respect polarity in the device tree") SD cards are not detected anymore. The CD GPIO is "active low" on the MXIII-Plus. The MMC dt-bindings specify: "[...] using the "cd-inverted" property means, that the CD line is active high, i.e. it is high, when a card is inserted". Fix the description of the SD card by marking it as GPIO_ACTIVE_LOW and drop the "cd-inverted" property. This makes the definition consistent with the existing dt-bindings and fixes the check whether an SD card is inserted. Fixes: 35ee52bea66c74 ("ARM: dts: meson8m2: add support for the Tronsmart MXIII Plus") Signed-off-by: Martin Blumenstingl Reviewed-by: Linus Walleij Signed-off-by: Kevin Hilman Signed-off-by: Sasha Levin --- arch/arm/boot/dts/meson8m2-mxiii-plus.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts index f5853610b20b..6ac02beb5fa7 100644 --- a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts +++ b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts @@ -206,8 +206,7 @@ cap-sd-highspeed; disable-wp; - cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; - cd-inverted; + cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_LOW>; vmmc-supply = <&vcc_3v3>; }; -- cgit v1.2.3 From f02070cb4b603b36ff2a3343c2429b279c9f8084 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Sat, 29 Dec 2018 10:01:18 +0000 Subject: ARM: dts: imx6sx: correct backward compatible of gpt [ Upstream commit ba0f4560526ba19300c07ed5a3c1df7592815dc6 ] i.MX6SX has same GPT type as i.MX6DL, in GPT driver, it uses below TIMER_OF_DECLARE, so the backward compatible should be "fsl,imx6dl-gpt", correct it. TIMER_OF_DECLARE(imx6sx_timer, "fsl,imx6sx-gpt", imx6dl_timer_init_dt); Signed-off-by: Anson Huang Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6sx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index 844caa39364f..50083cecc6c9 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -462,7 +462,7 @@ }; gpt: gpt@2098000 { - compatible = "fsl,imx6sx-gpt", "fsl,imx31-gpt"; + compatible = "fsl,imx6sx-gpt", "fsl,imx6dl-gpt"; reg = <0x02098000 0x4000>; interrupts = ; clocks = <&clks IMX6SX_CLK_GPT_BUS>, -- cgit v1.2.3 From 4dc8b2e57ee4f8c8ab2fcdadb927e023ac1a1d7e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 10 Jan 2019 14:39:15 +0100 Subject: arm64: dts: renesas: r8a7796: Enable DMA for SCIF2 [ Upstream commit 97f26702bc95b5c3a72671d5c6675e4d6ee0a2f4 ] SCIF2 on R-Car M3-W can be used with both DMAC1 and DMAC2. Fixes: dbcae5ea4bd27409 ("arm64: dts: r8a7796: Enable SCIF DMA") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r8a7796.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/r8a7796.dtsi b/arch/arm64/boot/dts/renesas/r8a7796.dtsi index cbd35c00b4af..33cb0281c39c 100644 --- a/arch/arm64/boot/dts/renesas/r8a7796.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a7796.dtsi @@ -1161,6 +1161,9 @@ <&cpg CPG_CORE R8A7796_CLK_S3D1>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; + dmas = <&dmac1 0x13>, <&dmac1 0x12>, + <&dmac2 0x13>, <&dmac2 0x12>; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A7796_PD_ALWAYS_ON>; resets = <&cpg 310>; status = "disabled"; -- cgit v1.2.3 From 959773138ac9e9c7f2ebc79f8abcec661b76c9bb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 10 Jan 2019 14:39:16 +0100 Subject: arm64: dts: renesas: r8a77965: Enable DMA for SCIF2 [ Upstream commit 05c8478abd485507c25aa565afab604af8d8fe46 ] SCIF2 on R-Car M3-N can be used with both DMAC1 and DMAC2. Fixes: 0ea5b2fd38db56aa ("arm64: dts: renesas: r8a77965: Add SCIF device nodes") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/r8a77965.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi index 0cd44461a0bd..f60f08ba1a6f 100644 --- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi @@ -951,6 +951,9 @@ <&cpg CPG_CORE R8A77965_CLK_S3D1>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; + dmas = <&dmac1 0x13>, <&dmac1 0x12>, + <&dmac2 0x13>, <&dmac2 0x12>; + dma-names = "tx", "rx", "tx", "rx"; power-domains = <&sysc R8A77965_PD_ALWAYS_ON>; resets = <&cpg 310>; status = "disabled"; -- cgit v1.2.3 From a50434de0cfe68732296875073384ff409ae723a Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Fri, 21 Dec 2018 16:41:42 +0200 Subject: soc: fsl: qbman: avoid race in clearing QMan interrupt [ Upstream commit 89857a8a5c89a406b967ab2be7bd2ccdbe75e73d ] By clearing all interrupt sources, not only those that already occurred, the existing code may acknowledge by mistake interrupts that occurred after the code checks for them. Signed-off-by: Madalin Bucur Signed-off-by: Roy Pledge Signed-off-by: Li Yang Signed-off-by: Sasha Levin --- drivers/soc/fsl/qbman/qman.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 8cc015183043..a4ac6073c555 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1081,18 +1081,19 @@ static void qm_mr_process_task(struct work_struct *work); static irqreturn_t portal_isr(int irq, void *ptr) { struct qman_portal *p = ptr; - - u32 clear = QM_DQAVAIL_MASK | p->irq_sources; u32 is = qm_in(&p->p, QM_REG_ISR) & p->irq_sources; + u32 clear = 0; if (unlikely(!is)) return IRQ_NONE; /* DQRR-handling if it's interrupt-driven */ - if (is & QM_PIRQ_DQRI) + if (is & QM_PIRQ_DQRI) { __poll_portal_fast(p, QMAN_POLL_LIMIT); + clear = QM_DQAVAIL_MASK | QM_PIRQ_DQRI; + } /* Handling of anything else that's interrupt-driven */ - clear |= __poll_portal_slow(p, is); + clear |= __poll_portal_slow(p, is) & QM_PIRQ_SLOW; qm_out(&p->p, QM_REG_ISR, clear); return IRQ_HANDLED; } -- cgit v1.2.3 From 2634448683d32d131df5d050f07c6dec4aeb5d39 Mon Sep 17 00:00:00 2001 From: Jason Kridner Date: Fri, 11 Jan 2019 10:02:13 -0500 Subject: pinctrl: mcp23s08: spi: Fix regmap allocation for mcp23s18 [ Upstream commit f165988b77ef849eb0c1aebd94fe778024f88314 ] Fixes issue created by 9b3e4207661e67f04c72af15e29f74cd944f5964. It wasn't possible for one_regmap_config to be non-NULL at the point it was tested for mcp23s18 devices. Applied the same pattern of allocating one_regmap_config using devm_kmemdump() and then initializing the local regmap structure from that. Signed-off-by: Jason Kridner Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-mcp23s08.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index cf73a403d22d..cecbce21d01f 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -832,8 +832,13 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, break; case MCP_TYPE_S18: + one_regmap_config = + devm_kmemdup(dev, &mcp23x17_regmap, + sizeof(struct regmap_config), GFP_KERNEL); + if (!one_regmap_config) + return -ENOMEM; mcp->regmap = devm_regmap_init(dev, &mcp23sxx_spi_regmap, mcp, - &mcp23x17_regmap); + one_regmap_config); mcp->reg_shift = 1; mcp->chip.ngpio = 16; mcp->chip.label = "mcp23s18"; -- cgit v1.2.3 From 9a0f0bbac9a0b18115aad305399105ede061d936 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 16 Jan 2019 12:37:23 +0100 Subject: wlcore: sdio: Fixup power on/off sequence [ Upstream commit 13e62626c578d9889ebbda7c521be5adff9bef8e ] During "wlan-up", we are programming the FW into the WiFi-chip. However, re-programming the FW doesn't work, unless a power cycle of the WiFi-chip is made in-between the programmings. To conform to this requirement and to fix the regression in a simple way, let's start by allowing that the SDIO card (WiFi-chip) may stay powered on (runtime resumed) when wl12xx_sdio_power_off() returns. The intent with the current code is to treat this scenario as an error, but unfortunate this doesn't work as expected, so let's fix this. The other part is to guarantee that a power cycle of the SDIO card has been completed when wl12xx_sdio_power_on() returns, as to allow the FW programming to succeed. However, relying solely on runtime PM to deal with this isn't sufficient. For example, userspace may prevent runtime suspend via sysfs for the device that represents the SDIO card, leading to that the mmc core also keeps it powered on. For this reason, let's instead do a brute force power cycle in wl12xx_sdio_power_on(). Fixes: 728a9dc61f13 ("wlcore: sdio: Fix flakey SDIO runtime PM handling") Signed-off-by: Ulf Hansson Tested-by: Tony Lindgren Tested-by: Anders Roxell Signed-off-by: Ulf Hansson Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/ti/wlcore/sdio.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index 750bea3574ee..627df164b7b6 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -164,6 +164,12 @@ static int wl12xx_sdio_power_on(struct wl12xx_sdio_glue *glue) } sdio_claim_host(func); + /* + * To guarantee that the SDIO card is power cycled, as required to make + * the FW programming to succeed, let's do a brute force HW reset. + */ + mmc_hw_reset(card->host); + sdio_enable_func(func); sdio_release_host(func); @@ -174,20 +180,13 @@ static int wl12xx_sdio_power_off(struct wl12xx_sdio_glue *glue) { struct sdio_func *func = dev_to_sdio_func(glue->dev); struct mmc_card *card = func->card; - int error; sdio_claim_host(func); sdio_disable_func(func); sdio_release_host(func); /* Let runtime PM know the card is powered off */ - error = pm_runtime_put(&card->dev); - if (error < 0 && error != -EBUSY) { - dev_err(&card->dev, "%s failed: %i\n", __func__, error); - - return error; - } - + pm_runtime_put(&card->dev); return 0; } -- cgit v1.2.3 From 1da961de4ad4f483dbf36450767b51eed235a212 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 18 Jan 2019 13:58:17 +0100 Subject: bpftool: Fix prog dump by tag [ Upstream commit 752bcf80f5549c9901b2e8bc77b2138de55b1026 ] Lance reported an issue with bpftool not being able to dump program if there are more programs loaded and you want to dump any but the first program, like: # bpftool prog 28: kprobe name trace_req_start tag 1dfc28ba8b3dd597 gpl loaded_at 2019-01-18T17:02:40+1100 uid 0 xlated 112B jited 109B memlock 4096B map_ids 13 29: kprobe name trace_req_compl tag 5b6a5ecc6030a683 gpl loaded_at 2019-01-18T17:02:40+1100 uid 0 xlated 928B jited 575B memlock 4096B map_ids 13,14 # bpftool prog dum jited tag 1dfc28ba8b3dd597 0: push %rbp 1: mov %rsp,%rbp ... # bpftool prog dum jited tag 5b6a5ecc6030a683 Error: can't get prog info (29): Bad address The problem is in the prog_fd_by_tag function not cleaning the struct bpf_prog_info before another request, so the previous program length is still in there and kernel assumes it needs to dump the program, which fails because there's no user pointer set. Moving the struct bpf_prog_info declaration into the loop, so it gets cleaned before each query. Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Reported-by: Lance Digby Signed-off-by: Jiri Olsa Reviewed-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- tools/bpf/bpftool/prog.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 0de024a6cc2b..bbba0d61570f 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -109,13 +109,14 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) static int prog_fd_by_tag(unsigned char *tag) { - struct bpf_prog_info info = {}; - __u32 len = sizeof(info); unsigned int id = 0; int err; int fd; while (true) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + err = bpf_prog_get_next_id(id, &id); if (err) { p_err("%s", strerror(errno)); -- cgit v1.2.3 From 7e2b2e24e356d4eb8e2c99f0a349c87e1aae0f1f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 21 Jan 2019 12:36:12 +0100 Subject: bpftool: fix percpu maps updating [ Upstream commit b0ca5ecb8e2279d706261f525f1bd0ba9e3fe800 ] When updating a percpu map, bpftool currently copies the provided value only into the first per CPU copy of the specified value, all others instances are left zeroed. This change explicitly copies the user-provided bytes to all the per CPU instances, keeping the sub-command syntax unchanged. v2 -> v3: - drop unused argument, as per Quentin's suggestion v1 -> v2: - rename the helper as per Quentin's suggestion Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Paolo Abeni Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- tools/bpf/bpftool/map.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index b455930a3eaf..ec73d83d0d31 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -370,6 +370,20 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val, return argv + i; } +/* on per cpu maps we must copy the provided value on all value instances */ +static void fill_per_cpu_value(struct bpf_map_info *info, void *value) +{ + unsigned int i, n, step; + + if (!map_is_per_cpu(info->type)) + return; + + n = get_possible_cpus(); + step = round_up(info->value_size, 8); + for (i = 1; i < n; i++) + memcpy(value + i * step, value, info->value_size); +} + static int parse_elem(char **argv, struct bpf_map_info *info, void *key, void *value, __u32 key_size, __u32 value_size, __u32 *flags, __u32 **value_fd) @@ -449,6 +463,8 @@ static int parse_elem(char **argv, struct bpf_map_info *info, argv = parse_bytes(argv, "value", value, value_size); if (!argv) return -1; + + fill_per_cpu_value(info, value); } return parse_elem(argv, info, key, NULL, key_size, value_size, -- cgit v1.2.3 From a8795ba8995a164f32c154114d1f6a0ee9bd4f8b Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 23 Jan 2019 12:37:19 +0800 Subject: bpf: sock recvbuff must be limited by rmem_max in bpf_setsockopt() [ Upstream commit c9e4576743eeda8d24dedc164d65b78877f9a98c ] When sock recvbuff is set by bpf_setsockopt(), the value must by limited by rmem_max. It is the same with sendbuff. Fixes: 8c4b4c7e9ff0 ("bpf: Add setsockopt helper function to bpf") Signed-off-by: Yafang Shao Acked-by: Martin KaFai Lau Acked-by: Lawrence Brakmo Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index fb0080e84bd4..bed9061102f4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3909,10 +3909,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: + val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_SNDBUF: + val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; -- cgit v1.2.3 From ac0997349142cc692f892987a80aa85d1259d008 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Sat, 29 Dec 2018 13:10:06 +0800 Subject: ARM: pxa: ssp: unneeded to free devm_ allocated data [ Upstream commit ba16adeb346387eb2d1ada69003588be96f098fa ] devm_ allocated data will be automatically freed. The free of devm_ allocated data is invalid. Fixes: 1c459de1e645 ("ARM: pxa: ssp: use devm_ functions") Signed-off-by: Peng Hao [title's prefix changed] Signed-off-by: Robert Jarzmik Signed-off-by: Sasha Levin --- arch/arm/plat-pxa/ssp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index ed36dcab80f1..f51919974183 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -190,8 +190,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) if (ssp == NULL) return -ENODEV; - iounmap(ssp->mmio_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); @@ -201,7 +199,6 @@ static int pxa_ssp_remove(struct platform_device *pdev) list_del(&ssp->node); mutex_unlock(&ssp_lock); - kfree(ssp); return 0; } -- cgit v1.2.3 From 90f0a75be8eb1f24982bc7c199a21635127f2412 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 10 Dec 2018 13:56:33 +0000 Subject: arm64: dts: add msm8996 compatible to gicv3 [ Upstream commit 2a81efb0de0e33f2d2c83154af0bd3ce389b3269 ] Add compatible to gicv3 node to enable quirk required to restrict writing to GICR_WAKER register which is restricted on msm8996 SoC in Hypervisor. With this quirk MSM8996 can at least boot out of mainline, which can help community to work with boards based on MSM8996. Without this patch Qualcomm DB820c board reboots on mainline. Signed-off-by: Srinivas Kandagatla Signed-off-by: Andy Gross Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index cd3865e7a270..8c86c41a0d25 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -399,7 +399,7 @@ }; intc: interrupt-controller@9bc0000 { - compatible = "arm,gic-v3"; + compatible = "qcom,msm8996-gic-v3", "arm,gic-v3"; #interrupt-cells = <3>; interrupt-controller; #redistributor-regions = <1>; -- cgit v1.2.3 From f7e3fb0a6f9d6bf3b8bf33f6205c2548f426ce88 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 25 Jan 2019 08:21:26 +0100 Subject: batman-adv: release station info tidstats [ Upstream commit 7d652669b61d702c6e62a39579d17f6881670ab6 ] With the addition of TXQ stats in the per-tid statistics the struct station_info grew significantly. This resulted in stack size warnings due to the structure itself being above the limit for the warnings. To work around this, the TID array was allocated dynamically. Also a function to free this content was introduced with commit 7ea3e110f2f8 ("cfg80211: release station info tidstats where needed") but the necessary changes were not provided for batman-adv's B.A.T.M.A.N. V implementation. Signed-off-by: Felix Fietkau Fixes: 8689c051a201 ("cfg80211: dynamically allocate per-tid stats for station info") [sven@narfation.org: add commit message] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin --- net/batman-adv/bat_v_elp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index e8090f099eb8..ef0dec20c7d8 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -104,6 +104,9 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo); + /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + dev_put(real_netdev); if (ret == -ENOENT) { /* Node is not associated anymore! It would be -- cgit v1.2.3 From fbc3ed1339ecfe0665e7321186c06f0f66714c60 Mon Sep 17 00:00:00 2001 From: Zhou Yanjie Date: Fri, 25 Jan 2019 02:22:15 +0800 Subject: DTS: CI20: Fix bugs in ci20's device tree. [ Upstream commit 1ca1c87f91d9dc50d6a38e2177b2032996e7901c ] According to the Schematic, the hardware of ci20 leads to uart3, but not to uart2. Uart2 is miswritten in the original code. Signed-off-by: Zhou Yanjie Signed-off-by: Paul Burton Cc: linux-mips Cc: linux-kernel Cc: devicetree@vger.kernel.org Cc: robh+dt@kernel.org Cc: ralf@linux-mips.org Cc: jhogan@kernel.org Cc: mark.rutland@arm.com Cc: malat@debian.org Cc: ezequiel@collabora.co.uk Cc: ulf.hansson@linaro.org Cc: syq Cc: jiaxun.yang Signed-off-by: Sasha Levin --- arch/mips/boot/dts/ingenic/ci20.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 50cff3cbcc6d..4f7b1fa31cf5 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -76,7 +76,7 @@ status = "okay"; pinctrl-names = "default"; - pinctrl-0 = <&pins_uart2>; + pinctrl-0 = <&pins_uart3>; }; &uart4 { @@ -196,9 +196,9 @@ bias-disable; }; - pins_uart2: uart2 { - function = "uart2"; - groups = "uart2-data", "uart2-hwflow"; + pins_uart3: uart3 { + function = "uart3"; + groups = "uart3-data", "uart3-hwflow"; bias-disable; }; -- cgit v1.2.3 From 377ffe354dc0c4fc8d84662e2756d0eae191e87f Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 22 Jan 2019 11:36:02 +0100 Subject: usb: phy: fix link errors [ Upstream commit f2105d42597f4d10e431b195d69e96dccaf9b012 ] Fix link errors when CONFIG_FSL_USB2_OTG is enabled and USB_OTG_FSM is set to module then the following link error occurs. aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_ioctl': drivers/usb/phy/phy-fsl-usb.c:1083: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:1083:(.text+0x574): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_start_srp': drivers/usb/phy/phy-fsl-usb.c:674: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:674:(.text+0x61c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_set_host': drivers/usb/phy/phy-fsl-usb.c:593: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:593:(.text+0x7a4): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_start_hnp': drivers/usb/phy/phy-fsl-usb.c:695: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:695:(.text+0x858): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `a_wait_enum': drivers/usb/phy/phy-fsl-usb.c:274: undefined reference to `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.c:274:(.text+0x16f0): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o:drivers/usb/phy/phy-fsl-usb.c:619: more undefined references to `otg_statemachine' follow aarch64-linux-gnu-ld: drivers/usb/phy/phy-fsl-usb.o: in function `fsl_otg_set_peripheral': drivers/usb/phy/phy-fsl-usb.c:619:(.text+0x1fa0): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `otg_statemachine' make[1]: *** [Makefile:1020: vmlinux] Error 1 make[1]: Target 'Image' not remade because of errors. make: *** [Makefile:152: sub-make] Error 2 make: Target 'Image' not remade because of errors. Rework so that FSL_USB2_OTG depends on that the USB_OTG_FSM is builtin. Signed-off-by: Anders Roxell Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/phy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index d7312eed6088..91ea3083e7ad 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -21,7 +21,7 @@ config AB8500_USB config FSL_USB2_OTG bool "Freescale USB OTG Transceiver Driver" - depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM + depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM=y && PM depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y' select USB_PHY help -- cgit v1.2.3 From dc81cfafb4b96d83c740387ef880e720214c92ab Mon Sep 17 00:00:00 2001 From: Heyi Guo Date: Thu, 24 Jan 2019 21:37:08 +0800 Subject: irqchip/gic-v4: Fix occasional VLPI drop [ Upstream commit 6479450f72c1391c03f08affe0d0110f41ae7ca0 ] 1. In current implementation, every VLPI will temporarily be mapped to the first CPU in system (normally CPU0) and then moved to the real scheduled CPU later. 2. So there is a time window and a VLPI may be sent to CPU0 instead of the real scheduled vCPU, in a multi-CPU virtual machine. 3. However, CPU0 may have not been scheduled as a virtual CPU after system boots up, so the value of its GICR_VPROPBASER is unknown at that moment. 4. If the INTID of VLPI is larger than 2^(GICR_VPROPBASER.IDbits+1), while IDbits is also in unknown state, GIC will behave as if the VLPI is out of range and simply drop it, which results in interrupt missing in Guest. As no code will clear GICR_VPROPBASER at runtime, we can safely initialize the IDbits field at boot time for each CPU to get rid of this issue. We also clear Valid bit of GICR_VPENDBASER in case any ancient programming gets left in and causes memory corrupting. A new function its_clear_vpend_valid() is added to reuse the code in its_vpe_deschedule(). Fixes: e643d8034036 ("irqchip/gic-v3-its: Add VPE scheduling") Signed-off-by: Heyi Guo Signed-off-by: Heyi Guo Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v3-its.c | 66 +++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 4c2246fe5dbe..d9a880108315 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1951,6 +1951,29 @@ static void its_free_pending_table(struct page *pt) get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K))); } +static u64 its_clear_vpend_valid(void __iomem *vlpi_base) +{ + u32 count = 1000000; /* 1s! */ + bool clean; + u64 val; + + val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + val &= ~GICR_VPENDBASER_Valid; + gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + do { + val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + clean = !(val & GICR_VPENDBASER_Dirty); + if (!clean) { + count--; + cpu_relax(); + udelay(1); + } + } while (!clean && count); + + return val; +} + static void its_cpu_init_lpis(void) { void __iomem *rbase = gic_data_rdist_rd_base(); @@ -2024,6 +2047,30 @@ static void its_cpu_init_lpis(void) val |= GICR_CTLR_ENABLE_LPIS; writel_relaxed(val, rbase + GICR_CTLR); + if (gic_rdists->has_vlpis) { + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + + /* + * It's possible for CPU to receive VLPIs before it is + * sheduled as a vPE, especially for the first CPU, and the + * VLPI with INTID larger than 2^(IDbits+1) will be considered + * as out of range and dropped by GIC. + * So we initialize IDbits to known value to avoid VLPI drop. + */ + val = (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK; + pr_debug("GICv4: CPU%d: Init IDbits to 0x%llx for GICR_VPROPBASER\n", + smp_processor_id(), val); + gits_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + + /* + * Also clear Valid bit of GICR_VPENDBASER, in case some + * ancient programming gets left in and has possibility of + * corrupting memory. + */ + val = its_clear_vpend_valid(vlpi_base); + WARN_ON(val & GICR_VPENDBASER_Dirty); + } + /* Make sure the GIC has seen the above */ dsb(sy); } @@ -2644,26 +2691,11 @@ static void its_vpe_schedule(struct its_vpe *vpe) static void its_vpe_deschedule(struct its_vpe *vpe) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); - u32 count = 1000000; /* 1s! */ - bool clean; u64 val; - /* We're being scheduled out */ - val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - val &= ~GICR_VPENDBASER_Valid; - gits_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - - do { - val = gits_read_vpendbaser(vlpi_base + GICR_VPENDBASER); - clean = !(val & GICR_VPENDBASER_Dirty); - if (!clean) { - count--; - cpu_relax(); - udelay(1); - } - } while (!clean && count); + val = its_clear_vpend_valid(vlpi_base); - if (unlikely(!clean && !count)) { + if (unlikely(val & GICR_VPENDBASER_Dirty)) { pr_err_ratelimited("ITS virtual pending table not cleaning\n"); vpe->idai = false; vpe->pending_last = true; -- cgit v1.2.3 From 423869f8871dee4028b67fa30f660ab43d14cfb2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 Jan 2019 15:19:23 +0000 Subject: irqchip/gic-v3-its: Gracefully fail on LPI exhaustion [ Upstream commit 45725e0fc3e7fe52fedb94f59806ec50e9618682 ] In the unlikely event that we cannot find any available LPI in the system, we should gracefully return an error instead of carrying on with no LPI allocated at all. Fixes: 38dd7c494cf6 ("irqchip/gic-v3-its: Drop chunk allocation compatibility") Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- drivers/irqchip/irq-gic-v3-its.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d9a880108315..15579cba1a88 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1581,6 +1581,9 @@ static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids) nr_irqs /= 2; } while (nr_irqs > 0); + if (!nr_irqs) + err = -ENOSPC; + if (err) goto out; -- cgit v1.2.3 From 1bf791023315b4ad05dc1c7d94294ba7a39ed083 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 28 Jan 2019 16:59:35 +0100 Subject: irqchip/mmp: Only touch the PJ4 IRQ & FIQ bits on enable/disable [ Upstream commit 2380a22b60ce6f995eac806e69c66e397b59d045 ] Resetting bit 4 disables the interrupt delivery to the "secure processor" core. This breaks the keyboard on a OLPC XO 1.75 laptop, where the firmware running on the "secure processor" bit-bangs the PS/2 protocol over the GPIO lines. It is not clear what the rest of the bits are and Marvell was unhelpful when asked for documentation. Aside from the SP bit, there are probably priority bits. Leaving the unknown bits as the firmware set them up seems to be a wiser course of action compared to just turning them off. Signed-off-by: Lubomir Rintel Acked-by: Pavel Machek [maz: fixed-up subject and commit message] Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- drivers/irqchip/irq-mmp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 25f32e1d7764..3496b61a312a 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -34,6 +34,9 @@ #define SEL_INT_PENDING (1 << 6) #define SEL_INT_NUM_MASK 0x3f +#define MMP2_ICU_INT_ROUTE_PJ4_IRQ (1 << 5) +#define MMP2_ICU_INT_ROUTE_PJ4_FIQ (1 << 6) + struct icu_chip_data { int nr_irqs; unsigned int virq_base; @@ -190,7 +193,8 @@ static const struct mmp_intc_conf mmp_conf = { static const struct mmp_intc_conf mmp2_conf = { .conf_enable = 0x20, .conf_disable = 0x0, - .conf_mask = 0x7f, + .conf_mask = MMP2_ICU_INT_ROUTE_PJ4_IRQ | + MMP2_ICU_INT_ROUTE_PJ4_FIQ, }; static void __exception_irq_entry mmp_handle_irq(struct pt_regs *regs) -- cgit v1.2.3 From 094392380989d1779d35733268ba5657a1fcc571 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Jan 2019 22:19:28 -0500 Subject: drm/amdgpu: Add missing power attribute to APU check [ Upstream commit dc14eb12f6bb3e779c5461429c1889a339c67aab ] Add missing power_average to visible check for power attributes for APUs. Was missed before. Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 7b4e657a95c7..c3df75a9f65d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1443,7 +1443,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, effective_mode &= ~S_IWUSR; if ((adev->flags & AMD_IS_APU) && - (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + (attr == &sensor_dev_attr_power1_average.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| attr == &sensor_dev_attr_power1_cap.dev_attr.attr)) return 0; -- cgit v1.2.3 From 4ec880d7c1995bac70e1b576378bfb2d829a2465 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Jan 2019 12:05:16 -0500 Subject: drm/radeon: check if device is root before getting pci speed caps [ Upstream commit afeff4c16edaa6275b903f82b0561406259aa3a3 ] Check if the device is root rather before attempting to see what speeds the pcie port supports. Fixes a crash with pci passthrough in a VM. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=109366 Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/ci_dpm.c | 5 +++-- drivers/gpu/drm/radeon/si_dpm.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index d587779a80b4..a97294ac96d5 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5676,7 +5676,7 @@ int ci_dpm_init(struct radeon_device *rdev) u16 data_offset, size; u8 frev, crev; struct ci_power_info *pi; - enum pci_bus_speed speed_cap; + enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN; struct pci_dev *root = rdev->pdev->bus->self; int ret; @@ -5685,7 +5685,8 @@ int ci_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi; - speed_cap = pcie_get_speed_cap(root); + if (!pci_is_root_bus(rdev->pdev->bus)) + speed_cap = pcie_get_speed_cap(root); if (speed_cap == PCI_SPEED_UNKNOWN) { pi->sys_pcie_mask = 0; } else { diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 8fb60b3af015..0a785ef0ab66 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -6899,7 +6899,7 @@ int si_dpm_init(struct radeon_device *rdev) struct ni_power_info *ni_pi; struct si_power_info *si_pi; struct atom_clock_dividers dividers; - enum pci_bus_speed speed_cap; + enum pci_bus_speed speed_cap = PCI_SPEED_UNKNOWN; struct pci_dev *root = rdev->pdev->bus->self; int ret; @@ -6911,7 +6911,8 @@ int si_dpm_init(struct radeon_device *rdev) eg_pi = &ni_pi->eg; pi = &eg_pi->rv7xx; - speed_cap = pcie_get_speed_cap(root); + if (!pci_is_root_bus(rdev->pdev->bus)) + speed_cap = pcie_get_speed_cap(root); if (speed_cap == PCI_SPEED_UNKNOWN) { si_pi->sys_pcie_mask = 0; } else { -- cgit v1.2.3 From 8096bc39c63e3d2e374a9ee9fcc0e1a9d4e5de47 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 30 Jan 2019 10:55:17 +0000 Subject: drm/amdgpu: Transfer fences to dmabuf importer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6e11ea9de9576a644045ffdc2067c09bc2012eda ] amdgpu only uses shared-fences internally, but dmabuf importers rely on implicit write hazard tracking via the reservation_object.fence_excl. For example, the importer use the write hazard for timing a page flip to only occur after the exporter has finished flushing its write into the surface. As such, on exporting a dmabuf, we must either flush all outstanding fences (for we do not know which are writes and should have been exclusive) or alternatively create a new exclusive fence that is the composite of all the existing shared fences, and so will only be signaled when all earlier fences are signaled (ensuring that we can not be signaled before the completion of any earlier write). v2: reservation_object is already locked by amdgpu_bo_reserve() v3: Replace looping with get_fences_rcu and special case the promotion of a single shared fence directly to an exclusive fence, bypassing the fence array. v4: Drop the fence array ref after assigning to reservation_object Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107341 Testcase: igt/amd_prime/amd-to-i915 References: 8e94a46c1770 ("drm/amdgpu: Attach exclusive fence to prime exported bo's. (v5)") Signed-off-by: Chris Wilson Cc: Alex Deucher Cc: "Christian König" Reviewed-by: "Christian König" Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 59 ++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 1c5d97f4b4dd..8dcf6227ab99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -37,6 +37,7 @@ #include "amdgpu_display.h" #include #include +#include static const struct dma_buf_ops amdgpu_dmabuf_ops; @@ -188,6 +189,48 @@ error: return ERR_PTR(ret); } +static int +__reservation_object_make_exclusive(struct reservation_object *obj) +{ + struct dma_fence **fences; + unsigned int count; + int r; + + if (!reservation_object_get_list(obj)) /* no shared fences to convert */ + return 0; + + r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences); + if (r) + return r; + + if (count == 0) { + /* Now that was unexpected. */ + } else if (count == 1) { + reservation_object_add_excl_fence(obj, fences[0]); + dma_fence_put(fences[0]); + kfree(fences); + } else { + struct dma_fence_array *array; + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), 0, + false); + if (!array) + goto err_fences_put; + + reservation_object_add_excl_fence(obj, &array->base); + dma_fence_put(&array->base); + } + + return 0; + +err_fences_put: + while (count--) + dma_fence_put(fences[count]); + kfree(fences); + return -ENOMEM; +} + /** * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation * @dma_buf: shared DMA buffer @@ -219,16 +262,16 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, if (attach->dev->driver != adev->dev->driver) { /* - * Wait for all shared fences to complete before we switch to future - * use of exclusive fence on this prime shared bo. + * We only create shared fences for internal use, but importers + * of the dmabuf rely on exclusive fences for implicitly + * tracking write hazards. As any of the current fences may + * correspond to a write, we need to convert all existing + * fences on the reservation object into a single exclusive + * fence. */ - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, - true, false, - MAX_SCHEDULE_TIMEOUT); - if (unlikely(r < 0)) { - DRM_DEBUG_PRIME("Fence wait failed: %li\n", r); + r = __reservation_object_make_exclusive(bo->tbo.resv); + if (r) goto error_unreserve; - } } /* pin buffer into GTT */ -- cgit v1.2.3 From 46ba03c591d985e80d547dc3ccba1e1b0e482d55 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 30 Jan 2019 15:54:19 +0100 Subject: net: stmmac: Fallback to Platform Data clock in Watchdog conversion [ Upstream commit 4ec5302fa906ec9d86597b236f62315bacdb9622 ] If we don't have DT then stmmac_clk will not be available. Let's add a new Platform Data field so that we can specify the refclk by this mean. This way we can still use the coalesce command in PCI based setups. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 14 ++++++++++---- include/linux/stmmac.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 9caf79ba5ef1..4d5fb4b51cc4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -719,8 +719,11 @@ static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (usec * (clk / 1000000)) / 256; } @@ -729,8 +732,11 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv) { unsigned long clk = clk_get_rate(priv->plat->stmmac_clk); - if (!clk) - return 0; + if (!clk) { + clk = priv->plat->clk_ref_rate; + if (!clk) + return 0; + } return (riwt * 256) / (clk / 1000000); } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 7ddfc65586b0..4335bd771ce5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -184,6 +184,7 @@ struct plat_stmmacenet_data { struct clk *pclk; struct clk *clk_ptp_ref; unsigned int clk_ptp_rate; + unsigned int clk_ref_rate; struct reset_control *stmmac_rst; struct stmmac_axi *axi; int has_gmac4; -- cgit v1.2.3 From 496eaed7fe94df7202d7cbe37873f96bcdda375e Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 30 Jan 2019 15:54:20 +0100 Subject: net: stmmac: Send TSO packets always from Queue 0 [ Upstream commit c5acdbee22a1b200dde07effd26fd1f649e9ab8a ] The number of TSO enabled channels in HW can be different than the number of total channels. There is no way to determined, at runtime, the number of TSO capable channels and its safe to assume that if TSO is enabled then at least channel 0 will be TSO capable. Lets always send TSO packets from Queue 0. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 123b74e25ed8..57c56e295705 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3030,8 +3030,17 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { - if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { + /* + * There is no way to determine the number of TSO + * capable Queues. Let's use always the Queue 0 + * because if TSO is supported then at least this + * one will be capable. + */ + skb_set_queue_mapping(skb, 0); + return stmmac_tso_xmit(skb, dev); + } } if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { -- cgit v1.2.3 From fbdbb19457ced5836d4b15c0dd6e4bdf1e424e61 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 30 Jan 2019 15:54:21 +0100 Subject: net: stmmac: Disable EEE mode earlier in XMIT callback [ Upstream commit e2cd682deb231ba6f80524bb84e57e7138261149 ] In stmmac xmit callback we use a different flow for TSO packets but TSO xmit callback is not disabling the EEE mode. Fix this by disabling earlier the EEE mode, i.e. before calling the TSO xmit callback. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 57c56e295705..43ab9e905bed 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3028,6 +3028,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) tx_q = &priv->tx_queue[queue]; + if (priv->tx_path_in_lpi_mode) + stmmac_disable_eee_mode(priv); + /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { @@ -3055,9 +3058,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } - if (priv->tx_path_in_lpi_mode) - stmmac_disable_eee_mode(priv); - entry = tx_q->cur_tx; first_entry = entry; WARN_ON(tx_q->tx_skbuff[first_entry]); -- cgit v1.2.3 From 2a5c84e164d2a6226afe374570a40e9413add72c Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Thu, 31 Jan 2019 11:19:43 +0000 Subject: irqchip/gic-v3-its: Fix ITT_entry_size accessor [ Upstream commit 56841070ccc87b463ac037d2d1f2beb8e5e35f0c ] According to ARM IHI 0069C (ID070116), we should use GITS_TYPER's bits [7:4] as ITT_entry_size instead of [8:4]. Although this is pretty annoying, it only results in a potential over-allocation of memory, and nothing bad happens. Fixes: 3dfa576bfb45 ("irqchip/gic-v3-its: Add probing for VLPI properties") Signed-off-by: Zenghui Yu [maz: massaged subject and commit message] Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 8bdbb5f29494..3188c0bef3e7 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -319,7 +319,7 @@ #define GITS_TYPER_PLPIS (1UL << 0) #define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 -#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0xf) + 1) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) -- cgit v1.2.3 From 232bd90cf23844d7846bc57a049ec52c5d15269a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 Jan 2019 13:57:58 +0100 Subject: relay: check return of create_buf_file() properly [ Upstream commit 2c1cf00eeacb784781cf1c9896b8af001246d339 ] If create_buf_file() returns an error, don't try to reference it later as a valid dentry pointer. This problem was exposed when debugfs started to return errors instead of just NULL for some calls when they do not succeed properly. Also, the check for WARN_ON(dentry) was just wrong :) Reported-by: Kees Cook Reported-and-tested-by: syzbot+16c3a70e1e9b29346c43@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Cc: Andrew Morton Cc: David Rientjes Fixes: ff9fb72bc077 ("debugfs: return error values, not NULL") Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- kernel/relay.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/relay.c b/kernel/relay.c index 04f248644e06..9e0f52375487 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -428,6 +428,8 @@ static struct dentry *relay_create_buf_file(struct rchan *chan, dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, buf, &chan->is_global); + if (IS_ERR(dentry)) + dentry = NULL; kfree(tmpname); @@ -461,7 +463,7 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) dentry = chan->cb->create_buf_file(NULL, NULL, S_IRUSR, buf, &chan->is_global); - if (WARN_ON(dentry)) + if (IS_ERR_OR_NULL(dentry)) goto free_buf; } -- cgit v1.2.3 From 0ace0d28941b8dc91dac2edae754f48ac550ed40 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Thu, 31 Jan 2019 10:19:33 +0100 Subject: bpf, selftests: fix handling of sparse CPU allocations [ Upstream commit 1bb54c4071f585ebef56ce8fdfe6026fa2cbcddd ] Previously, bpf_num_possible_cpus() had a bug when calculating a number of possible CPUs in the case of sparse CPU allocations, as it was considering only the first range or element of /sys/devices/system/cpu/possible. E.g. in the case of "0,2-3" (CPU 1 is not available), the function returned 1 instead of 3. This patch fixes the function by making it parse all CPU ranges and elements. Signed-off-by: Martynas Pumputis Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/bpf_util.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 315a44fa32af..84fd6f1bf33e 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -13,7 +13,7 @@ static inline unsigned int bpf_num_possible_cpus(void) unsigned int start, end, possible_cpus = 0; char buff[128]; FILE *fp; - int n; + int len, n, i, j = 0; fp = fopen(fcpu, "r"); if (!fp) { @@ -21,17 +21,27 @@ static inline unsigned int bpf_num_possible_cpus(void) exit(1); } - while (fgets(buff, sizeof(buff), fp)) { - n = sscanf(buff, "%u-%u", &start, &end); - if (n == 0) { - printf("Failed to retrieve # possible CPUs!\n"); - exit(1); - } else if (n == 1) { - end = start; + if (!fgets(buff, sizeof(buff), fp)) { + printf("Failed to read %s!\n", fcpu); + exit(1); + } + + len = strlen(buff); + for (i = 0; i <= len; i++) { + if (buff[i] == ',' || buff[i] == '\0') { + buff[i] = '\0'; + n = sscanf(&buff[j], "%u-%u", &start, &end); + if (n <= 0) { + printf("Failed to retrieve # possible CPUs!\n"); + exit(1); + } else if (n == 1) { + end = start; + } + possible_cpus += end - start + 1; + j = i + 1; } - possible_cpus = start == 0 ? end + 1 : 0; - break; } + fclose(fp); return possible_cpus; -- cgit v1.2.3 From e3bc64c9aa507ec3a7c71d7fed210c28cc0b4e8b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jan 2019 18:12:43 -0800 Subject: bpf: fix lockdep false positive in percpu_freelist [ Upstream commit a89fac57b5d080771efd4d71feaae19877cf68f0 ] Lockdep warns about false positive: [ 12.492084] 00000000e6b28347 (&head->lock){+...}, at: pcpu_freelist_push+0x2a/0x40 [ 12.492696] but this lock was taken by another, HARDIRQ-safe lock in the past: [ 12.493275] (&rq->lock){-.-.} [ 12.493276] [ 12.493276] [ 12.493276] and interrupts could create inverse lock ordering between them. [ 12.493276] [ 12.494435] [ 12.494435] other info that might help us debug this: [ 12.494979] Possible interrupt unsafe locking scenario: [ 12.494979] [ 12.495518] CPU0 CPU1 [ 12.495879] ---- ---- [ 12.496243] lock(&head->lock); [ 12.496502] local_irq_disable(); [ 12.496969] lock(&rq->lock); [ 12.497431] lock(&head->lock); [ 12.497890] [ 12.498104] lock(&rq->lock); [ 12.498368] [ 12.498368] *** DEADLOCK *** [ 12.498368] [ 12.498837] 1 lock held by dd/276: [ 12.499110] #0: 00000000c58cb2ee (rcu_read_lock){....}, at: trace_call_bpf+0x5e/0x240 [ 12.499747] [ 12.499747] the shortest dependencies between 2nd lock and 1st lock: [ 12.500389] -> (&rq->lock){-.-.} { [ 12.500669] IN-HARDIRQ-W at: [ 12.500934] _raw_spin_lock+0x2f/0x40 [ 12.501373] scheduler_tick+0x4c/0xf0 [ 12.501812] update_process_times+0x40/0x50 [ 12.502294] tick_periodic+0x27/0xb0 [ 12.502723] tick_handle_periodic+0x1f/0x60 [ 12.503203] timer_interrupt+0x11/0x20 [ 12.503651] __handle_irq_event_percpu+0x43/0x2c0 [ 12.504167] handle_irq_event_percpu+0x20/0x50 [ 12.504674] handle_irq_event+0x37/0x60 [ 12.505139] handle_level_irq+0xa7/0x120 [ 12.505601] handle_irq+0xa1/0x150 [ 12.506018] do_IRQ+0x77/0x140 [ 12.506411] ret_from_intr+0x0/0x1d [ 12.506834] _raw_spin_unlock_irqrestore+0x53/0x60 [ 12.507362] __setup_irq+0x481/0x730 [ 12.507789] setup_irq+0x49/0x80 [ 12.508195] hpet_time_init+0x21/0x32 [ 12.508644] x86_late_time_init+0xb/0x16 [ 12.509106] start_kernel+0x390/0x42a [ 12.509554] secondary_startup_64+0xa4/0xb0 [ 12.510034] IN-SOFTIRQ-W at: [ 12.510305] _raw_spin_lock+0x2f/0x40 [ 12.510772] try_to_wake_up+0x1c7/0x4e0 [ 12.511220] swake_up_locked+0x20/0x40 [ 12.511657] swake_up_one+0x1a/0x30 [ 12.512070] rcu_process_callbacks+0xc5/0x650 [ 12.512553] __do_softirq+0xe6/0x47b [ 12.512978] irq_exit+0xc3/0xd0 [ 12.513372] smp_apic_timer_interrupt+0xa9/0x250 [ 12.513876] apic_timer_interrupt+0xf/0x20 [ 12.514343] default_idle+0x1c/0x170 [ 12.514765] do_idle+0x199/0x240 [ 12.515159] cpu_startup_entry+0x19/0x20 [ 12.515614] start_kernel+0x422/0x42a [ 12.516045] secondary_startup_64+0xa4/0xb0 [ 12.516521] INITIAL USE at: [ 12.516774] _raw_spin_lock_irqsave+0x38/0x50 [ 12.517258] rq_attach_root+0x16/0xd0 [ 12.517685] sched_init+0x2f2/0x3eb [ 12.518096] start_kernel+0x1fb/0x42a [ 12.518525] secondary_startup_64+0xa4/0xb0 [ 12.518986] } [ 12.519132] ... key at: [] __key.71384+0x0/0x8 [ 12.519649] ... acquired at: [ 12.519892] pcpu_freelist_pop+0x7b/0xd0 [ 12.520221] bpf_get_stackid+0x1d2/0x4d0 [ 12.520563] ___bpf_prog_run+0x8b4/0x11a0 [ 12.520887] [ 12.521008] -> (&head->lock){+...} { [ 12.521292] HARDIRQ-ON-W at: [ 12.521539] _raw_spin_lock+0x2f/0x40 [ 12.521950] pcpu_freelist_push+0x2a/0x40 [ 12.522396] bpf_get_stackid+0x494/0x4d0 [ 12.522828] ___bpf_prog_run+0x8b4/0x11a0 [ 12.523296] INITIAL USE at: [ 12.523537] _raw_spin_lock+0x2f/0x40 [ 12.523944] pcpu_freelist_populate+0xc0/0x120 [ 12.524417] htab_map_alloc+0x405/0x500 [ 12.524835] __do_sys_bpf+0x1a3/0x1a90 [ 12.525253] do_syscall_64+0x4a/0x180 [ 12.525659] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 12.526167] } [ 12.526311] ... key at: [] __key.13130+0x0/0x8 [ 12.526812] ... acquired at: [ 12.527047] __lock_acquire+0x521/0x1350 [ 12.527371] lock_acquire+0x98/0x190 [ 12.527680] _raw_spin_lock+0x2f/0x40 [ 12.527994] pcpu_freelist_push+0x2a/0x40 [ 12.528325] bpf_get_stackid+0x494/0x4d0 [ 12.528645] ___bpf_prog_run+0x8b4/0x11a0 [ 12.528970] [ 12.529092] [ 12.529092] stack backtrace: [ 12.529444] CPU: 0 PID: 276 Comm: dd Not tainted 5.0.0-rc3-00018-g2fa53f892422 #475 [ 12.530043] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 [ 12.530750] Call Trace: [ 12.530948] dump_stack+0x5f/0x8b [ 12.531248] check_usage_backwards+0x10c/0x120 [ 12.531598] ? ___bpf_prog_run+0x8b4/0x11a0 [ 12.531935] ? mark_lock+0x382/0x560 [ 12.532229] mark_lock+0x382/0x560 [ 12.532496] ? print_shortest_lock_dependencies+0x180/0x180 [ 12.532928] __lock_acquire+0x521/0x1350 [ 12.533271] ? find_get_entry+0x17f/0x2e0 [ 12.533586] ? find_get_entry+0x19c/0x2e0 [ 12.533902] ? lock_acquire+0x98/0x190 [ 12.534196] lock_acquire+0x98/0x190 [ 12.534482] ? pcpu_freelist_push+0x2a/0x40 [ 12.534810] _raw_spin_lock+0x2f/0x40 [ 12.535099] ? pcpu_freelist_push+0x2a/0x40 [ 12.535432] pcpu_freelist_push+0x2a/0x40 [ 12.535750] bpf_get_stackid+0x494/0x4d0 [ 12.536062] ___bpf_prog_run+0x8b4/0x11a0 It has been explained that is a false positive here: https://lkml.org/lkml/2018/7/25/756 Recap: - stackmap uses pcpu_freelist - The lock in pcpu_freelist is a percpu lock - stackmap is only used by tracing bpf_prog - A tracing bpf_prog cannot be run if another bpf_prog has already been running (ensured by the percpu bpf_prog_active counter). Eric pointed out that this lockdep splats stops other legit lockdep splats in selftests/bpf/test_progs.c. Fix this by calling local_irq_save/restore for stackmap. Another false positive had also been worked around by calling local_irq_save in commit 89ad2fa3f043 ("bpf: fix lockdep splat"). That commit added unnecessary irq_save/restore to fast path of bpf hash map. irqs are already disabled at that point, since htab is holding per bucket spin_lock with irqsave. Let's reduce overhead for htab by introducing __pcpu_freelist_push/pop function w/o irqsave and convert pcpu_freelist_push/pop to irqsave to be used elsewhere (right now only in stackmap). It stops lockdep false positive in stackmap with a bit of acceptable overhead. Fixes: 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation") Reported-by: Naresh Kamboju Reported-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/hashtab.c | 4 ++-- kernel/bpf/percpu_freelist.c | 41 +++++++++++++++++++++++++++++------------ kernel/bpf/percpu_freelist.h | 4 ++++ 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 03cc59ee9c95..cebadd6af4d9 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -677,7 +677,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) } if (htab_is_prealloc(htab)) { - pcpu_freelist_push(&htab->freelist, &l->fnode); + __pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); l->htab = htab; @@ -739,7 +739,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, } else { struct pcpu_freelist_node *l; - l = pcpu_freelist_pop(&htab->freelist); + l = __pcpu_freelist_pop(&htab->freelist); if (!l) return ERR_PTR(-E2BIG); l_new = container_of(l, struct htab_elem, fnode); diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 673fa6fe2d73..0c1b4ba9e90e 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s) free_percpu(s->freelist); } -static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, - struct pcpu_freelist_node *node) +static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, + struct pcpu_freelist_node *node) { raw_spin_lock(&head->lock); node->next = head->first; @@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, raw_spin_unlock(&head->lock); } -void pcpu_freelist_push(struct pcpu_freelist *s, +void __pcpu_freelist_push(struct pcpu_freelist *s, struct pcpu_freelist_node *node) { struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); - __pcpu_freelist_push(head, node); + ___pcpu_freelist_push(head, node); +} + +void pcpu_freelist_push(struct pcpu_freelist *s, + struct pcpu_freelist_node *node) +{ + unsigned long flags; + + local_irq_save(flags); + __pcpu_freelist_push(s, node); + local_irq_restore(flags); } void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, @@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, for_each_possible_cpu(cpu) { again: head = per_cpu_ptr(s->freelist, cpu); - __pcpu_freelist_push(head, buf); + ___pcpu_freelist_push(head, buf); i++; buf += elem_size; if (i == nr_elems) @@ -74,14 +84,12 @@ again: local_irq_restore(flags); } -struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; - unsigned long flags; int orig_cpu, cpu; - local_irq_save(flags); orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); @@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) node = head->first; if (node) { head->first = node->next; - raw_spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock(&head->lock); return node; } raw_spin_unlock(&head->lock); cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; - if (cpu == orig_cpu) { - local_irq_restore(flags); + if (cpu == orig_cpu) return NULL; - } } } + +struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) +{ + struct pcpu_freelist_node *ret; + unsigned long flags; + + local_irq_save(flags); + ret = __pcpu_freelist_pop(s); + local_irq_restore(flags); + return ret; +} diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h index 3049aae8ea1e..c3960118e617 100644 --- a/kernel/bpf/percpu_freelist.h +++ b/kernel/bpf/percpu_freelist.h @@ -22,8 +22,12 @@ struct pcpu_freelist_node { struct pcpu_freelist_node *next; }; +/* pcpu_freelist_* do spin_lock_irqsave. */ void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); +/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */ +void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); +struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *); void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, u32 nr_elems); int pcpu_freelist_init(struct pcpu_freelist *); -- cgit v1.2.3 From 3bbe6a4212800a9b73984e5d73349db478a6711e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Jan 2019 18:12:44 -0800 Subject: bpf: fix potential deadlock in bpf_prog_register [ Upstream commit e16ec34039c701594d55d08a5aa49ee3e1abc821 ] Lockdep found a potential deadlock between cpu_hotplug_lock, bpf_event_mutex, and cpuctx_mutex: [ 13.007000] WARNING: possible circular locking dependency detected [ 13.007587] 5.0.0-rc3-00018-g2fa53f892422-dirty #477 Not tainted [ 13.008124] ------------------------------------------------------ [ 13.008624] test_progs/246 is trying to acquire lock: [ 13.009030] 0000000094160d1d (tracepoints_mutex){+.+.}, at: tracepoint_probe_register_prio+0x2d/0x300 [ 13.009770] [ 13.009770] but task is already holding lock: [ 13.010239] 00000000d663ef86 (bpf_event_mutex){+.+.}, at: bpf_probe_register+0x1d/0x60 [ 13.010877] [ 13.010877] which lock already depends on the new lock. [ 13.010877] [ 13.011532] [ 13.011532] the existing dependency chain (in reverse order) is: [ 13.012129] [ 13.012129] -> #4 (bpf_event_mutex){+.+.}: [ 13.012582] perf_event_query_prog_array+0x9b/0x130 [ 13.013016] _perf_ioctl+0x3aa/0x830 [ 13.013354] perf_ioctl+0x2e/0x50 [ 13.013668] do_vfs_ioctl+0x8f/0x6a0 [ 13.014003] ksys_ioctl+0x70/0x80 [ 13.014320] __x64_sys_ioctl+0x16/0x20 [ 13.014668] do_syscall_64+0x4a/0x180 [ 13.015007] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.015469] [ 13.015469] -> #3 (&cpuctx_mutex){+.+.}: [ 13.015910] perf_event_init_cpu+0x5a/0x90 [ 13.016291] perf_event_init+0x1b2/0x1de [ 13.016654] start_kernel+0x2b8/0x42a [ 13.016995] secondary_startup_64+0xa4/0xb0 [ 13.017382] [ 13.017382] -> #2 (pmus_lock){+.+.}: [ 13.017794] perf_event_init_cpu+0x21/0x90 [ 13.018172] cpuhp_invoke_callback+0xb3/0x960 [ 13.018573] _cpu_up+0xa7/0x140 [ 13.018871] do_cpu_up+0xa4/0xc0 [ 13.019178] smp_init+0xcd/0xd2 [ 13.019483] kernel_init_freeable+0x123/0x24f [ 13.019878] kernel_init+0xa/0x110 [ 13.020201] ret_from_fork+0x24/0x30 [ 13.020541] [ 13.020541] -> #1 (cpu_hotplug_lock.rw_sem){++++}: [ 13.021051] static_key_slow_inc+0xe/0x20 [ 13.021424] tracepoint_probe_register_prio+0x28c/0x300 [ 13.021891] perf_trace_event_init+0x11f/0x250 [ 13.022297] perf_trace_init+0x6b/0xa0 [ 13.022644] perf_tp_event_init+0x25/0x40 [ 13.023011] perf_try_init_event+0x6b/0x90 [ 13.023386] perf_event_alloc+0x9a8/0xc40 [ 13.023754] __do_sys_perf_event_open+0x1dd/0xd30 [ 13.024173] do_syscall_64+0x4a/0x180 [ 13.024519] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.024968] [ 13.024968] -> #0 (tracepoints_mutex){+.+.}: [ 13.025434] __mutex_lock+0x86/0x970 [ 13.025764] tracepoint_probe_register_prio+0x2d/0x300 [ 13.026215] bpf_probe_register+0x40/0x60 [ 13.026584] bpf_raw_tracepoint_open.isra.34+0xa4/0x130 [ 13.027042] __do_sys_bpf+0x94f/0x1a90 [ 13.027389] do_syscall_64+0x4a/0x180 [ 13.027727] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.028171] [ 13.028171] other info that might help us debug this: [ 13.028171] [ 13.028807] Chain exists of: [ 13.028807] tracepoints_mutex --> &cpuctx_mutex --> bpf_event_mutex [ 13.028807] [ 13.029666] Possible unsafe locking scenario: [ 13.029666] [ 13.030140] CPU0 CPU1 [ 13.030510] ---- ---- [ 13.030875] lock(bpf_event_mutex); [ 13.031166] lock(&cpuctx_mutex); [ 13.031645] lock(bpf_event_mutex); [ 13.032135] lock(tracepoints_mutex); [ 13.032441] [ 13.032441] *** DEADLOCK *** [ 13.032441] [ 13.032911] 1 lock held by test_progs/246: [ 13.033239] #0: 00000000d663ef86 (bpf_event_mutex){+.+.}, at: bpf_probe_register+0x1d/0x60 [ 13.033909] [ 13.033909] stack backtrace: [ 13.034258] CPU: 1 PID: 246 Comm: test_progs Not tainted 5.0.0-rc3-00018-g2fa53f892422-dirty #477 [ 13.034964] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 [ 13.035657] Call Trace: [ 13.035859] dump_stack+0x5f/0x8b [ 13.036130] print_circular_bug.isra.37+0x1ce/0x1db [ 13.036526] __lock_acquire+0x1158/0x1350 [ 13.036852] ? lock_acquire+0x98/0x190 [ 13.037154] lock_acquire+0x98/0x190 [ 13.037447] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.037876] __mutex_lock+0x86/0x970 [ 13.038167] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.038600] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.039028] ? __mutex_lock+0x86/0x970 [ 13.039337] ? __mutex_lock+0x24a/0x970 [ 13.039649] ? bpf_probe_register+0x1d/0x60 [ 13.039992] ? __bpf_trace_sched_wake_idle_without_ipi+0x10/0x10 [ 13.040478] ? tracepoint_probe_register_prio+0x2d/0x300 [ 13.040906] tracepoint_probe_register_prio+0x2d/0x300 [ 13.041325] bpf_probe_register+0x40/0x60 [ 13.041649] bpf_raw_tracepoint_open.isra.34+0xa4/0x130 [ 13.042068] ? __might_fault+0x3e/0x90 [ 13.042374] __do_sys_bpf+0x94f/0x1a90 [ 13.042678] do_syscall_64+0x4a/0x180 [ 13.042975] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 13.043382] RIP: 0033:0x7f23b10a07f9 [ 13.045155] RSP: 002b:00007ffdef42fdd8 EFLAGS: 00000202 ORIG_RAX: 0000000000000141 [ 13.045759] RAX: ffffffffffffffda RBX: 00007ffdef42ff70 RCX: 00007f23b10a07f9 [ 13.046326] RDX: 0000000000000070 RSI: 00007ffdef42fe10 RDI: 0000000000000011 [ 13.046893] RBP: 00007ffdef42fdf0 R08: 0000000000000038 R09: 00007ffdef42fe10 [ 13.047462] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000000 [ 13.048029] R13: 0000000000000016 R14: 00007f23b1db4690 R15: 0000000000000000 Since tracepoints_mutex will be taken in tracepoint_probe_register/unregister() there is no need to take bpf_event_mutex too. bpf_event_mutex is protecting modifications to prog array used in kprobe/perf bpf progs. bpf_raw_tracepoints don't need to take this mutex. Fixes: c4f6699dfcb8 ("bpf: introduce BPF_RAW_TRACEPOINT") Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/trace/bpf_trace.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9864a35c8bb5..6c28d519447d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1158,22 +1158,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) { - int err; - - mutex_lock(&bpf_event_mutex); - err = __bpf_probe_register(btp, prog); - mutex_unlock(&bpf_event_mutex); - return err; + return __bpf_probe_register(btp, prog); } int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) { - int err; - - mutex_lock(&bpf_event_mutex); - err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); - mutex_unlock(&bpf_event_mutex); - return err; + return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); } int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, -- cgit v1.2.3 From ae26a7109c150b9214d04bbc45a1867529cbc160 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 30 Jan 2019 18:12:45 -0800 Subject: bpf: Fix syscall's stackmap lookup potential deadlock [ Upstream commit 7c4cd051add3d00bbff008a133c936c515eaa8fe ] The map_lookup_elem used to not acquiring spinlock in order to optimize the reader. It was true until commit 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation") The syscall's map_lookup_elem(stackmap) calls bpf_stackmap_copy(). bpf_stackmap_copy() may find the elem no longer needed after the copy is done. If that is the case, pcpu_freelist_push() saves this elem for reuse later. This push requires a spinlock. If a tracing bpf_prog got run in the middle of the syscall's map_lookup_elem(stackmap) and this tracing bpf_prog is calling bpf_get_stackid(stackmap) which also requires the same pcpu_freelist's spinlock, it may end up with a dead lock situation as reported by Eric Dumazet in https://patchwork.ozlabs.org/patch/1030266/ The situation is the same as the syscall's map_update_elem() which needs to acquire the pcpu_freelist's spinlock and could race with tracing bpf_prog. Hence, this patch fixes it by protecting bpf_stackmap_copy() with this_cpu_inc(bpf_prog_active) to prevent tracing bpf_prog from running. A later syscall's map_lookup_elem commit f1a2e44a3aec ("bpf: add queue and stack maps") also acquires a spinlock and races with tracing bpf_prog similarly. Hence, this patch is forward looking and protects the majority of the map lookups. bpf_map_offload_lookup_elem() is the exception since it is for network bpf_prog only (i.e. never called by tracing bpf_prog). Fixes: 557c0c6e7df8 ("bpf: convert stackmap to pre-allocation") Reported-by: Eric Dumazet Acked-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Sasha Levin --- kernel/bpf/syscall.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 382c09dddf93..cc40b8be1171 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -701,8 +701,13 @@ static int map_lookup_elem(union bpf_attr *attr) if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_lookup_elem(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + goto done; + } + + preempt_disable(); + this_cpu_inc(bpf_prog_active); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { err = bpf_percpu_hash_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_copy(map, key, value); @@ -722,7 +727,10 @@ static int map_lookup_elem(union bpf_attr *attr) rcu_read_unlock(); err = ptr ? 0 : -ENOENT; } + this_cpu_dec(bpf_prog_active); + preempt_enable(); +done: if (err) goto free_value; -- cgit v1.2.3 From 7cf4466df6cadc4fe4f884e6ada20dd552acb82c Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Thu, 31 Jan 2019 14:25:50 +0100 Subject: drm/sun4i: tcon: Prepare and enable TCON channel 0 clock at init [ Upstream commit b14e945bda8ae227d1bf2b1837c0c4a61721cd1a ] When initializing clocks, a reference to the TCON channel 0 clock is obtained. However, the clock is never prepared and enabled later. Switching from simplefb to DRM actually disables the clock (that was usually configured by U-Boot) because of that. On the V3s, this results in a hang when writing to some mixer registers when switching over to DRM from simplefb. Fix this by preparing and enabling the clock when initializing other clocks. Waiting for sun4i_tcon_channel_enable to enable the clock is apparently too late and results in the same mixer register access hang. Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190131132550.26355-1-paul.kocialkowski@bootlin.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 3fb084f802e2..8c31c9ab06f8 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -672,6 +672,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, return PTR_ERR(tcon->sclk0); } } + clk_prepare_enable(tcon->sclk0); if (tcon->quirks->has_channel_1) { tcon->sclk1 = devm_clk_get(dev, "tcon-ch1"); @@ -686,6 +687,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, static void sun4i_tcon_free_clocks(struct sun4i_tcon *tcon) { + clk_disable_unprepare(tcon->sclk0); clk_disable_unprepare(tcon->clk); } -- cgit v1.2.3 From f3ffd4559de2479caad378912b2b52a761d8801b Mon Sep 17 00:00:00 2001 From: Codrin Ciubotariu Date: Wed, 23 Jan 2019 16:33:47 +0000 Subject: dmaengine: at_xdmac: Fix wrongfull report of a channel as in use [ Upstream commit dc3f595b6617ebc0307e0ce151e8f2f2b2489b95 ] atchan->status variable is used to store two different information: - pass channel interrupts status from interrupt handler to tasklet; - channel information like whether it is cyclic or paused; This causes a bug when device_terminate_all() is called, (AT_XDMAC_CHAN_IS_CYCLIC cleared on atchan->status) and then a late End of Block interrupt arrives (AT_XDMAC_CIS_BIS), which sets bit 0 of atchan->status. Bit 0 is also used for AT_XDMAC_CHAN_IS_CYCLIC, so when a new descriptor for a cyclic transfer is created, the driver reports the channel as in use: if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) { dev_err(chan2dev(chan), "channel currently used\n"); return NULL; } This patch fixes the bug by adding a different struct member to keep the interrupts status separated from the channel status bits. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Codrin Ciubotariu Acked-by: Ludovic Desroches Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/at_xdmac.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 4bf72561667c..a75b95fac3bd 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -203,6 +203,7 @@ struct at_xdmac_chan { u32 save_cim; u32 save_cnda; u32 save_cndc; + u32 irq_status; unsigned long status; struct tasklet_struct tasklet; struct dma_slave_config sconfig; @@ -1580,8 +1581,8 @@ static void at_xdmac_tasklet(unsigned long data) struct at_xdmac_desc *desc; u32 error_mask; - dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08lx\n", - __func__, atchan->status); + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status); error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS @@ -1589,15 +1590,15 @@ static void at_xdmac_tasklet(unsigned long data) if (at_xdmac_chan_is_cyclic(atchan)) { at_xdmac_handle_cyclic(atchan); - } else if ((atchan->status & AT_XDMAC_CIS_LIS) - || (atchan->status & error_mask)) { + } else if ((atchan->irq_status & AT_XDMAC_CIS_LIS) + || (atchan->irq_status & error_mask)) { struct dma_async_tx_descriptor *txd; - if (atchan->status & AT_XDMAC_CIS_RBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_RBEIS) dev_err(chan2dev(&atchan->chan), "read bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_WBEIS) + if (atchan->irq_status & AT_XDMAC_CIS_WBEIS) dev_err(chan2dev(&atchan->chan), "write bus error!!!"); - if (atchan->status & AT_XDMAC_CIS_ROIS) + if (atchan->irq_status & AT_XDMAC_CIS_ROIS) dev_err(chan2dev(&atchan->chan), "request overflow error!!!"); spin_lock_bh(&atchan->lock); @@ -1652,7 +1653,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) atchan = &atxdmac->chan[i]; chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS); - atchan->status = chan_status & chan_imr; + atchan->irq_status = chan_status & chan_imr; dev_vdbg(atxdmac->dma.dev, "%s: chan%d: imr=0x%x, status=0x%x\n", __func__, i, chan_imr, chan_status); @@ -1666,7 +1667,7 @@ static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) at_xdmac_chan_read(atchan, AT_XDMAC_CDA), at_xdmac_chan_read(atchan, AT_XDMAC_CUBC)); - if (atchan->status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) + if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS)) at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask); tasklet_schedule(&atchan->tasklet); -- cgit v1.2.3 From cd2013568dd51c6fc364c6d876d0fc624446572c Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 1 Feb 2019 12:42:06 +0100 Subject: vsock/virtio: fix kernel panic after device hot-unplug [ Upstream commit 22b5c0b63f32568e130fa2df4ba23efce3eb495b ] virtio_vsock_remove() invokes the vsock_core_exit() also if there are opened sockets for the AF_VSOCK protocol family. In this way the vsock "transport" pointer is set to NULL, triggering the kernel panic at the first socket activity. This patch move the vsock_core_init()/vsock_core_exit() in the virtio_vsock respectively in module_init and module_exit functions, that cannot be invoked until there are open sockets. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1609699 Reported-by: Yan Fu Signed-off-by: Stefano Garzarella Acked-by: Stefan Hajnoczi Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/vmw_vsock/virtio_transport.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 5d3cce9e8744..9dae54698737 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void) { struct virtio_vsock *vsock = virtio_vsock_get(); + if (!vsock) + return VMADDR_CID_ANY; + return vsock->guest_cid; } @@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) virtio_vsock_update_guest_cid(vsock); - ret = vsock_core_init(&virtio_transport.transport); - if (ret < 0) - goto out_vqs; - vsock->rx_buf_nr = 0; vsock->rx_buf_max_nr = 0; atomic_set(&vsock->queued_replies, 0); @@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) mutex_unlock(&the_virtio_vsock_mutex); return 0; -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); out: kfree(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -669,7 +666,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev) mutex_lock(&the_virtio_vsock_mutex); the_virtio_vsock = NULL; - vsock_core_exit(); mutex_unlock(&the_virtio_vsock_mutex); vdev->config->del_vqs(vdev); @@ -702,14 +698,28 @@ static int __init virtio_vsock_init(void) virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); if (!virtio_vsock_workqueue) return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); if (ret) - destroy_workqueue(virtio_vsock_workqueue); + goto out_wq; + + ret = vsock_core_init(&virtio_transport.transport); + if (ret) + goto out_vdr; + + return 0; + +out_vdr: + unregister_virtio_driver(&virtio_vsock_driver); +out_wq: + destroy_workqueue(virtio_vsock_workqueue); return ret; + } static void __exit virtio_vsock_exit(void) { + vsock_core_exit(); unregister_virtio_driver(&virtio_vsock_driver); destroy_workqueue(virtio_vsock_workqueue); } -- cgit v1.2.3 From 5eae58996b7e537220cf39d583f363abf7c9c612 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 1 Feb 2019 12:42:07 +0100 Subject: vsock/virtio: reset connected sockets on device removal [ Upstream commit 85965487abc540368393a15491e6e7fcd230039d ] When the virtio transport device disappear, we should reset all connected sockets in order to inform the users. Signed-off-by: Stefano Garzarella Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/vmw_vsock/virtio_transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 9dae54698737..15eb5d3d4750 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -634,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev) flush_work(&vsock->event_work); flush_work(&vsock->send_pkt_work); + /* Reset all connected sockets when the device disappear */ + vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vdev->config->reset(vdev); mutex_lock(&vsock->rx_lock); -- cgit v1.2.3 From 0203f0c963e985188076ca654c53715537525933 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 30 Jan 2019 21:48:44 +0200 Subject: dmaengine: dmatest: Abort test in case of mapping error [ Upstream commit 6454368a804c4955ccd116236037536f81e5b1f1 ] In case of mapping error the DMA addresses are invalid and continuing will screw system memory or potentially something else. [ 222.480310] dmatest: dma0chan7-copy0: summary 1 tests, 3 failures 6 iops 349 KB/s (0) ... [ 240.912725] check: Corrupted low memory at 00000000c7c75ac9 (2940 phys) = 5656000000000000 [ 240.921998] check: Corrupted low memory at 000000005715a1cd (2948 phys) = 279f2aca5595ab2b [ 240.931280] check: Corrupted low memory at 000000002f4024c0 (2950 phys) = 5e5624f349e793cf ... Abort any test if mapping failed. Fixes: 4076e755dbec ("dmatest: convert to dmaengine_unmap_data") Cc: Dan Williams Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dmatest.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index aa1712beb0cc..7b7fba0c9253 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -642,11 +642,9 @@ static int dmatest_func(void *data) srcs[i] = um->addr[i] + src_off; ret = dma_mapping_error(dev->dev, um->addr[i]); if (ret) { - dmaengine_unmap_put(um); result("src mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->to_cnt++; } @@ -661,11 +659,9 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); ret = dma_mapping_error(dev->dev, dsts[i]); if (ret) { - dmaengine_unmap_put(um); result("dst mapping error", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } um->bidi_cnt++; } @@ -693,12 +689,10 @@ static int dmatest_func(void *data) } if (!tx) { - dmaengine_unmap_put(um); result("prep error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } done->done = false; @@ -707,12 +701,10 @@ static int dmatest_func(void *data) cookie = tx->tx_submit(tx); if (dma_submit_error(cookie)) { - dmaengine_unmap_put(um); result("submit error", total_tests, src_off, dst_off, len, ret); msleep(100); - failed_tests++; - continue; + goto error_unmap_continue; } dma_async_issue_pending(chan); @@ -725,16 +717,14 @@ static int dmatest_func(void *data) dmaengine_unmap_put(um); result("test timed out", total_tests, src_off, dst_off, len, 0); - failed_tests++; - continue; + goto error_unmap_continue; } else if (status != DMA_COMPLETE) { dmaengine_unmap_put(um); result(status == DMA_ERROR ? "completion error status" : "completion busy status", total_tests, src_off, dst_off, len, ret); - failed_tests++; - continue; + goto error_unmap_continue; } dmaengine_unmap_put(um); @@ -779,6 +769,12 @@ static int dmatest_func(void *data) verbose_result("test passed", total_tests, src_off, dst_off, len, 0); } + + continue; + +error_unmap_continue: + dmaengine_unmap_put(um); + failed_tests++; } ktime = ktime_sub(ktime_get(), ktime); ktime = ktime_sub(ktime, comparetime); -- cgit v1.2.3 From 974ed365b16455203b0fb6ef91b1aa9a07b9acde Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Tue, 29 Jan 2019 06:28:35 +0000 Subject: selftests: netfilter: fix config fragment CONFIG_NF_TABLES_INET [ Upstream commit 952b72f89ae23b316da8c1021b18d0c388ad6cc4 ] In selftests the config fragment for netfilter was added as NF_TABLES_INET=y and this patch correct it as CONFIG_NF_TABLES_INET=y Signed-off-by: Naresh Kamboju Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- tools/testing/selftests/netfilter/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 1017313e41a8..59caa8f71cd8 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -1,2 +1,2 @@ CONFIG_NET_NS=y -NF_TABLES_INET=y +CONFIG_NF_TABLES_INET=y -- cgit v1.2.3 From 5c39e08fc417b91d24d243bc6d1a305e1c38fb9f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 29 Jan 2019 15:16:23 +0100 Subject: selftests: netfilter: add simple masq/redirect test cases [ Upstream commit 98bfc3414bda335dbd7fec58bde6266f991801d7 ] Check basic nat/redirect/masquerade for ipv4 and ipv6. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- tools/testing/selftests/netfilter/Makefile | 2 +- tools/testing/selftests/netfilter/nft_nat.sh | 762 +++++++++++++++++++++++++++ 2 files changed, 763 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/nft_nat.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 47ed6cef93fb..c9ff2b47bd1c 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh +TEST_PROGS := nft_trans_stress.sh nft_nat.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh new file mode 100755 index 000000000000..8ec76681605c --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -0,0 +1,762 @@ +#!/bin/bash +# +# This test is for basic NAT functionality: snat, dnat, redirect, masquerade. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 + +ip -net ns0 link set lo up +ip -net ns0 link set veth0 up +ip -net ns0 addr add 10.0.1.1/24 dev veth0 +ip -net ns0 addr add dead:1::1/64 dev veth0 + +ip -net ns0 link set veth1 up +ip -net ns0 addr add 10.0.2.1/24 dev veth1 +ip -net ns0 addr add dead:2::1/64 dev veth1 + +for i in 1 2; do + ip -net ns$i link set lo up + ip -net ns$i link set eth0 up + ip -net ns$i addr add 10.0.$i.99/24 dev eth0 + ip -net ns$i route add default via 10.0.$i.1 + ip -net ns$i addr add dead:$i::99/64 dev eth0 + ip -net ns$i route add default via dead:$i::1 +done + +bad_counter() +{ + local ns=$1 + local counter=$2 + local expect=$3 + + echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter $counter 1>&2 +} + +check_counters() +{ + ns=$1 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in "packets 1 bytes 84" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out "packets 1 bytes 84" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in6 "$expect" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out6 "$expect" + lret=1 + fi + + return $lret +} + +check_ns0_counters() +{ + local ns=$1 + local lret=0 + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in6 "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out "packets 0 bytes 0" + lret=1 + fi + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out6 "packets 0 bytes 0" + lret=1 + fi + + for dir in "in" "out" ; do + expect="packets 1 bytes 84" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir "$expect" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir6 "$expect" + lret=1 + fi + done + + return $lret +} + +reset_counters() +{ + for i in 0 1 2;do + ip netns exec ns$i nft reset counters inet > /dev/null + done +} + +test_local_dnat6() +{ + local lret=0 +ip netns exec ns0 nft -f - < /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping6 failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2" + ip netns exec ns0 nft flush chain ip6 nat output + + return $lret +} + +test_local_dnat() +{ + local lret=0 +ip netns exec ns0 nft -f - < /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2" + + ip netns exec ns0 nft flush chain ip nat output + + reset_counters + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns1$dir "$expect" + lret=1 + fi + done + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 1 count in ns1 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0$dir "$expect" + lret=1 + fi + done + + # expect 0 packet in ns2 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns2$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush" + + return $lret +} + + +test_masquerade6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 via ipv6" + return 1 + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip6 nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + + return $lret +} + +test_masquerade() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: canot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + + return $lret +} + +test_redirect6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannnot ping ns1 from ns2 via ipv6" + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip6 nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete ip6 nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2" + + return $lret +} + +test_redirect() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP redirection for ns2" + + return $lret +} + + +# ip netns exec ns0 ping -c 1 -q 10.0.$i.99 +for i in 0 1 2; do +ip netns exec ns$i nft -f - < /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s)" 1>&2 + ret=1 + fi + + ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 + ret=1 + fi + check_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + + check_ns0_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + reset_counters +done + +if [ $ret -eq 0 ];then + echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2" +fi + +reset_counters +test_local_dnat +test_local_dnat6 + +reset_counters +test_masquerade +test_masquerade6 + +reset_counters +test_redirect +test_redirect6 + +for i in 0 1 2; do ip netns del ns$i;done + +exit $ret -- cgit v1.2.3 From 5058447bf760d7dabe58eb3e0cae13422da865e4 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Tue, 29 Jan 2019 15:51:42 +0100 Subject: netfilter: nf_nat: skip nat clash resolution for same-origin entries [ Upstream commit 4e35c1cb9460240e983a01745b5f29fe3a4d8e39 ] It is possible that two concurrent packets originating from the same socket of a connection-less protocol (e.g. UDP) can end up having different IP_CT_DIR_REPLY tuples which results in one of the packets being dropped. To illustrate this, consider the following simplified scenario: 1. Packet A and B are sent at the same time from two different threads by same UDP socket. No matching conntrack entry exists yet. Both packets cause allocation of a new conntrack entry. 2. get_unique_tuple gets called for A. No clashing entry found. conntrack entry for A is added to main conntrack table. 3. get_unique_tuple is called for B and will find that the reply tuple of B is already taken by A. It will allocate a new UDP source port for B to resolve the clash. 4. conntrack entry for B cannot be added to main conntrack table because its ORIGINAL direction is clashing with A and the REPLY directions of A and B are not the same anymore due to UDP source port reallocation done in step 3. This patch modifies nf_conntrack_tuple_taken so it doesn't consider colliding reply tuples if the IP_CT_DIR_ORIGINAL tuples are equal. [ Florian: simplify patch to not use .allow_clash setting and always ignore identical flows ] Signed-off-by: Martynas Pumputis Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_core.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 277d02a8cac8..895171a2e1f1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1007,6 +1007,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, } if (nf_ct_key_equal(h, tuple, zone, net)) { + /* Tuple is taken already, so caller will need to find + * a new source port to use. + * + * Only exception: + * If the *original tuples* are identical, then both + * conntracks refer to the same flow. + * This is a rare situation, it can occur e.g. when + * more than one UDP packet is sent from same socket + * in different threads. + * + * Let nf_ct_resolve_clash() deal with this later. + */ + if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + continue; + NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; -- cgit v1.2.3 From 575a2461e50b097b5a37b89e50e3fc0269a29286 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 4 Feb 2019 17:40:06 +0100 Subject: s390/qeth: release cmd buffer in error paths [ Upstream commit 5065b2dd3e5f9247a6c9d67974bc0472bf561b9d ] Whenever we fail before/while starting an IO, make sure to release the IO buffer. Usually qeth_irq() would do this for us, but if the IO doesn't even start we obviously won't get an interrupt for it either. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index b03515d43745..4b3b9c4e46d2 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -565,6 +565,7 @@ static int __qeth_issue_next_read(struct qeth_card *card) QETH_DBF_MESSAGE(2, "%s error in starting next read ccw! " "rc=%i\n", dev_name(&card->gdev->dev), rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); card->read_or_write_problem = 1; qeth_schedule_recovery(card); wake_up(&card->wait_q); @@ -1187,6 +1188,8 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, rc = qeth_get_problem(cdev, irb); if (rc) { card->read_or_write_problem = 1; + if (iob) + qeth_release_buffer(iob->channel, iob); qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); goto out; @@ -1852,6 +1855,7 @@ static int qeth_idx_activate_get_answer(struct qeth_channel *channel, QETH_DBF_MESSAGE(2, "Error2 in activating channel rc=%d\n", rc); QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@ -1923,6 +1927,7 @@ static int qeth_idx_activate_channel(struct qeth_channel *channel, rc); QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@ -2110,6 +2115,7 @@ int qeth_send_control_data(struct qeth_card *card, int len, } reply = qeth_alloc_reply(card); if (!reply) { + qeth_release_buffer(channel, iob); return -ENOMEM; } reply->callback = reply_cb; -- cgit v1.2.3 From 5327c5530c9bffe230e99e619946fa8e73f61eba Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 4 Feb 2019 17:40:07 +0100 Subject: s390/qeth: fix use-after-free in error path [ Upstream commit afa0c5904ba16d59b0454f7ee4c807dae350f432 ] The error path in qeth_alloc_qdio_buffers() that takes care of cleaning up the Output Queues is buggy. It first frees the queue, but then calls qeth_clear_outq_buffers() with that very queue struct. Make the call to qeth_clear_outq_buffers() part of the free action (in the correct order), and while at it fix the naming of the helper. Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks") Signed-off-by: Julian Wiedmann Reviewed-by: Alexandra Winter Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core_main.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 4b3b9c4e46d2..56aacf32f71b 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2454,11 +2454,12 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx) return 0; } -static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q) +static void qeth_free_output_queue(struct qeth_qdio_out_q *q) { if (!q) return; + qeth_clear_outq_buffers(q, 1); qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); kfree(q); } @@ -2532,10 +2533,8 @@ out_freeoutqbufs: card->qdio.out_qs[i]->bufs[j] = NULL; } out_freeoutq: - while (i > 0) { - qeth_free_qdio_out_buf(card->qdio.out_qs[--i]); - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - } + while (i > 0) + qeth_free_output_queue(card->qdio.out_qs[--i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; out_freepool: @@ -2568,10 +2567,8 @@ static void qeth_free_qdio_buffers(struct qeth_card *card) qeth_free_buffer_pool(card); /* free outbound qdio_qs */ if (card->qdio.out_qs) { - for (i = 0; i < card->qdio.no_out_queues; ++i) { - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - qeth_free_qdio_out_buf(card->qdio.out_qs[i]); - } + for (i = 0; i < card->qdio.no_out_queues; i++) + qeth_free_output_queue(card->qdio.out_qs[i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; } -- cgit v1.2.3 From 825e58bc9800897edd21920d4fef6aee714039d4 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 4 Feb 2019 17:40:08 +0100 Subject: s390/qeth: cancel close_dev work before removing a card [ Upstream commit c2780c1a3fb724560b1d44f7976e0de17bf153c7 ] A card's close_dev work is scheduled on a driver-wide workqueue. If the card is removed and freed while the work is still active, this causes a use-after-free. So make sure that the work is completed before freeing the card. Fixes: 0f54761d167f ("qeth: Support VEPA mode") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core.h | 1 + drivers/s390/net/qeth_l2_main.c | 2 ++ drivers/s390/net/qeth_l3_main.c | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 970654fcc48d..2d1f6a583641 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 76b2fba5fba2..b7513c5848cf 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -854,6 +854,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) if (cgdev->state == CCWGROUP_ONLINE) qeth_l2_set_offline(cgdev); + + cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index b7f6a8384543..7f71ca0d08e7 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2611,6 +2611,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) if (cgdev->state == CCWGROUP_ONLINE) qeth_l3_set_offline(cgdev); + cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); qeth_l3_clear_ip_htable(card, 0); -- cgit v1.2.3 From 47e3f3c08605b0af25c4dfbbf6c9c71655edbbda Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 28 Jan 2019 14:35:26 +0100 Subject: perf symbols: Filter out hidden symbols from labels [ Upstream commit 59a17706915fe5ea6f711e1f92d4fb706bce07fe ] When perf is built with the annobin plugin (RHEL8 build) extra symbols are added to its binary: # nm perf | grep annobin | head -10 0000000000241100 t .annobin_annotate.c 0000000000326490 t .annobin_annotate.c 0000000000249255 t .annobin_annotate.c_end 00000000003283a8 t .annobin_annotate.c_end 00000000001bce18 t .annobin_annotate.c_end.hot 00000000001bce18 t .annobin_annotate.c_end.hot 00000000001bc3e2 t .annobin_annotate.c_end.unlikely 00000000001bc400 t .annobin_annotate.c_end.unlikely 00000000001bce18 t .annobin_annotate.c.hot 00000000001bce18 t .annobin_annotate.c.hot ... Those symbols have no use for report or annotation and should be skipped. Moreover they interfere with the DWARF unwind test on the PPC arch, where they are mixed with checked symbols and then the test fails: # perf test dwarf -v 59: Test dwarf unwind : --- start --- test child forked, pid 8515 unwind: .annobin_dwarf_unwind.c:ip = 0x10dba40dc (0x2740dc) ... got: .annobin_dwarf_unwind.c 0x10dba40dc, expecting test__arch_unwind_sample unwind: failed with 'no error' The annobin symbols are defined as NOTYPE/LOCAL/HIDDEN: # readelf -s ./perf | grep annobin | head -1 40: 00000000001bce4f 0 NOTYPE LOCAL HIDDEN 13 .annobin_init.c They can still pass the check for the label symbol. Adding check for HIDDEN and INTERNAL (as suggested by Nick below) visibility and filter out such symbols. > Just to be awkward, if you are going to ignore STV_HIDDEN > symbols then you should probably also ignore STV_INTERNAL ones > as well... Annobin does not generate them, but you never know, > one day some other tool might create some. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Masami Hiramatsu Cc: Michael Petlan Cc: Namhyung Kim Cc: Nick Clifton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190128133526.GD15461@krava Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/symbol-elf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6e70cc00c161..a701a8a48f00 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -87,6 +87,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); } +static inline uint8_t elf_sym__visibility(const GElf_Sym *sym) +{ + return GELF_ST_VISIBILITY(sym->st_other); +} + #ifndef STT_GNU_IFUNC #define STT_GNU_IFUNC 10 #endif @@ -111,7 +116,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym) return elf_sym__type(sym) == STT_NOTYPE && sym->st_name != 0 && sym->st_shndx != SHN_UNDEF && - sym->st_shndx != SHN_ABS; + sym->st_shndx != SHN_ABS && + elf_sym__visibility(sym) != STV_HIDDEN && + elf_sym__visibility(sym) != STV_INTERNAL; } static bool elf_sym__filter(GElf_Sym *sym) -- cgit v1.2.3 From 738f9e2774d6fc6a66bd4c551ac132699b1fc993 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Jan 2019 15:12:34 +0100 Subject: perf trace: Support multiple "vfs_getname" probes [ Upstream commit 6ab3bc240ade47a0f52bc16d97edd9accbe0024e ] With a suitably defined "probe:vfs_getname" probe, 'perf trace' can "beautify" its output, so syscalls like open() or openat() can print the "filename" argument instead of just its hex address, like: $ perf trace -e open -- touch /dev/null [...] 0.590 ( 0.014 ms): touch/18063 open(filename: /dev/null, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 [...] The output without such beautifier looks like: 0.529 ( 0.011 ms): touch/18075 open(filename: 0xc78cf288, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 However, when the vfs_getname probe expands to multiple probes and it is not the first one that is hit, the beautifier fails, as following: 0.326 ( 0.010 ms): touch/18072 open(filename: , flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 3 Fix it by hooking into all the expanded probes (inlines), now, for instance: [root@quaco ~]# perf probe -l probe:vfs_getname (on getname_flags:73@fs/namei.c with pathname) probe:vfs_getname_1 (on getname_flags:73@fs/namei.c with pathname) [root@quaco ~]# perf trace -e open* sleep 1 0.010 ( 0.005 ms): sleep/5588 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: RDONLY|CLOEXEC) = 3 0.029 ( 0.006 ms): sleep/5588 openat(dfd: CWD, filename: /lib64/libc.so.6, flags: RDONLY|CLOEXEC) = 3 0.194 ( 0.008 ms): sleep/5588 openat(dfd: CWD, filename: /usr/lib/locale/locale-archive, flags: RDONLY|CLOEXEC) = 3 [root@quaco ~]# Works, further verified with: [root@quaco ~]# perf test vfs 65: Use vfs_getname probe to get syscall args filenames : Ok 66: Add vfs_getname probe to get syscall args filenames : Ok 67: Check open filename arg using perf trace + vfs_getname: Ok [root@quaco ~]# Reported-by: Michael Petlan Tested-by: Michael Petlan Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-mv8kolk17xla1smvmp3qabv1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-trace.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 22ab8e67c760..3f43aedb384d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2263,19 +2263,30 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) { - struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); + bool found = false; + struct perf_evsel *evsel, *tmp; + struct parse_events_error err = { .idx = 0, }; + int ret = parse_events(evlist, "probe:vfs_getname*", &err); - if (IS_ERR(evsel)) + if (ret) return false; - if (perf_evsel__field(evsel, "pathname") == NULL) { + evlist__for_each_entry_safe(evlist, evsel, tmp) { + if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname")) + continue; + + if (perf_evsel__field(evsel, "pathname")) { + evsel->handler = trace__vfs_getname; + found = true; + continue; + } + + list_del_init(&evsel->node); + evsel->evlist = NULL; perf_evsel__delete(evsel); - return false; } - evsel->handler = trace__vfs_getname; - perf_evlist__add(evlist, evsel); - return true; + return found; } static struct perf_evsel *perf_evsel__new_pgfault(u64 config) -- cgit v1.2.3 From cd8520a283c44f80fa34c726fac1b7e12ff9ac58 Mon Sep 17 00:00:00 2001 From: Jun-Ru Chang Date: Tue, 29 Jan 2019 11:56:07 +0800 Subject: MIPS: Remove function size check in get_frame_info() [ Upstream commit 2b424cfc69728224fcb5fad138ea7260728e0901 ] Patch (b6c7a324df37b "MIPS: Fix get_frame_info() handling of microMIPS function size.") introduces additional function size check for microMIPS by only checking insn between ip and ip + func_size. However, func_size in get_frame_info() is always 0 if KALLSYMS is not enabled. This causes get_frame_info() to return immediately without calculating correct frame_size, which in turn causes "Can't analyze schedule() prologue" warning messages at boot time. This patch removes func_size check, and let the frame_size check run up to 128 insns for both MIPS and microMIPS. Signed-off-by: Jun-Ru Chang Signed-off-by: Tony Wu Signed-off-by: Paul Burton Fixes: b6c7a324df37b ("MIPS: Fix get_frame_info() handling of microMIPS function size.") Cc: Cc: Cc: Cc: Cc: Cc: Cc: Cc: Signed-off-by: Sasha Levin --- arch/mips/kernel/process.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index d4f7fd4550e1..85522c137f19 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -371,7 +371,7 @@ static inline int is_sp_move_ins(union mips_instruction *ip, int *frame_size) static int get_frame_info(struct mips_frame_info *info) { bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); - union mips_instruction insn, *ip, *ip_end; + union mips_instruction insn, *ip; const unsigned int max_insns = 128; unsigned int last_insn_size = 0; unsigned int i; @@ -384,10 +384,9 @@ static int get_frame_info(struct mips_frame_info *info) if (!ip) goto err; - ip_end = (void *)ip + info->func_size; - - for (i = 0; i < max_insns && ip < ip_end; i++) { + for (i = 0; i < max_insns; i++) { ip = (void *)ip + last_insn_size; + if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.word = ip->halfword[0] << 16; last_insn_size = 2; -- cgit v1.2.3 From 29f7b376d399acf6c1e54267e5214aca3549a001 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Fri, 1 Feb 2019 14:42:28 +0000 Subject: Revert "scsi: libfc: Add WARN_ON() when deleting rports" [ Upstream commit d8f6382a7d026989029e2e50c515df954488459b ] This reverts commit bbc0f8bd88abefb0f27998f40a073634a3a2db89. It added a warning whose intent was to check whether the rport was still linked into the peer list. It doesn't work as intended and gives false positive warnings for two reasons: 1) If the rport is never linked into the peer list it will not be considered empty since the list_head is never initialized. 2) If the rport is deleted from the peer list using list_del_rcu(), then the list_head is in an undefined state and it is not considered empty. Signed-off-by: Ross Lagerwall Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libfc/fc_rport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 372387a450df..1797e47fab38 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -184,7 +184,6 @@ void fc_rport_destroy(struct kref *kref) struct fc_rport_priv *rdata; rdata = container_of(kref, struct fc_rport_priv, kref); - WARN_ON(!list_empty(&rdata->peers)); kfree_rcu(rdata, rcu); } EXPORT_SYMBOL(fc_rport_destroy); -- cgit v1.2.3 From ee84b62fba23ebdddb81ebe3f8275ad69d5f7ed7 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 10 Jan 2019 07:59:16 -0800 Subject: i2c: omap: Use noirq system sleep pm ops to idle device for suspend [ Upstream commit c6e2bd956936d925748581e4d0294f10f1d92f2c ] We currently get the following error with pixcir_ts driver during a suspend resume cycle: omap_i2c 4802a000.i2c: controller timed out pixcir_ts 1-005c: pixcir_int_enable: can't read reg 0x34 : -110 pixcir_ts 1-005c: Failed to disable interrupt generation: -110 pixcir_ts 1-005c: Failed to stop dpm_run_callback(): pixcir_i2c_ts_resume+0x0/0x98 [pixcir_i2c_ts] returns -110 PM: Device 1-005c failed to resume: error -110 And at least am437x based devices with pixcir_ts will fail to resume to a touchscreen that is configured as the wakeup-source in device tree for these devices. This is because pixcir_ts tries to reconfigure it's registers for noirq suspend which fails. This also leaves i2c-omap in enabled state for suspend. Let's fix the pixcir_ts issue and make sure i2c-omap is suspended by adding SET_NOIRQ_SYSTEM_SLEEP_PM_OPS. Let's also get rid of some ifdefs while at it and replace them with __maybe_unused as SET_RUNTIME_PM_OPS and SET_NOIRQ_SYSTEM_SLEEP_PM_OPS already deal with the various PM Kconfig options. Reported-by: Keerthy Signed-off-by: Tony Lindgren Acked-by: Vignesh R Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-omap.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 65d06a819307..2ac86096ddd9 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1498,8 +1498,7 @@ static int omap_i2c_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int omap_i2c_runtime_suspend(struct device *dev) +static int __maybe_unused omap_i2c_runtime_suspend(struct device *dev) { struct omap_i2c_dev *omap = dev_get_drvdata(dev); @@ -1525,7 +1524,7 @@ static int omap_i2c_runtime_suspend(struct device *dev) return 0; } -static int omap_i2c_runtime_resume(struct device *dev) +static int __maybe_unused omap_i2c_runtime_resume(struct device *dev) { struct omap_i2c_dev *omap = dev_get_drvdata(dev); @@ -1540,20 +1539,18 @@ static int omap_i2c_runtime_resume(struct device *dev) } static const struct dev_pm_ops omap_i2c_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(omap_i2c_runtime_suspend, omap_i2c_runtime_resume, NULL) }; -#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops) -#else -#define OMAP_I2C_PM_OPS NULL -#endif /* CONFIG_PM */ static struct platform_driver omap_i2c_driver = { .probe = omap_i2c_probe, .remove = omap_i2c_remove, .driver = { .name = "omap_i2c", - .pm = OMAP_I2C_PM_OPS, + .pm = &omap_i2c_pm_ops, .of_match_table = of_match_ptr(omap_i2c_of_match), }, }; -- cgit v1.2.3 From 25aa5c8b063ad633dd7f32406473ce86167f2eca Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 30 Jan 2019 15:21:16 -0500 Subject: drm/amdgpu: use spin_lock_irqsave to protect vm_manager.pasid_idr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0a5f49cbf9d6ad3721c16f8a6d823363ea7a160f ] amdgpu_vm_get_task_info is called from interrupt handler and sched timeout workqueue, we should use irq version spin_lock to avoid deadlock. Signed-off-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6a84526e20e0..49fe5084c53d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3011,14 +3011,15 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, struct amdgpu_task_info *task_info) { struct amdgpu_vm *vm; + unsigned long flags; - spin_lock(&adev->vm_manager.pasid_lock); + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); vm = idr_find(&adev->vm_manager.pasid_idr, pasid); if (vm) *task_info = vm->task_info; - spin_unlock(&adev->vm_manager.pasid_lock); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } /** -- cgit v1.2.3 From e3aabe4c24671aea9e242819b001ab9d2beb1fc7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 28 Jan 2019 09:46:07 -0700 Subject: nvme: lock NS list changes while handling command effects [ Upstream commit e7ad43c3eda6a1690c4c3c341f95dc1c6898da83 ] If a controller supports the NS Change Notification, the namespace scan_work is automatically triggered after attaching a new namespace. Occasionally the namespace scan_work may append the new namespace to the list before the admin command effects handling is completed. The effects handling unfreezes namespaces, but if it unfreezes the newly attached namespace, its request_queue freeze depth will be off and we'll hit the warning in blk_mq_unfreeze_queue(). On the next namespace add, we will fail to freeze that queue due to the previous bad accounting and deadlock waiting for frozen. Fix that by preventing scan work from altering the namespace list while command effects handling needs to pair freeze with unfreeze. Reported-by: Wen Xiong Tested-by: Wen Xiong Signed-off-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 8 +++++++- drivers/nvme/host/nvme.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e0d2b7473901..2cdb3032ca0f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1182,6 +1182,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, * effects say only one namespace is affected. */ if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { + mutex_lock(&ctrl->scan_lock); nvme_start_freeze(ctrl); nvme_wait_freeze(ctrl); } @@ -1210,8 +1211,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) */ if (effects & NVME_CMD_EFFECTS_LBCC) nvme_update_formats(ctrl); - if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) + if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { nvme_unfreeze(ctrl); + mutex_unlock(&ctrl->scan_lock); + } if (effects & NVME_CMD_EFFECTS_CCC) nvme_init_identify(ctrl); if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) @@ -3292,6 +3295,7 @@ static void nvme_scan_work(struct work_struct *work) if (nvme_identify_ctrl(ctrl, &id)) return; + mutex_lock(&ctrl->scan_lock); nn = le32_to_cpu(id->nn); if (ctrl->vs >= NVME_VS(1, 1, 0) && !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { @@ -3300,6 +3304,7 @@ static void nvme_scan_work(struct work_struct *work) } nvme_scan_ns_sequential(ctrl, nn); out_free_id: + mutex_unlock(&ctrl->scan_lock); kfree(id); down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); @@ -3535,6 +3540,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->state = NVME_CTRL_NEW; spin_lock_init(&ctrl->lock); + mutex_init(&ctrl->scan_lock); INIT_LIST_HEAD(&ctrl->namespaces); init_rwsem(&ctrl->namespaces_rwsem); ctrl->dev = dev; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 60220de2db52..e82cdaec81c9 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -148,6 +148,7 @@ struct nvme_ctrl { enum nvme_ctrl_state state; bool identified; spinlock_t lock; + struct mutex scan_lock; const struct nvme_ctrl_ops *ops; struct request_queue *admin_q; struct request_queue *connect_q; -- cgit v1.2.3 From 3cc6703d43f98131fc74def7599bb6cc240b0814 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 23 Jan 2019 18:46:11 -0700 Subject: nvme-pci: fix rapid add remove sequence [ Upstream commit 5c959d73dba6495ec01d04c206ee679d61ccb2b0 ] A surprise removal may fail to tear down request queues if it is racing with the initial asynchronous probe. If that happens, the remove path won't see the queue resources to tear down, and the controller reset path may create a new request queue on a removed device, but will not be able to make forward progress, deadlocking the pci removal. Protect setting up non-blocking resources from a shutdown by holding the same mutex, and transition to the CONNECTING state after these resources are initialized so the probe path may see the dead controller state before dispatching new IO. Link: https://bugzilla.kernel.org/show_bug.cgi?id=202081 Reported-by: Alex Gagniuc Signed-off-by: Keith Busch Tested-by: Alex Gagniuc Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f46313f441ec..6398ffbce6de 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2260,16 +2260,7 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) nvme_dev_disable(dev, false); - /* - * Introduce CONNECTING state from nvme-fc/rdma transports to mark the - * initializing procedure here. - */ - if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { - dev_warn(dev->ctrl.device, - "failed to mark controller CONNECTING\n"); - goto out; - } - + mutex_lock(&dev->shutdown_lock); result = nvme_pci_enable(dev); if (result) goto out; @@ -2288,6 +2279,17 @@ static void nvme_reset_work(struct work_struct *work) */ dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; dev->ctrl.max_segments = NVME_MAX_SEGS; + mutex_unlock(&dev->shutdown_lock); + + /* + * Introduce CONNECTING state from nvme-fc/rdma transports to mark the + * initializing procedure here. + */ + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) { + dev_warn(dev->ctrl.device, + "failed to mark controller CONNECTING\n"); + goto out; + } result = nvme_init_identify(&dev->ctrl); if (result) -- cgit v1.2.3 From 72426ed2a149c3cd0c33b409ca10b3918f399cc0 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 21 Jan 2019 22:49:37 +0900 Subject: fs: ratelimit __find_get_block_slow() failure message. [ Upstream commit 43636c804df0126da669c261fc820fb22f62bfc2 ] When something let __find_get_block_slow() hit all_mapped path, it calls printk() for 100+ times per a second. But there is no need to print same message with such high frequency; it is just asking for stall warning, or at least bloating log files. [ 399.866302][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.873324][T15342] b_state=0x00000029, b_size=512 [ 399.878403][T15342] device loop0 blocksize: 4096 [ 399.883296][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.890400][T15342] b_state=0x00000029, b_size=512 [ 399.895595][T15342] device loop0 blocksize: 4096 [ 399.900556][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8 [ 399.907471][T15342] b_state=0x00000029, b_size=512 [ 399.912506][T15342] device loop0 blocksize: 4096 This patch reduces frequency to up to once per a second, in addition to concatenating three lines into one. [ 399.866302][T15342] __find_get_block_slow() failed. block=1, b_blocknr=8, b_state=0x00000029, b_size=512, device loop0 blocksize: 4096 Signed-off-by: Tetsuo Handa Reviewed-by: Jan Kara Cc: Dmitry Vyukov Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- fs/buffer.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 6f1ae3ac9789..c083c4b3c1e7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -200,6 +200,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) struct buffer_head *head; struct page *page; int all_mapped = 1; + static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1); index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED); @@ -227,15 +228,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) * file io on the block device and getblk. It gets dealt with * elsewhere, don't buffer_error if we had some unmapped buffers */ - if (all_mapped) { - printk("__find_get_block_slow() failed. " - "block=%llu, b_blocknr=%llu\n", - (unsigned long long)block, - (unsigned long long)bh->b_blocknr); - printk("b_state=0x%08lx, b_size=%zu\n", - bh->b_state, bh->b_size); - printk("device %pg blocksize: %d\n", bdev, - 1 << bd_inode->i_blkbits); + ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE); + if (all_mapped && __ratelimit(&last_warned)) { + printk("__find_get_block_slow() failed. block=%llu, " + "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, " + "device %pg blocksize: %d\n", + (unsigned long long)block, + (unsigned long long)bh->b_blocknr, + bh->b_state, bh->b_size, bdev, + 1 << bd_inode->i_blkbits); } out_unlock: spin_unlock(&bd_mapping->private_lock); -- cgit v1.2.3 From d727c0edf6461eb0938d354ec7e3677e149c4016 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 6 Feb 2019 14:43:42 -0800 Subject: qed: Fix EQ full firmware assert. [ Upstream commit 660492bcf4a7561b5fdc13be0ae0b0c0a8c120be ] When slowpath messages are sent with high rate, the resulting events can lead to a FW assert in case they are not handled fast enough (Event Queue Full assert). Attempt to send queued slowpath messages only after the newly evacuated entries in the EQ ring are indicated to FW. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_sp.h | 1 + drivers/net/ethernet/qlogic/qed/qed_spq.c | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 3157c0d99441..dae2896e1d8e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -380,6 +380,7 @@ void qed_consq_setup(struct qed_hwfn *p_hwfn); * @param p_hwfn */ void qed_consq_free(struct qed_hwfn *p_hwfn); +int qed_spq_pend_post(struct qed_hwfn *p_hwfn); /** * @file diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 7106ad17afe2..a0ee847f379b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -402,6 +402,11 @@ int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie) qed_eq_prod_update(p_hwfn, qed_chain_get_prod_idx(p_chain)); + /* Attempt to post pending requests */ + spin_lock_bh(&p_hwfn->p_spq->lock); + rc = qed_spq_pend_post(p_hwfn); + spin_unlock_bh(&p_hwfn->p_spq->lock); + return rc; } @@ -745,7 +750,7 @@ static int qed_spq_post_list(struct qed_hwfn *p_hwfn, return 0; } -static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) +int qed_spq_pend_post(struct qed_hwfn *p_hwfn) { struct qed_spq *p_spq = p_hwfn->p_spq; struct qed_spq_entry *p_ent = NULL; @@ -883,7 +888,6 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent = NULL; struct qed_spq_entry *tmp; struct qed_spq_entry *found = NULL; - int rc; if (!p_hwfn) return -EINVAL; @@ -941,12 +945,7 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, */ qed_spq_return_entry(p_hwfn, found); - /* Attempt to post pending requests */ - spin_lock_bh(&p_spq->lock); - rc = qed_spq_pend_post(p_hwfn); - spin_unlock_bh(&p_spq->lock); - - return rc; + return 0; } int qed_consq_alloc(struct qed_hwfn *p_hwfn) -- cgit v1.2.3 From 84828dd2ba92454033d819a35e203f45a8b9dcd8 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 6 Feb 2019 14:43:44 -0800 Subject: qed: Consider TX tcs while deriving the max num_queues for PF. [ Upstream commit fb1faab74ddef9ec2d841d54e5d0912a097b3abe ] Max supported queues is derived incorrectly in the case of multi-CoS. Need to consider TCs while calculating num_queues for PF. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index e68ca83ae915..64ac95ca4df2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2216,7 +2216,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, u16 num_queues = 0; /* Since the feature controls only queue-zones, - * make sure we have the contexts [rx, tx, xdp] to + * make sure we have the contexts [rx, xdp, tcs] to * match. */ for_each_hwfn(cdev, i) { @@ -2226,7 +2226,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, u16 cids; cids = hwfn->pf_params.eth_pf_params.num_cons; - num_queues += min_t(u16, l2_queues, cids / 3); + cids /= (2 + info->num_tc); + num_queues += min_t(u16, l2_queues, cids); } /* queues might theoretically be >256, but interrupts' -- cgit v1.2.3 From 1781ae6f0eacd97a033966cf1a55143dca0ec7ae Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 6 Feb 2019 14:43:45 -0800 Subject: qede: Fix system crash on configuring channels. [ Upstream commit 0aa4febb420d91df5b56b1864a2465765da85f4b ] Under heavy traffic load, when changing number of channels via ethtool (ethtool -L) which will cause interface to be reloaded, it was observed that some packets gets transmitted on old TX channel/queue id which doesn't really exist after the channel configuration leads to system crash. Add a safeguard in the driver by validating queue id through ndo_select_queue() which is called before the ndo_start_xmit(). Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qede/qede.h | 3 +++ drivers/net/ethernet/qlogic/qede/qede_fp.c | 13 +++++++++++++ drivers/net/ethernet/qlogic/qede/qede_main.c | 3 +++ 3 files changed, 19 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 6a4d266fb8e2..d242a5724069 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -489,6 +489,9 @@ struct qede_reload_args { /* Datapath functions definition */ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev); +u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback); netdev_features_t qede_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 1a78027de071..a96da16f3404 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1695,6 +1695,19 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_OK; } +u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback) +{ + struct qede_dev *edev = netdev_priv(dev); + int total_txq; + + total_txq = QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc; + + return QEDE_TSS_COUNT(edev) ? + fallback(dev, skb, NULL) % total_txq : 0; +} + /* 8B udp header + 8B base tunnel header + 32B option length */ #define QEDE_MAX_TUN_HDR_LEN 48 diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 46d0f2eaa0c0..f3d9c40c4115 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -631,6 +631,7 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -666,6 +667,7 @@ static const struct net_device_ops qede_netdev_vf_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -684,6 +686,7 @@ static const struct net_device_ops qede_netdev_vf_xdp_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, -- cgit v1.2.3 From 6d482bc5697763eb1214f207286daa201b32d20a Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 25 Jan 2019 08:12:47 +0800 Subject: blk-iolatency: fix IO hang due to negative inflight counter [ Upstream commit 8c772a9bfc7c07c76f4a58b58910452fbb20843b ] Our test reported the following stack, and vmcore showed that ->inflight counter is -1. [ffffc9003fcc38d0] __schedule at ffffffff8173d95d [ffffc9003fcc3958] schedule at ffffffff8173de26 [ffffc9003fcc3970] io_schedule at ffffffff810bb6b6 [ffffc9003fcc3988] blkcg_iolatency_throttle at ffffffff813911cb [ffffc9003fcc3a20] rq_qos_throttle at ffffffff813847f3 [ffffc9003fcc3a48] blk_mq_make_request at ffffffff8137468a [ffffc9003fcc3b08] generic_make_request at ffffffff81368b49 [ffffc9003fcc3b68] submit_bio at ffffffff81368d7d [ffffc9003fcc3bb8] ext4_io_submit at ffffffffa031be00 [ext4] [ffffc9003fcc3c00] ext4_writepages at ffffffffa03163de [ext4] [ffffc9003fcc3d68] do_writepages at ffffffff811c49ae [ffffc9003fcc3d78] __filemap_fdatawrite_range at ffffffff811b6188 [ffffc9003fcc3e30] filemap_write_and_wait_range at ffffffff811b6301 [ffffc9003fcc3e60] ext4_sync_file at ffffffffa030cee8 [ext4] [ffffc9003fcc3ea8] vfs_fsync_range at ffffffff8128594b [ffffc9003fcc3ee8] do_fsync at ffffffff81285abd [ffffc9003fcc3f18] sys_fsync at ffffffff81285d50 [ffffc9003fcc3f28] do_syscall_64 at ffffffff81003c04 [ffffc9003fcc3f50] entry_SYSCALL_64_after_swapgs at ffffffff81742b8e The ->inflight counter may be negative (-1) if 1) blk-iolatency was disabled when the IO was issued, 2) blk-iolatency was enabled before this IO reached its endio, 3) the ->inflight counter is decreased from 0 to -1 in endio() In fact the hang can be easily reproduced by the below script, H=/sys/fs/cgroup/unified/ P=/sys/fs/cgroup/unified/test echo "+io" > $H/cgroup.subtree_control mkdir -p $P echo $$ > $P/cgroup.procs xfs_io -f -d -c "pwrite 0 4k" /dev/sdg echo "`cat /sys/block/sdg/dev` target=1000000" > $P/io.latency xfs_io -f -d -c "pwrite 0 4k" /dev/sdg This fixes the problem by freezing the queue so that while enabling/disabling iolatency, there is no inflight rq running. Note that quiesce_queue is not needed as this only updating iolatency configuration about which dispatching request_queue doesn't care. Signed-off-by: Liu Bo Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-iolatency.c | 52 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 19923f8a029d..b154e057ca67 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -72,6 +72,7 @@ #include #include #include +#include #include "blk-rq-qos.h" #include "blk-stat.h" @@ -568,6 +569,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) return; enabled = blk_iolatency_enabled(iolat->blkiolat); + if (!enabled) + return; + while (blkg && blkg->parent) { iolat = blkg_to_lat(blkg); if (!iolat) { @@ -577,7 +581,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) rqw = &iolat->rq_wait; atomic_dec(&rqw->inflight); - if (!enabled || iolat->min_lat_nsec == 0) + if (iolat->min_lat_nsec == 0) goto next; iolatency_record_time(iolat, &bio->bi_issue, now, issue_as_root); @@ -721,10 +725,13 @@ int blk_iolatency_init(struct request_queue *q) return 0; } -static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) +/* + * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise + * return 0. + */ +static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) { struct iolatency_grp *iolat = blkg_to_lat(blkg); - struct blk_iolatency *blkiolat = iolat->blkiolat; u64 oldval = iolat->min_lat_nsec; iolat->min_lat_nsec = val; @@ -733,9 +740,10 @@ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) BLKIOLATENCY_MAX_WIN_SIZE); if (!oldval && val) - atomic_inc(&blkiolat->enabled); + return 1; if (oldval && !val) - atomic_dec(&blkiolat->enabled); + return -1; + return 0; } static void iolatency_clear_scaling(struct blkcg_gq *blkg) @@ -768,6 +776,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, u64 lat_val = 0; u64 oldval; int ret; + int enable = 0; ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); if (ret) @@ -803,7 +812,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, blkg = ctx.blkg; oldval = iolat->min_lat_nsec; - iolatency_set_min_lat_nsec(blkg, lat_val); + enable = iolatency_set_min_lat_nsec(blkg, lat_val); + if (enable) { + WARN_ON_ONCE(!blk_get_queue(blkg->q)); + blkg_get(blkg); + } + if (oldval != iolat->min_lat_nsec) { iolatency_clear_scaling(blkg); } @@ -811,6 +825,24 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, ret = 0; out: blkg_conf_finish(&ctx); + if (ret == 0 && enable) { + struct iolatency_grp *tmp = blkg_to_lat(blkg); + struct blk_iolatency *blkiolat = tmp->blkiolat; + + blk_mq_freeze_queue(blkg->q); + + if (enable == 1) + atomic_inc(&blkiolat->enabled); + else if (enable == -1) + atomic_dec(&blkiolat->enabled); + else + WARN_ON_ONCE(1); + + blk_mq_unfreeze_queue(blkg->q); + + blkg_put(blkg); + blk_put_queue(blkg->q); + } return ret ?: nbytes; } @@ -910,8 +942,14 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd) { struct iolatency_grp *iolat = pd_to_lat(pd); struct blkcg_gq *blkg = lat_to_blkg(iolat); + struct blk_iolatency *blkiolat = iolat->blkiolat; + int ret; - iolatency_set_min_lat_nsec(blkg, 0); + ret = iolatency_set_min_lat_nsec(blkg, 0); + if (ret == 1) + atomic_inc(&blkiolat->enabled); + if (ret == -1) + atomic_dec(&blkiolat->enabled); iolatency_clear_scaling(blkg); } -- cgit v1.2.3 From 7066774e60e4d2c1108f99822725af5de19f1c78 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 11 Feb 2019 09:23:50 -0700 Subject: nvme-pci: add missing unlock for reset error [ Upstream commit 4726bcf30fad37cc555cd9dcd6c73f2b2668c879 ] The reset work holds a mutex to prevent races with removal modifying the same resources, but was unlocking only on success. Unlock on failure too. Fixes: 5c959d73dba64 ("nvme-pci: fix rapid add remove sequence") Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6398ffbce6de..7b9ef8e734e7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2263,15 +2263,15 @@ static void nvme_reset_work(struct work_struct *work) mutex_lock(&dev->shutdown_lock); result = nvme_pci_enable(dev); if (result) - goto out; + goto out_unlock; result = nvme_pci_configure_admin_queue(dev); if (result) - goto out; + goto out_unlock; result = nvme_alloc_admin_tags(dev); if (result) - goto out; + goto out_unlock; /* * Limit the max command size to prevent iod->sg allocations going @@ -2354,6 +2354,8 @@ static void nvme_reset_work(struct work_struct *work) nvme_start_ctrl(&dev->ctrl); return; + out_unlock: + mutex_unlock(&dev->shutdown_lock); out: nvme_remove_dead_ctrl(dev, result); } -- cgit v1.2.3 From b3b29dc53104a0aa37d4c79918b8384ef7552e06 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Sat, 9 Mar 2019 15:32:13 -0800 Subject: Input: wacom_serial4 - add support for Wacom ArtPad II tablet commit 44fc95e218a09d7966a9d448941fdb003f6bb69f upstream. Tablet initially begins communicating at 9600 baud, so this command should be used to connect to the device: $ inputattach --daemon --baud 9600 --wacom_iv /dev/ttyS0 https://github.com/linuxwacom/xf86-input-wacom/issues/40 Signed-off-by: Jason Gerecke Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/tablet/wacom_serial4.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/tablet/wacom_serial4.c b/drivers/input/tablet/wacom_serial4.c index 38bfaca48eab..150f9eecaca7 100644 --- a/drivers/input/tablet/wacom_serial4.c +++ b/drivers/input/tablet/wacom_serial4.c @@ -187,6 +187,7 @@ enum { MODEL_DIGITIZER_II = 0x5544, /* UD */ MODEL_GRAPHIRE = 0x4554, /* ET */ MODEL_PENPARTNER = 0x4354, /* CT */ + MODEL_ARTPAD_II = 0x4B54, /* KT */ }; static void wacom_handle_model_response(struct wacom *wacom) @@ -245,6 +246,7 @@ static void wacom_handle_model_response(struct wacom *wacom) wacom->flags = F_HAS_STYLUS2 | F_HAS_SCROLLWHEEL; break; + case MODEL_ARTPAD_II: case MODEL_DIGITIZER_II: wacom->dev->name = "Wacom Digitizer II"; wacom->dev->id.version = MODEL_DIGITIZER_II; -- cgit v1.2.3 From fe34541ab93082490835f58d2d7784c4465f666c Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Sat, 9 Mar 2019 15:48:04 -0800 Subject: Input: elan_i2c - add id for touchpad found in Lenovo s21e-20 commit e154ab69321ce2c54f19863d75c77b4e2dc9d365 upstream. Lenovo s21e-20 uses ELAN0601 in its ACPI tables for the Elan touchpad. Signed-off-by: Vincent Batts Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 225ae6980182..628ef617bb2f 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1337,6 +1337,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, { "ELAN0600", 0 }, + { "ELAN0601", 0 }, { "ELAN0602", 0 }, { "ELAN0605", 0 }, { "ELAN0608", 0 }, -- cgit v1.2.3 From dcdd1bcbc0992b19cc7b301eeb7e61b590144e37 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 11 Feb 2019 12:43:23 -0600 Subject: iscsi_ibft: Fix missing break in switch statement commit df997abeebadaa4824271009e2d2b526a70a11cb upstream. Add missing break statement in order to prevent the code from falling through to case ISCSI_BOOT_TGT_NAME, which is unnecessary. This bug was found thanks to the ongoing efforts to enable -Wimplicit-fallthrough. Fixes: b33a84a38477 ("ibft: convert iscsi_ibft module to iscsi boot lib") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/iscsi_ibft.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 6bc8e6640d71..c51462f5aa1e 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -542,6 +542,7 @@ static umode_t __init ibft_check_tgt_for(void *data, int type) case ISCSI_BOOT_TGT_NIC_ASSOC: case ISCSI_BOOT_TGT_CHAP_TYPE: rc = S_IRUGO; + break; case ISCSI_BOOT_TGT_NAME: if (tgt->tgt_name_len) rc = S_IRUGO; -- cgit v1.2.3 From 917f943781a932a7a0d8397285145dadf07357bf Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 15 Feb 2019 15:42:42 -0600 Subject: scsi: aacraid: Fix missing break in switch statement commit 5e420fe635813e5746b296cfc8fff4853ae205a2 upstream. Add missing break statement and fix identation issue. This bug was found thanks to the ongoing efforts to enable -Wimplicit-fallthrough. Fixes: 9cb62fa24e0d ("aacraid: Log firmware AIF messages") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/aacraid/commsup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 6e1b022a823d..3236240a4edd 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1304,8 +1304,9 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr) ADD : DELETE; break; } - case AifBuManagerEvent: - aac_handle_aif_bu(dev, aifcmd); + break; + case AifBuManagerEvent: + aac_handle_aif_bu(dev, aifcmd); break; } -- cgit v1.2.3 From 36e3673d01af4438464e2d366cfedd7f7d42ee0a Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 7 Feb 2019 15:30:05 +0200 Subject: x86/PCI: Fixup RTIT_BAR of Intel Denverton Trace Hub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2e095ce7b6ecce2f3e2ff330527f12056ed1e1a1 upstream. On Denverton's integration of the Intel(R) Trace Hub (for a reference and overview see Documentation/trace/intel_th.rst) the reported size of one of its resources (RTIT_BAR) doesn't match its actual size, which leads to overlaps with other devices' resources. In practice, it overlaps with XHCI MMIO space, which results in the xhci driver bailing out after seeing its registers as 0xffffffff, and perceived disappearance of all USB devices: intel_th_pci 0000:00:1f.7: enabling device (0004 -> 0006) xhci_hcd 0000:00:15.0: xHCI host controller not responding, assume dead xhci_hcd 0000:00:15.0: xHC not responding in xhci_irq, assume controller is dead xhci_hcd 0000:00:15.0: HC died; cleaning up usb 1-1: USB disconnect, device number 2 For this reason, we need to resize the RTIT_BAR on Denverton to its actual size, which in this case is 4MB. The corresponding erratum is DNV36 at the link below: DNV36. Processor Host Root Complex May Incorrectly Route Memory Accesses to Intel® Trace Hub Problem: The Intel® Trace Hub RTIT_BAR (B0:D31:F7 offset 20h) is reported as a 2KB memory range. Due to this erratum, the processor Host Root Complex will forward addresses from RTIT_BAR to RTIT_BAR + 4MB -1 to Intel® Trace Hub. Implication: Devices assigned within the RTIT_BAR to RTIT_BAR + 4MB -1 space may not function correctly. Workaround: A BIOS code change has been identified and may be implemented as a workaround for this erratum. Status: No Fix. Note that 5118ccd34780 ("intel_th: pci: Add Denverton SOC support") updates the Trace Hub driver so it claims the Denverton device, but the resource overlap exists regardless of whether that driver is loaded or that commit is included. Link: https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/atom-c3000-family-spec-update.pdf Signed-off-by: Alexander Shishkin [bhelgaas: include erratum text, clarify relationship with 5118ccd34780] Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/fixup.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 13f4485ca388..bd372e896557 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -641,6 +641,22 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334b, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334c, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334d, quirk_no_aersid); +static void quirk_intel_th_dnv(struct pci_dev *dev) +{ + struct resource *r = &dev->resource[4]; + + /* + * Denverton reports 2k of RTIT_BAR (intel_th resource 4), which + * appears to be 4 MB in reality. + */ + if (r->end == r->start + 0x7ff) { + r->start = 0; + r->end = 0x3fffff; + r->flags |= IORESOURCE_UNSET; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x19e1, quirk_intel_th_dnv); + #ifdef CONFIG_PHYS_ADDR_T_64BIT #define AMD_141b_MMIO_BASE(x) (0x80 + (x) * 0x8) -- cgit v1.2.3 From 271c5a5d546ba6daecf101d6cf89ea162f2a1e1d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 24 Jan 2019 09:28:59 +0100 Subject: arm64: dts: zcu100-revC: Give wifi some time after power-on commit 35a4f89cd4731ac6ec985cd29ddc1630903006b7 upstream. Somewhere along recent changes to power control of the wl1831, power-on became very unreliable on the Ultra96, failing like this: wl1271_sdio: probe of mmc2:0001:1 failed with error -16 wl1271_sdio: probe of mmc2:0001:2 failed with error -16 After playing with some dt parameters and comparing to other users of this chip, it turned out we need some power-on delay to make things stable again. In contrast to those other users which define 200 ms, Ultra96 is already happy with 10 ms. Fixes: 5869ba0653b9 ("arm64: zynqmp: Add support for Xilinx zcu100-revC") Signed-off-by: Jan Kiszka Acked-by: Ulf Hansson Signed-off-by: Michal Simek Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/xilinx/zynqmp-zcu100-revC.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zcu100-revC.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zcu100-revC.dts index eb5e8bddb610..8954c8c6f547 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp-zcu100-revC.dts +++ b/arch/arm64/boot/dts/xilinx/zynqmp-zcu100-revC.dts @@ -101,6 +101,7 @@ sdio_pwrseq: sdio_pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio 7 GPIO_ACTIVE_LOW>; /* WIFI_EN */ + post-power-on-delay-ms = <10>; }; }; -- cgit v1.2.3 From e6eb5e3561ca8bcee04cfbe7de9acaa31159638e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 24 Jan 2019 08:52:33 +0100 Subject: arm64: dts: hikey: Give wifi some time after power-on commit 83b944174ad79825ae84a47af1a0354485b24602 upstream. Somewhere along recent changes to power control of the wl1835, power-on became very unreliable on the hikey, failing like this: wl1271_sdio: probe of mmc2:0001:1 failed with error -16 wl1271_sdio: probe of mmc2:0001:2 failed with error -16 After playing with some dt parameters and comparing to other users of this chip, it turned out we need some power-on delay to make things stable again. In contrast to those other users which define 200 ms, the hikey would already be happy with 1 ms. Still, we use the safer 10 ms, like on the Ultra96. Fixes: ea452678734e ("arm64: dts: hikey: Fix WiFi support") Cc: #4.12+ Signed-off-by: Jan Kiszka Acked-by: Ulf Hansson Signed-off-by: Wei Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index f4964bee6a1a..e47609aa20d6 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -118,6 +118,7 @@ reset-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>; clocks = <&pmic>; clock-names = "ext_clock"; + post-power-on-delay-ms = <10>; power-off-delay-us = <10>; }; -- cgit v1.2.3 From 103ec440943e9108094e89510e04dcd299c0b21e Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Wed, 23 Jan 2019 12:06:06 -0800 Subject: arm64: dts: hikey: Revert "Enable HS200 mode on eMMC" commit 8d26c1390aec795d492b8de5e4437751e8805a1d upstream. This reverts commit abd7d0972a192ee653efc7b151a6af69db58f2bb. This change was already partially reverted by John Stultz in commit 9c6d26df1fae ("arm64: dts: hikey: Fix eMMC corruption regression"). This change appears to cause controller resets and block read failures which prevents successful booting on some hikey boards. Cc: Ryan Grachek Cc: Wei Xu Cc: Manivannan Sadhasivam Cc: Rob Herring Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: devicetree@vger.kernel.org Cc: stable #4.17+ Signed-off-by: Alistair Strachan Signed-off-by: John Stultz Signed-off-by: Wei Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index e47609aa20d6..e80a792827ed 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -301,7 +301,6 @@ dwmmc_0: dwmmc0@f723d000 { cap-mmc-highspeed; - mmc-hs200-1_8v; non-removable; bus-width = <0x8>; vmmc-supply = <&ldo19>; -- cgit v1.2.3 From cd10bc8261eb9aa3f955f4455d0609897e68bf58 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 24 Jan 2019 13:22:57 +0100 Subject: ARM: dts: exynos: Fix pinctrl definition for eMMC RTSN line on Odroid X2/U3 commit ec33745bccc8f336957c751f4153421cc9ef5a54 upstream. Commit 225da7e65a03 ("ARM: dts: add eMMC reset line for exynos4412-odroid-common") added MMC power sequence for eMMC card of Odroid X2/U3. It reused generic sd1_cd pin control configuration node and only disabled pull-up. However that time the pinctrl configuration was not applied during MMC power sequence driver initialization. This has been changed later by commit d97a1e5d7cd2 ("mmc: pwrseq: convert to proper platform device"). It turned out then, that the provided pinctrl configuration is not correct, because the eMMC_RTSN line is being re-configured as 'special function/card detect function for mmc1 controller' not the simple 'output', thus the power sequence driver doesn't really set the pin value. This in effect broke the reboot of Odroid X2/U3 boards. Fix this by providing separate node with eMMC_RTSN pin configuration. Cc: Reported-by: Markus Reichl Suggested-by: Ulf Hansson Fixes: 225da7e65a03 ("ARM: dts: add eMMC reset line for exynos4412-odroid-common") Signed-off-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos4412-odroid-common.dtsi | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi index a09e46c9dbc0..00820d239753 100644 --- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi +++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi @@ -49,7 +49,7 @@ }; emmc_pwrseq: pwrseq { - pinctrl-0 = <&sd1_cd>; + pinctrl-0 = <&emmc_rstn>; pinctrl-names = "default"; compatible = "mmc-pwrseq-emmc"; reset-gpios = <&gpk1 2 GPIO_ACTIVE_LOW>; @@ -161,12 +161,6 @@ cpu0-supply = <&buck2_reg>; }; -/* RSTN signal for eMMC */ -&sd1_cd { - samsung,pin-pud = ; - samsung,pin-drv = ; -}; - &pinctrl_1 { gpio_power_key: power_key { samsung,pins = "gpx1-3"; @@ -184,6 +178,11 @@ samsung,pins = "gpx3-7"; samsung,pin-pud = ; }; + + emmc_rstn: emmc-rstn { + samsung,pins = "gpk1-2"; + samsung,pin-pud = ; + }; }; &ehci { -- cgit v1.2.3 From bfc341b61f5c27cf4840695caeafff4f3eca2e47 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 15 Feb 2019 11:36:50 +0100 Subject: ARM: dts: exynos: Add minimal clkout parameters to Exynos3250 PMU commit a66352e005488ecb4b534ba1af58a9f671eba9b8 upstream. Add minimal parameters needed by the Exynos CLKOUT driver to Exynos3250 PMU node. This fixes the following warning on boot: exynos_clkout_init: failed to register clkout clock Fixes: d19bb397e19e ("ARM: dts: exynos: Update PMU node with CLKOUT related data") Cc: Signed-off-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos3250.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index 27a1ee28c3bb..94efca78c42f 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -168,6 +168,9 @@ interrupt-controller; #interrupt-cells = <3>; interrupt-parent = <&gic>; + clock-names = "clkout8"; + clocks = <&cmu CLK_FIN_PLL>; + #clock-cells = <1>; }; mipi_phy: video-phy { -- cgit v1.2.3 From bb2c205cd3a0e8d9c7597b67eb0be793d25854b7 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 25 Feb 2019 19:42:52 +0100 Subject: ARM: dts: exynos: Fix max voltage for buck8 regulator on Odroid XU3/XU4 commit a3238924a820c1d7c977b632b769f3b5690cba09 upstream. The maximum voltage value for buck8 regulator on Odroid XU3/XU4 boards is set too low. Increase it to the 2000mV as specified on the board schematic. So far the board worked fine, because of the bug in the PMIC driver, which used incorrect step value for that regulator. It interpreted the voltage value set by the bootloader as 1225mV and kept it unchanged. The regulator driver has been however fixed recently in the commit 56b5d4ea778c ("regulator: s2mps11: Fix steps for buck7, buck8 and LDO35"), what results in reading the proper buck8 value and forcing it to 1500mV on boot. This is not enough for proper board operation and results in eMMC errors during heavy IO traffic. Increasing maximum voltage value for buck8 restores original driver behavior and fixes eMMC issues. Signed-off-by: Marek Szyprowski Fixes: 86a2d2ac5e5d ("ARM: dts: Add dts file for Odroid XU3 board") Fixes: 56b5d4ea778c ("regulator: s2mps11: Fix steps for buck7, buck8 and LDO35") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos5422-odroid-core.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/exynos5422-odroid-core.dtsi b/arch/arm/boot/dts/exynos5422-odroid-core.dtsi index 2f4f40882dab..27214e6ebe4f 100644 --- a/arch/arm/boot/dts/exynos5422-odroid-core.dtsi +++ b/arch/arm/boot/dts/exynos5422-odroid-core.dtsi @@ -334,7 +334,7 @@ buck8_reg: BUCK8 { regulator-name = "vdd_1.8v_ldo"; regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1500000>; + regulator-max-microvolt = <2000000>; regulator-always-on; regulator-boot-on; }; -- cgit v1.2.3 From f9a0a08d9c9981e21f86af2f62dba64cf605cd49 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 24 Jan 2019 13:06:58 +0100 Subject: drm: disable uncached DMA optimization for ARM and arm64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e02f5c1bb2283cfcee68f2f0feddcc06150f13aa ] The DRM driver stack is designed to work with cache coherent devices only, but permits an optimization to be enabled in some cases, where for some buffers, both the CPU and the GPU use uncached mappings, removing the need for DMA snooping and allocation in the CPU caches. The use of uncached GPU mappings relies on the correct implementation of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU will use cached mappings nonetheless. On x86 platforms, this does not seem to matter, as uncached CPU mappings will snoop the caches in any case. However, on ARM and arm64, enabling this optimization on a platform where NoSnoop is ignored results in loss of coherency, which breaks correct operation of the device. Since we have no way of detecting whether NoSnoop works or not, just disable this optimization entirely for ARM and arm64. Cc: Christian Koenig Cc: Alex Deucher Cc: David Zhou Cc: Huang Rui Cc: Junwei Zhang Cc: Michel Daenzer Cc: David Airlie Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Sean Paul Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Will Deacon Cc: Christoph Hellwig Cc: Robin Murphy Cc: amd-gfx list Cc: dri-devel Reported-by: Carsten Haitzler Signed-off-by: Ard Biesheuvel Reviewed-by: Christian König Reviewed-by: Alex Deucher Link: https://patchwork.kernel.org/patch/10778815/ Signed-off-by: Christian König Signed-off-by: Sasha Levin --- include/drm/drm_cache.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index bfe1639df02d..97fc498dc767 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -47,6 +47,24 @@ static inline bool drm_arch_can_wc_memory(void) return false; #elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3) return false; +#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) + /* + * The DRM driver stack is designed to work with cache coherent devices + * only, but permits an optimization to be enabled in some cases, where + * for some buffers, both the CPU and the GPU use uncached mappings, + * removing the need for DMA snooping and allocation in the CPU caches. + * + * The use of uncached GPU mappings relies on the correct implementation + * of the PCIe NoSnoop TLP attribute by the platform, otherwise the GPU + * will use cached mappings nonetheless. On x86 platforms, this does not + * seem to matter, as uncached CPU mappings will snoop the caches in any + * case. However, on ARM and arm64, enabling this optimization on a + * platform where NoSnoop is ignored results in loss of coherency, which + * breaks correct operation of the device. Since we have no way of + * detecting whether NoSnoop works or not, just disable this + * optimization entirely for ARM and arm64. + */ + return false; #else return true; #endif -- cgit v1.2.3 From 02d86085cac6365ecf68a124ac05fbe1acb60377 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 7 Oct 2018 00:09:18 +0900 Subject: netfilter: xt_TEE: fix wrong interface selection [ Upstream commit f24d2d4f9586985509320f90308723d3d0c4e47f ] TEE netdevice notifier handler checks only interface name. however each netns can have same interface name. hence other netns's interface could be selected. test commands: %ip netns add vm1 %iptables -I INPUT -p icmp -j TEE --gateway 192.168.1.1 --oif enp2s0 %ip link set enp2s0 netns vm1 Above rule is in the root netns. but that rule could get enp2s0 ifindex of vm1 by notifier handler. After this patch, TEE rule is added to the per-netns list. Fixes: 9e2f6c5d78db ("netfilter: Rework xt_TEE netdevice notifier") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_TEE.c | 69 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 17 deletions(-) diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 0d0d68c989df..673ad2099f97 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include #include @@ -25,8 +27,15 @@ struct xt_tee_priv { int oif; }; +static unsigned int tee_net_id __read_mostly; static const union nf_inet_addr tee_zero_address; +struct tee_net { + struct list_head priv_list; + /* lock protects the priv_list */ + struct mutex lock; +}; + static unsigned int tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { @@ -51,17 +60,16 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) } #endif -static DEFINE_MUTEX(priv_list_mutex); -static LIST_HEAD(priv_list); - static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + struct tee_net *tn = net_generic(net, tee_net_id); struct xt_tee_priv *priv; - mutex_lock(&priv_list_mutex); - list_for_each_entry(priv, &priv_list, list) { + mutex_lock(&tn->lock); + list_for_each_entry(priv, &tn->priv_list, list) { switch (event) { case NETDEV_REGISTER: if (!strcmp(dev->name, priv->tginfo->oif)) @@ -79,13 +87,14 @@ static int tee_netdev_event(struct notifier_block *this, unsigned long event, break; } } - mutex_unlock(&priv_list_mutex); + mutex_unlock(&tn->lock); return NOTIFY_DONE; } static int tee_tg_check(const struct xt_tgchk_param *par) { + struct tee_net *tn = net_generic(par->net, tee_net_id); struct xt_tee_tginfo *info = par->targinfo; struct xt_tee_priv *priv; @@ -106,9 +115,9 @@ static int tee_tg_check(const struct xt_tgchk_param *par) priv->oif = -1; info->priv = priv; - mutex_lock(&priv_list_mutex); - list_add(&priv->list, &priv_list); - mutex_unlock(&priv_list_mutex); + mutex_lock(&tn->lock); + list_add(&priv->list, &tn->priv_list); + mutex_unlock(&tn->lock); } else info->priv = NULL; @@ -118,12 +127,13 @@ static int tee_tg_check(const struct xt_tgchk_param *par) static void tee_tg_destroy(const struct xt_tgdtor_param *par) { + struct tee_net *tn = net_generic(par->net, tee_net_id); struct xt_tee_tginfo *info = par->targinfo; if (info->priv) { - mutex_lock(&priv_list_mutex); + mutex_lock(&tn->lock); list_del(&info->priv->list); - mutex_unlock(&priv_list_mutex); + mutex_unlock(&tn->lock); kfree(info->priv); } static_key_slow_dec(&xt_tee_enabled); @@ -156,6 +166,21 @@ static struct xt_target tee_tg_reg[] __read_mostly = { #endif }; +static int __net_init tee_net_init(struct net *net) +{ + struct tee_net *tn = net_generic(net, tee_net_id); + + INIT_LIST_HEAD(&tn->priv_list); + mutex_init(&tn->lock); + return 0; +} + +static struct pernet_operations tee_net_ops = { + .init = tee_net_init, + .id = &tee_net_id, + .size = sizeof(struct tee_net), +}; + static struct notifier_block tee_netdev_notifier = { .notifier_call = tee_netdev_event, }; @@ -164,22 +189,32 @@ static int __init tee_tg_init(void) { int ret; - ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); - if (ret) + ret = register_pernet_subsys(&tee_net_ops); + if (ret < 0) return ret; + + ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); + if (ret < 0) + goto cleanup_subsys; + ret = register_netdevice_notifier(&tee_netdev_notifier); - if (ret) { - xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); - return ret; - } + if (ret < 0) + goto unregister_targets; return 0; + +unregister_targets: + xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); +cleanup_subsys: + unregister_pernet_subsys(&tee_net_ops); + return ret; } static void __exit tee_tg_exit(void) { unregister_netdevice_notifier(&tee_netdev_notifier); xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg)); + unregister_pernet_subsys(&tee_net_ops); } module_init(tee_tg_init); -- cgit v1.2.3 From e6e00017910cadeace868ebe3bb6369accc486a4 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 7 Oct 2018 00:09:32 +0900 Subject: netfilter: xt_TEE: add missing code to get interface index in checkentry. [ Upstream commit 18c0ab87364ac5128a152055fdcb1d27e01caf01 ] checkentry(tee_tg_check) should initialize priv->oif from dev if possible. But only netdevice notifier handler can set that. Hence priv->oif is always -1 until notifier handler is called. Fixes: 9e2f6c5d78db ("netfilter: Rework xt_TEE netdevice notifier") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_TEE.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 673ad2099f97..1dae02a97ee3 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -104,6 +104,8 @@ static int tee_tg_check(const struct xt_tgchk_param *par) return -EINVAL; if (info->oif[0]) { + struct net_device *dev; + if (info->oif[sizeof(info->oif)-1] != '\0') return -EINVAL; @@ -115,6 +117,11 @@ static int tee_tg_check(const struct xt_tgchk_param *par) priv->oif = -1; info->priv = priv; + dev = dev_get_by_name(par->net, info->oif); + if (dev) { + priv->oif = dev->ifindex; + dev_put(dev); + } mutex_lock(&tn->lock); list_add(&priv->list, &tn->priv_list); mutex_unlock(&tn->lock); -- cgit v1.2.3 From 4f5a4c8881064c2eac658528390a6f2e7253c9b5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 6 Mar 2019 15:41:57 +0100 Subject: gfs2: Fix missed wakeups in find_insert_glock commit 605b0487f0bc1ae9963bf52ece0f5c8055186f81 upstream. Mark Syms has reported seeing tasks that are stuck waiting in find_insert_glock. It turns out that struct lm_lockname contains four padding bytes on 64-bit architectures that function glock_waitqueue doesn't skip when hashing the glock name. As a result, we can end up waking up the wrong waitqueue, and the waiting tasks may be stuck forever. Fix that by using ht_parms.key_len instead of sizeof(struct lm_lockname) for the key length. Reported-by: Mark Syms Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/glock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 4614ee25f621..9d566e62684c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -107,7 +107,7 @@ static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode, static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name) { - u32 hash = jhash2((u32 *)name, sizeof(*name) / 4, 0); + u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0); return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS); } -- cgit v1.2.3 From f581642c8a2cf7cd31331ae6b2b9914ba9b512cf Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 11 Mar 2019 14:08:54 +0800 Subject: staging: erofs: add error handling for xattr submodule commit cadf1ccf1b0021d0b7a9347e102ac5258f9f98c8 upstream. This patch enhances the missing error handling code for xattr submodule, which improves the stability for the rare cases. Reviewed-by: Chao Yu Signed-off-by: Chao Yu Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/internal.h | 5 +- drivers/staging/erofs/xattr.c | 112 +++++++++++++++++++++++++++------------ 2 files changed, 81 insertions(+), 36 deletions(-) diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h index 9f44ed8f0023..1c048c412fb2 100644 --- a/drivers/staging/erofs/internal.h +++ b/drivers/staging/erofs/internal.h @@ -485,8 +485,9 @@ struct erofs_map_blocks_iter { }; -static inline struct page *erofs_get_inline_page(struct inode *inode, - erofs_blk_t blkaddr) +static inline struct page * +erofs_get_inline_page(struct inode *inode, + erofs_blk_t blkaddr) { return erofs_get_meta_page(inode->i_sb, blkaddr, S_ISDIR(inode->i_mode)); diff --git a/drivers/staging/erofs/xattr.c b/drivers/staging/erofs/xattr.c index 0e9cfeccdf99..9870f893608a 100644 --- a/drivers/staging/erofs/xattr.c +++ b/drivers/staging/erofs/xattr.c @@ -24,16 +24,25 @@ struct xattr_iter { static inline void xattr_iter_end(struct xattr_iter *it, bool atomic) { - /* only init_inode_xattrs use non-atomic once */ + /* the only user of kunmap() is 'init_inode_xattrs' */ if (unlikely(!atomic)) kunmap(it->page); else kunmap_atomic(it->kaddr); + unlock_page(it->page); put_page(it->page); } -static void init_inode_xattrs(struct inode *inode) +static inline void xattr_iter_end_final(struct xattr_iter *it) +{ + if (!it->page) + return; + + xattr_iter_end(it, true); +} + +static int init_inode_xattrs(struct inode *inode) { struct xattr_iter it; unsigned i; @@ -43,7 +52,7 @@ static void init_inode_xattrs(struct inode *inode) bool atomic_map; if (likely(inode_has_inited_xattr(inode))) - return; + return 0; vi = EROFS_V(inode); BUG_ON(!vi->xattr_isize); @@ -53,7 +62,8 @@ static void init_inode_xattrs(struct inode *inode) it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize); it.page = erofs_get_inline_page(inode, it.blkaddr); - BUG_ON(IS_ERR(it.page)); + if (IS_ERR(it.page)) + return PTR_ERR(it.page); /* read in shared xattr array (non-atomic, see kmalloc below) */ it.kaddr = kmap(it.page); @@ -62,9 +72,12 @@ static void init_inode_xattrs(struct inode *inode) ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs); vi->xattr_shared_count = ih->h_shared_count; - vi->xattr_shared_xattrs = (unsigned *)kmalloc_array( - vi->xattr_shared_count, sizeof(unsigned), - GFP_KERNEL | __GFP_NOFAIL); + vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count, + sizeof(uint), GFP_KERNEL); + if (!vi->xattr_shared_xattrs) { + xattr_iter_end(&it, atomic_map); + return -ENOMEM; + } /* let's skip ibody header */ it.ofs += sizeof(struct erofs_xattr_ibody_header); @@ -77,7 +90,8 @@ static void init_inode_xattrs(struct inode *inode) it.page = erofs_get_meta_page(inode->i_sb, ++it.blkaddr, S_ISDIR(inode->i_mode)); - BUG_ON(IS_ERR(it.page)); + if (IS_ERR(it.page)) + return PTR_ERR(it.page); it.kaddr = kmap_atomic(it.page); atomic_map = true; @@ -90,6 +104,7 @@ static void init_inode_xattrs(struct inode *inode) xattr_iter_end(&it, atomic_map); inode_set_inited_xattr(inode); + return 0; } struct xattr_iter_handlers { @@ -99,18 +114,25 @@ struct xattr_iter_handlers { void (*value)(struct xattr_iter *, unsigned, char *, unsigned); }; -static void xattr_iter_fixup(struct xattr_iter *it) +static inline int xattr_iter_fixup(struct xattr_iter *it) { - if (unlikely(it->ofs >= EROFS_BLKSIZ)) { - xattr_iter_end(it, true); + if (it->ofs < EROFS_BLKSIZ) + return 0; - it->blkaddr += erofs_blknr(it->ofs); - it->page = erofs_get_meta_page(it->sb, it->blkaddr, false); - BUG_ON(IS_ERR(it->page)); + xattr_iter_end(it, true); - it->kaddr = kmap_atomic(it->page); - it->ofs = erofs_blkoff(it->ofs); + it->blkaddr += erofs_blknr(it->ofs); + it->page = erofs_get_meta_page(it->sb, it->blkaddr, false); + if (IS_ERR(it->page)) { + int err = PTR_ERR(it->page); + + it->page = NULL; + return err; } + + it->kaddr = kmap_atomic(it->page); + it->ofs = erofs_blkoff(it->ofs); + return 0; } static int inline_xattr_iter_begin(struct xattr_iter *it, @@ -132,21 +154,24 @@ static int inline_xattr_iter_begin(struct xattr_iter *it, it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs); it->page = erofs_get_inline_page(inode, it->blkaddr); - BUG_ON(IS_ERR(it->page)); - it->kaddr = kmap_atomic(it->page); + if (IS_ERR(it->page)) + return PTR_ERR(it->page); + it->kaddr = kmap_atomic(it->page); return vi->xattr_isize - xattr_header_sz; } static int xattr_foreach(struct xattr_iter *it, - struct xattr_iter_handlers *op, unsigned *tlimit) + const struct xattr_iter_handlers *op, unsigned int *tlimit) { struct erofs_xattr_entry entry; unsigned value_sz, processed, slice; int err; /* 0. fixup blkaddr, ofs, ipage */ - xattr_iter_fixup(it); + err = xattr_iter_fixup(it); + if (err) + return err; /* * 1. read xattr entry to the memory, @@ -178,7 +203,9 @@ static int xattr_foreach(struct xattr_iter *it, if (it->ofs >= EROFS_BLKSIZ) { BUG_ON(it->ofs > EROFS_BLKSIZ); - xattr_iter_fixup(it); + err = xattr_iter_fixup(it); + if (err) + goto out; it->ofs = 0; } @@ -210,7 +237,10 @@ static int xattr_foreach(struct xattr_iter *it, while (processed < value_sz) { if (it->ofs >= EROFS_BLKSIZ) { BUG_ON(it->ofs > EROFS_BLKSIZ); - xattr_iter_fixup(it); + + err = xattr_iter_fixup(it); + if (err) + goto out; it->ofs = 0; } @@ -270,7 +300,7 @@ static void xattr_copyvalue(struct xattr_iter *_it, memcpy(it->buffer + processed, buf, len); } -static struct xattr_iter_handlers find_xattr_handlers = { +static const struct xattr_iter_handlers find_xattr_handlers = { .entry = xattr_entrymatch, .name = xattr_namematch, .alloc_buffer = xattr_checkbuffer, @@ -291,8 +321,11 @@ static int inline_getxattr(struct inode *inode, struct getxattr_iter *it) ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining); if (ret >= 0) break; + + if (ret != -ENOATTR) /* -ENOMEM, -EIO, etc. */ + break; } - xattr_iter_end(&it->it, true); + xattr_iter_end_final(&it->it); return ret < 0 ? ret : it->buffer_size; } @@ -315,8 +348,10 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) xattr_iter_end(&it->it, true); it->it.page = erofs_get_meta_page(inode->i_sb, - blkaddr, false); - BUG_ON(IS_ERR(it->it.page)); + blkaddr, false); + if (IS_ERR(it->it.page)) + return PTR_ERR(it->it.page); + it->it.kaddr = kmap_atomic(it->it.page); it->it.blkaddr = blkaddr; } @@ -324,9 +359,12 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL); if (ret >= 0) break; + + if (ret != -ENOATTR) /* -ENOMEM, -EIO, etc. */ + break; } if (vi->xattr_shared_count) - xattr_iter_end(&it->it, true); + xattr_iter_end_final(&it->it); return ret < 0 ? ret : it->buffer_size; } @@ -351,7 +389,9 @@ int erofs_getxattr(struct inode *inode, int index, if (unlikely(name == NULL)) return -EINVAL; - init_inode_xattrs(inode); + ret = init_inode_xattrs(inode); + if (ret) + return ret; it.index = index; @@ -494,7 +534,7 @@ static int xattr_skipvalue(struct xattr_iter *_it, return 1; } -static struct xattr_iter_handlers list_xattr_handlers = { +static const struct xattr_iter_handlers list_xattr_handlers = { .entry = xattr_entrylist, .name = xattr_namelist, .alloc_buffer = xattr_skipvalue, @@ -516,7 +556,7 @@ static int inline_listxattr(struct listxattr_iter *it) if (ret < 0) break; } - xattr_iter_end(&it->it, true); + xattr_iter_end_final(&it->it); return ret < 0 ? ret : it->buffer_ofs; } @@ -538,8 +578,10 @@ static int shared_listxattr(struct listxattr_iter *it) xattr_iter_end(&it->it, true); it->it.page = erofs_get_meta_page(inode->i_sb, - blkaddr, false); - BUG_ON(IS_ERR(it->it.page)); + blkaddr, false); + if (IS_ERR(it->it.page)) + return PTR_ERR(it->it.page); + it->it.kaddr = kmap_atomic(it->it.page); it->it.blkaddr = blkaddr; } @@ -549,7 +591,7 @@ static int shared_listxattr(struct listxattr_iter *it) break; } if (vi->xattr_shared_count) - xattr_iter_end(&it->it, true); + xattr_iter_end_final(&it->it); return ret < 0 ? ret : it->buffer_ofs; } @@ -560,7 +602,9 @@ ssize_t erofs_listxattr(struct dentry *dentry, int ret; struct listxattr_iter it; - init_inode_xattrs(d_inode(dentry)); + ret = init_inode_xattrs(d_inode(dentry)); + if (ret) + return ret; it.dentry = dentry; it.buffer = buffer; -- cgit v1.2.3 From 9dc84155425c51cc06b402b86c5bc0e8eef754fb Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 11 Mar 2019 14:08:55 +0800 Subject: staging: erofs: fix fast symlink w/o xattr when fs xattr is on commit 7077fffcb0b0b65dc75e341306aeef4d0e7f2ec6 upstream. Currently, this will hit a BUG_ON for these symlinks as follows: - kernel message ------------[ cut here ]------------ kernel BUG at drivers/staging/erofs/xattr.c:59! SMP PTI CPU: 1 PID: 1170 Comm: getllxattr Not tainted 4.20.0-rc6+ #92 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 RIP: 0010:init_inode_xattrs+0x22b/0x270 Code: 48 0f 45 ea f0 ff 4d 34 74 0d 41 83 4c 24 e0 01 31 c0 e9 00 fe ff ff 48 89 ef e8 e0 31 9e ff eb e9 89 e8 e9 ef fd ff ff 0f 0$ <0f> 0b 48 89 ef e8 fb f6 9c ff 48 8b 45 08 a8 01 75 24 f0 ff 4d 34 RSP: 0018:ffffa03ac026bdf8 EFLAGS: 00010246 ------------[ cut here ]------------ ... Call Trace: erofs_listxattr+0x30/0x2c0 ? selinux_inode_listxattr+0x5a/0x80 ? kmem_cache_alloc+0x33/0x170 ? security_inode_listxattr+0x27/0x40 listxattr+0xaf/0xc0 path_listxattr+0x5a/0xa0 do_syscall_64+0x43/0xf0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ... ---[ end trace 3c24b49408dc0c72 ]--- Fix it by checking ->xattr_isize in init_inode_xattrs(), and it also fixes improper return value -ENOTSUPP (it should be -ENODATA if xattr is enabled) for those inodes. Fixes: b17500a0fdba ("staging: erofs: introduce xattr & acl support") Cc: # 4.19+ Reported-by: Li Guifu Tested-by: Li Guifu Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/inode.c | 8 ++++---- drivers/staging/erofs/xattr.c | 25 ++++++++++++++++++++----- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c index 9e7815f55a17..7448744cc515 100644 --- a/drivers/staging/erofs/inode.c +++ b/drivers/staging/erofs/inode.c @@ -184,16 +184,16 @@ static int fill_inode(struct inode *inode, int isdir) /* setup the new inode */ if (S_ISREG(inode->i_mode)) { #ifdef CONFIG_EROFS_FS_XATTR - if (vi->xattr_isize) - inode->i_op = &erofs_generic_xattr_iops; + inode->i_op = &erofs_generic_xattr_iops; #endif inode->i_fop = &generic_ro_fops; } else if (S_ISDIR(inode->i_mode)) { inode->i_op = #ifdef CONFIG_EROFS_FS_XATTR - vi->xattr_isize ? &erofs_dir_xattr_iops : -#endif + &erofs_dir_xattr_iops; +#else &erofs_dir_iops; +#endif inode->i_fop = &erofs_dir_fops; } else if (S_ISLNK(inode->i_mode)) { /* by default, page_get_link is used for symlink */ diff --git a/drivers/staging/erofs/xattr.c b/drivers/staging/erofs/xattr.c index 9870f893608a..c4b620dfa75c 100644 --- a/drivers/staging/erofs/xattr.c +++ b/drivers/staging/erofs/xattr.c @@ -55,7 +55,26 @@ static int init_inode_xattrs(struct inode *inode) return 0; vi = EROFS_V(inode); - BUG_ON(!vi->xattr_isize); + + /* + * bypass all xattr operations if ->xattr_isize is not greater than + * sizeof(struct erofs_xattr_ibody_header), in detail: + * 1) it is not enough to contain erofs_xattr_ibody_header then + * ->xattr_isize should be 0 (it means no xattr); + * 2) it is just to contain erofs_xattr_ibody_header, which is on-disk + * undefined right now (maybe use later with some new sb feature). + */ + if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) { + errln("xattr_isize %d of nid %llu is not supported yet", + vi->xattr_isize, vi->nid); + return -ENOTSUPP; + } else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) { + if (unlikely(vi->xattr_isize)) { + DBG_BUGON(1); + return -EIO; /* xattr ondisk layout error */ + } + return -ENOATTR; + } sbi = EROFS_I_SB(inode); it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize); @@ -414,7 +433,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) { - struct erofs_vnode *const vi = EROFS_V(inode); struct erofs_sb_info *const sbi = EROFS_I_SB(inode); switch (handler->flags) { @@ -432,9 +450,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler, return -EINVAL; } - if (!vi->xattr_isize) - return -ENOATTR; - return erofs_getxattr(inode, handler->flags, name, buffer, size); } -- cgit v1.2.3 From 53a989a95a09cb89ca267c2b2d58aa86b0abb4ab Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 11 Mar 2019 14:08:56 +0800 Subject: staging: erofs: fix memleak of inode's shared xattr array From: Sheng Yong commit 3b1b5291f79d040d549d7c746669fc30e8045b9b upstream. If it fails to read a shared xattr page, the inode's shared xattr array is not freed. The next time the inode's xattr is accessed, the previously allocated array is leaked. Signed-off-by: Sheng Yong Fixes: b17500a0fdba ("staging: erofs: introduce xattr & acl support") Cc: # 4.19+ Reviewed-by: Gao Xiang Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/xattr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/staging/erofs/xattr.c b/drivers/staging/erofs/xattr.c index c4b620dfa75c..937ad3fa540f 100644 --- a/drivers/staging/erofs/xattr.c +++ b/drivers/staging/erofs/xattr.c @@ -109,8 +109,11 @@ static int init_inode_xattrs(struct inode *inode) it.page = erofs_get_meta_page(inode->i_sb, ++it.blkaddr, S_ISDIR(inode->i_mode)); - if (IS_ERR(it.page)) + if (IS_ERR(it.page)) { + kfree(vi->xattr_shared_xattrs); + vi->xattr_shared_xattrs = NULL; return PTR_ERR(it.page); + } it.kaddr = kmap_atomic(it.page); atomic_map = true; -- cgit v1.2.3 From 28b8f234edaf8eb7402bda6d2f9e6a51c9e86874 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 11 Mar 2019 14:08:57 +0800 Subject: staging: erofs: fix race of initializing xattrs of a inode at the same time commit 62dc45979f3f8cb0ea67302a93bff686f0c46c5a upstream. In real scenario, there could be several threads accessing xattrs of the same xattr-uninitialized inode, and init_inode_xattrs() almost at the same time. That's actually an unexpected behavior, this patch closes the race. Fixes: b17500a0fdba ("staging: erofs: introduce xattr & acl support") Cc: # 4.19+ Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/internal.h | 11 ++++++++--- drivers/staging/erofs/xattr.c | 41 ++++++++++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h index 1c048c412fb2..c70f0c5237ea 100644 --- a/drivers/staging/erofs/internal.h +++ b/drivers/staging/erofs/internal.h @@ -327,12 +327,17 @@ static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid) return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits); } -#define inode_set_inited_xattr(inode) (EROFS_V(inode)->flags |= 1) -#define inode_has_inited_xattr(inode) (EROFS_V(inode)->flags & 1) +/* atomic flag definitions */ +#define EROFS_V_EA_INITED_BIT 0 + +/* bitlock definitions (arranged in reverse order) */ +#define EROFS_V_BL_XATTR_BIT (BITS_PER_LONG - 1) struct erofs_vnode { erofs_nid_t nid; - unsigned int flags; + + /* atomic flags (including bitlocks) */ + unsigned long flags; unsigned char data_mapping_mode; /* inline size in bytes */ diff --git a/drivers/staging/erofs/xattr.c b/drivers/staging/erofs/xattr.c index 937ad3fa540f..2db99cff3c99 100644 --- a/drivers/staging/erofs/xattr.c +++ b/drivers/staging/erofs/xattr.c @@ -44,17 +44,24 @@ static inline void xattr_iter_end_final(struct xattr_iter *it) static int init_inode_xattrs(struct inode *inode) { + struct erofs_vnode *const vi = EROFS_V(inode); struct xattr_iter it; unsigned i; struct erofs_xattr_ibody_header *ih; struct erofs_sb_info *sbi; - struct erofs_vnode *vi; bool atomic_map; + int ret = 0; - if (likely(inode_has_inited_xattr(inode))) + /* the most case is that xattrs of this inode are initialized. */ + if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags)) return 0; - vi = EROFS_V(inode); + if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_XATTR_BIT, TASK_KILLABLE)) + return -ERESTARTSYS; + + /* someone has initialized xattrs for us? */ + if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags)) + goto out_unlock; /* * bypass all xattr operations if ->xattr_isize is not greater than @@ -67,13 +74,16 @@ static int init_inode_xattrs(struct inode *inode) if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) { errln("xattr_isize %d of nid %llu is not supported yet", vi->xattr_isize, vi->nid); - return -ENOTSUPP; + ret = -ENOTSUPP; + goto out_unlock; } else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) { if (unlikely(vi->xattr_isize)) { DBG_BUGON(1); - return -EIO; /* xattr ondisk layout error */ + ret = -EIO; + goto out_unlock; /* xattr ondisk layout error */ } - return -ENOATTR; + ret = -ENOATTR; + goto out_unlock; } sbi = EROFS_I_SB(inode); @@ -81,8 +91,10 @@ static int init_inode_xattrs(struct inode *inode) it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize); it.page = erofs_get_inline_page(inode, it.blkaddr); - if (IS_ERR(it.page)) - return PTR_ERR(it.page); + if (IS_ERR(it.page)) { + ret = PTR_ERR(it.page); + goto out_unlock; + } /* read in shared xattr array (non-atomic, see kmalloc below) */ it.kaddr = kmap(it.page); @@ -95,7 +107,8 @@ static int init_inode_xattrs(struct inode *inode) sizeof(uint), GFP_KERNEL); if (!vi->xattr_shared_xattrs) { xattr_iter_end(&it, atomic_map); - return -ENOMEM; + ret = -ENOMEM; + goto out_unlock; } /* let's skip ibody header */ @@ -112,7 +125,8 @@ static int init_inode_xattrs(struct inode *inode) if (IS_ERR(it.page)) { kfree(vi->xattr_shared_xattrs); vi->xattr_shared_xattrs = NULL; - return PTR_ERR(it.page); + ret = PTR_ERR(it.page); + goto out_unlock; } it.kaddr = kmap_atomic(it.page); @@ -125,8 +139,11 @@ static int init_inode_xattrs(struct inode *inode) } xattr_iter_end(&it, atomic_map); - inode_set_inited_xattr(inode); - return 0; + set_bit(EROFS_V_EA_INITED_BIT, &vi->flags); + +out_unlock: + clear_and_wake_up_bit(EROFS_V_BL_XATTR_BIT, &vi->flags); + return ret; } struct xattr_iter_handlers { -- cgit v1.2.3 From e5d7b94cc435f0532d26a45166cc85e8ee15f314 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 11 Mar 2019 14:08:58 +0800 Subject: staging: erofs: keep corrupted fs from crashing kernel in erofs_namei() commit 419d6efc50e94bcf5d6b35cd8c71f79edadec564 upstream. As Al pointed out, " ... and while we are at it, what happens to unsigned int nameoff = le16_to_cpu(de[mid].nameoff); unsigned int matched = min(startprfx, endprfx); struct qstr dname = QSTR_INIT(data + nameoff, unlikely(mid >= ndirents - 1) ? maxsize - nameoff : le16_to_cpu(de[mid + 1].nameoff) - nameoff); /* string comparison without already matched prefix */ int ret = dirnamecmp(name, &dname, &matched); if le16_to_cpu(de[...].nameoff) is not monotonically increasing? I.e. what's to prevent e.g. (unsigned)-1 ending up in dname.len? Corrupted fs image shouldn't oops the kernel.. " Revisit the related lookup flow to address the issue. Fixes: d72d1ce60174 ("staging: erofs: add namei functions") Cc: # 4.19+ Suggested-by: Al Viro Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/namei.c | 189 ++++++++++++++++++++++-------------------- 1 file changed, 100 insertions(+), 89 deletions(-) diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c index 546a47156101..023f64fa2c87 100644 --- a/drivers/staging/erofs/namei.c +++ b/drivers/staging/erofs/namei.c @@ -15,74 +15,77 @@ #include -/* based on the value of qn->len is accurate */ -static inline int dirnamecmp(struct qstr *qn, - struct qstr *qd, unsigned *matched) +struct erofs_qstr { + const unsigned char *name; + const unsigned char *end; +}; + +/* based on the end of qn is accurate and it must have the trailing '\0' */ +static inline int dirnamecmp(const struct erofs_qstr *qn, + const struct erofs_qstr *qd, + unsigned int *matched) { - unsigned i = *matched, len = min(qn->len, qd->len); -loop: - if (unlikely(i >= len)) { - *matched = i; - if (qn->len < qd->len) { - /* - * actually (qn->len == qd->len) - * when qd->name[i] == '\0' - */ - return qd->name[i] == '\0' ? 0 : -1; + unsigned int i = *matched; + + /* + * on-disk error, let's only BUG_ON in the debugging mode. + * otherwise, it will return 1 to just skip the invalid name + * and go on (in consideration of the lookup performance). + */ + DBG_BUGON(qd->name > qd->end); + + /* qd could not have trailing '\0' */ + /* However it is absolutely safe if < qd->end */ + while (qd->name + i < qd->end && qd->name[i] != '\0') { + if (qn->name[i] != qd->name[i]) { + *matched = i; + return qn->name[i] > qd->name[i] ? 1 : -1; } - return (qn->len > qd->len); + ++i; } - - if (qn->name[i] != qd->name[i]) { - *matched = i; - return qn->name[i] > qd->name[i] ? 1 : -1; - } - - ++i; - goto loop; + *matched = i; + /* See comments in __d_alloc on the terminating NUL character */ + return qn->name[i] == '\0' ? 0 : 1; } -static struct erofs_dirent *find_target_dirent( - struct qstr *name, - u8 *data, int maxsize) +#define nameoff_from_disk(off, sz) (le16_to_cpu(off) & ((sz) - 1)) + +static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, + u8 *data, + unsigned int dirblksize, + const int ndirents) { - unsigned ndirents, head, back; - unsigned startprfx, endprfx; + int head, back; + unsigned int startprfx, endprfx; struct erofs_dirent *const de = (struct erofs_dirent *)data; - /* make sure that maxsize is valid */ - BUG_ON(maxsize < sizeof(struct erofs_dirent)); - - ndirents = le16_to_cpu(de->nameoff) / sizeof(*de); - - /* corrupted dir (may be unnecessary...) */ - BUG_ON(!ndirents); - - head = 0; + /* since the 1st dirent has been evaluated previously */ + head = 1; back = ndirents - 1; startprfx = endprfx = 0; while (head <= back) { - unsigned mid = head + (back - head) / 2; - unsigned nameoff = le16_to_cpu(de[mid].nameoff); - unsigned matched = min(startprfx, endprfx); - - struct qstr dname = QSTR_INIT(data + nameoff, - unlikely(mid >= ndirents - 1) ? - maxsize - nameoff : - le16_to_cpu(de[mid + 1].nameoff) - nameoff); + const int mid = head + (back - head) / 2; + const int nameoff = nameoff_from_disk(de[mid].nameoff, + dirblksize); + unsigned int matched = min(startprfx, endprfx); + struct erofs_qstr dname = { + .name = data + nameoff, + .end = unlikely(mid >= ndirents - 1) ? + data + dirblksize : + data + nameoff_from_disk(de[mid + 1].nameoff, + dirblksize) + }; /* string comparison without already matched prefix */ int ret = dirnamecmp(name, &dname, &matched); - if (unlikely(!ret)) + if (unlikely(!ret)) { return de + mid; - else if (ret > 0) { + } else if (ret > 0) { head = mid + 1; startprfx = matched; - } else if (unlikely(mid < 1)) /* fix "mid" overflow */ - break; - else { + } else { back = mid - 1; endprfx = matched; } @@ -91,12 +94,12 @@ static struct erofs_dirent *find_target_dirent( return ERR_PTR(-ENOENT); } -static struct page *find_target_block_classic( - struct inode *dir, - struct qstr *name, int *_diff) +static struct page *find_target_block_classic(struct inode *dir, + struct erofs_qstr *name, + int *_ndirents) { - unsigned startprfx, endprfx; - unsigned head, back; + unsigned int startprfx, endprfx; + int head, back; struct address_space *const mapping = dir->i_mapping; struct page *candidate = ERR_PTR(-ENOENT); @@ -105,41 +108,43 @@ static struct page *find_target_block_classic( back = inode_datablocks(dir) - 1; while (head <= back) { - unsigned mid = head + (back - head) / 2; + const int mid = head + (back - head) / 2; struct page *page = read_mapping_page(mapping, mid, NULL); - if (IS_ERR(page)) { -exact_out: - if (!IS_ERR(candidate)) /* valid candidate */ - put_page(candidate); - return page; - } else { - int diff; - unsigned ndirents, matched; - struct qstr dname; + if (!IS_ERR(page)) { struct erofs_dirent *de = kmap_atomic(page); - unsigned nameoff = le16_to_cpu(de->nameoff); - - ndirents = nameoff / sizeof(*de); + const int nameoff = nameoff_from_disk(de->nameoff, + EROFS_BLKSIZ); + const int ndirents = nameoff / sizeof(*de); + int diff; + unsigned int matched; + struct erofs_qstr dname; - /* corrupted dir (should have one entry at least) */ - BUG_ON(!ndirents || nameoff > PAGE_SIZE); + if (unlikely(!ndirents)) { + DBG_BUGON(1); + kunmap_atomic(de); + put_page(page); + page = ERR_PTR(-EIO); + goto out; + } matched = min(startprfx, endprfx); dname.name = (u8 *)de + nameoff; - dname.len = ndirents == 1 ? - /* since the rest of the last page is 0 */ - EROFS_BLKSIZ - nameoff - : le16_to_cpu(de[1].nameoff) - nameoff; + if (ndirents == 1) + dname.end = (u8 *)de + EROFS_BLKSIZ; + else + dname.end = (u8 *)de + + nameoff_from_disk(de[1].nameoff, + EROFS_BLKSIZ); /* string comparison without already matched prefix */ diff = dirnamecmp(name, &dname, &matched); kunmap_atomic(de); if (unlikely(!diff)) { - *_diff = 0; - goto exact_out; + *_ndirents = 0; + goto out; } else if (diff > 0) { head = mid + 1; startprfx = matched; @@ -147,45 +152,51 @@ exact_out: if (likely(!IS_ERR(candidate))) put_page(candidate); candidate = page; + *_ndirents = ndirents; } else { put_page(page); - if (unlikely(mid < 1)) /* fix "mid" overflow */ - break; - back = mid - 1; endprfx = matched; } + continue; } +out: /* free if the candidate is valid */ + if (!IS_ERR(candidate)) + put_page(candidate); + return page; } - *_diff = 1; return candidate; } int erofs_namei(struct inode *dir, - struct qstr *name, - erofs_nid_t *nid, unsigned *d_type) + struct qstr *name, + erofs_nid_t *nid, unsigned int *d_type) { - int diff; + int ndirents; struct page *page; - u8 *data; + void *data; struct erofs_dirent *de; + struct erofs_qstr qn; if (unlikely(!dir->i_size)) return -ENOENT; - diff = 1; - page = find_target_block_classic(dir, name, &diff); + qn.name = name->name; + qn.end = name->name + name->len; + + ndirents = 0; + page = find_target_block_classic(dir, &qn, &ndirents); if (unlikely(IS_ERR(page))) return PTR_ERR(page); data = kmap_atomic(page); /* the target page has been mapped */ - de = likely(diff) ? - /* since the rest of the last page is 0 */ - find_target_dirent(name, data, EROFS_BLKSIZ) : - (struct erofs_dirent *)data; + if (ndirents) + de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents); + else + de = (struct erofs_dirent *)data; if (likely(!IS_ERR(de))) { *nid = le64_to_cpu(de->nid); -- cgit v1.2.3 From b4d965a37d89cd8611984ea16c85903d01ac967a Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 24 Oct 2018 11:50:33 +1000 Subject: cifs: allow calling SMB2_xxx_free(NULL) commit 32a1fb36f6e50183871c2c1fcf5493c633e84732 upstream. Change these free functions to allow passing NULL as the argument and treat it as a no-op just like free(NULL) would. Or, if rqst->rq_iov is NULL. The second scenario could happen for smb2_queryfs() if the call to SMB2_query_info_init() fails and we go to qfs_exit to clean up and free all resources. In that case we have not yet assigned rqst[2].rq_iov and thus the rq_iov dereference in SMB2_close_free() will cause a NULL pointer dereference. [ bp: upstream patch also fixes SMB2_set_info_free which was introduced in 4.20 ] Fixes: 1eb9fb52040f ("cifs: create SMB2_open_init()/SMB2_open_free() helpers") Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Aurelien Aptel CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 1e5a1171212f..a2d701775c49 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2243,10 +2243,12 @@ SMB2_open_free(struct smb_rqst *rqst) { int i; - cifs_small_buf_release(rqst->rq_iov[0].iov_base); - for (i = 1; i < rqst->rq_nvec; i++) - if (rqst->rq_iov[i].iov_base != smb2_padding) - kfree(rqst->rq_iov[i].iov_base); + if (rqst && rqst->rq_iov) { + cifs_small_buf_release(rqst->rq_iov[0].iov_base); + for (i = 1; i < rqst->rq_nvec; i++) + if (rqst->rq_iov[i].iov_base != smb2_padding) + kfree(rqst->rq_iov[i].iov_base); + } } int @@ -2535,7 +2537,8 @@ SMB2_close_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, void SMB2_close_free(struct smb_rqst *rqst) { - cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ + if (rqst && rqst->rq_iov) + cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ } int @@ -2685,7 +2688,8 @@ SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, void SMB2_query_info_free(struct smb_rqst *rqst) { - cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ + if (rqst && rqst->rq_iov) + cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ } static int -- cgit v1.2.3 From adaa310f8a0184fe6b9108c935c810f4c877a487 Mon Sep 17 00:00:00 2001 From: "Daniel F. Dickinson" Date: Sat, 22 Dec 2018 01:09:13 -0500 Subject: ath9k: Avoid OF no-EEPROM quirks without qca,no-eeprom commit ce938231bd3b1d7af3cbd8836f084801090470e1 upstream. ath9k_of_init() function[0] was initially written on the assumption that if someone had an explicit ath9k OF node that "there must be something wrong, why would someone add an OF node if everything is fine"[1] (Quoting Martin Blumenstingl ) "it turns out it's not that simple. with your requirements I'm now aware of two use-cases where the current code in ath9k_of_init() doesn't work without modifications"[1] The "your requirements" Martin speaks of is the result of the fact that I have a device (PowerCloud Systems CR5000) has some kind of default - not unique mac address - set and requires to set the correct MAC address via mac-address devicetree property, however: "some cards come with a physical EEPROM chip [or OTP] so "qca,no-eeprom" should not be set (your use-case). in this case AH_USE_EEPROM should be set (which is the default when there is no OF node)"[1] The other use case is: the firmware on some PowerMac G5 seems to add a OF node for the ath9k card automatically. depending on the EEPROM on the card AH_NO_EEP_SWAP should be unset (which is the default when there is no OF node). see [3] After this patch to ath9k_of_init() the new behavior will be: if there's no OF node then everything is the same as before if there's an empty OF node then ath9k will use the hardware EEPROM (before ath9k would fail to initialize because no EEPROM data was provided by userspace) if there's an OF node with only a MAC address then ath9k will use the MAC address and the hardware EEPROM (see the case above) with "qca,no-eeprom" EEPROM data from userspace will be requested. the behavior here will not change [1] Martin provides additional background on EEPROM swapping[1]. Thanks to Christian Lamparter for all his help on troubleshooting this issue and the basis for this patch. [0]https://elixir.bootlin.com/linux/v4.20-rc7/source/drivers/net/wireless/ath/ath9k/init.c#L615 [1]https://github.com/openwrt/openwrt/pull/1645#issuecomment-448027058 [2]https://github.com/openwrt/openwrt/pull/1613 [3]https://patchwork.kernel.org/patch/10241731/ Fixes: 138b41253d9c ("ath9k: parse the device configuration from an OF node") Reviewed-by: Martin Blumenstingl Tested-by: Martin Blumenstingl Signed-off-by: Daniel F. Dickinson Signed-off-by: Kalle Valo Cc: Christian Lamparter Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index c070a9e51ebf..fae572b38416 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -636,15 +636,15 @@ static int ath9k_of_init(struct ath_softc *sc) ret = ath9k_eeprom_request(sc, eeprom_name); if (ret) return ret; + + ah->ah_flags &= ~AH_USE_EEPROM; + ah->ah_flags |= AH_NO_EEP_SWAP; } mac = of_get_mac_address(np); if (mac) ether_addr_copy(common->macaddr, mac); - ah->ah_flags &= ~AH_USE_EEPROM; - ah->ah_flags |= AH_NO_EEP_SWAP; - return 0; } -- cgit v1.2.3 From 7053a6faf019f989f214cefb3c580e62ae5585cc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 7 Feb 2019 20:36:53 +0100 Subject: driver core: Postpone DMA tear-down until after devres release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 376991db4b6464e906d699ef07681e2ffa8ab08c upstream. When unbinding the (IOMMU-enabled) R-Car SATA device on Salvator-XS (R-Car H3 ES2.0), in preparation of rebinding against vfio-platform for device pass-through for virtualization:     echo ee300000.sata > /sys/bus/platform/drivers/sata_rcar/unbind the kernel crashes with:     Unable to handle kernel paging request at virtual address ffffffbf029ffffc     Mem abort info:       ESR = 0x96000006       Exception class = DABT (current EL), IL = 32 bits       SET = 0, FnV = 0       EA = 0, S1PTW = 0     Data abort info:       ISV = 0, ISS = 0x00000006       CM = 0, WnR = 0     swapper pgtable: 4k pages, 39-bit VAs, pgdp = 000000007e8c586c     [ffffffbf029ffffc] pgd=000000073bfc6003, pud=000000073bfc6003, pmd=0000000000000000     Internal error: Oops: 96000006 [#1] SMP     Modules linked in:     CPU: 0 PID: 1098 Comm: bash Not tainted 5.0.0-rc5-salvator-x-00452-g37596f884f4318ef #287     Hardware name: Renesas Salvator-X 2nd version board based on r8a7795 ES2.0+ (DT)     pstate: 60400005 (nZCv daif +PAN -UAO)     pc : __free_pages+0x8/0x58     lr : __dma_direct_free_pages+0x50/0x5c     sp : ffffff801268baa0     x29: ffffff801268baa0 x28: 0000000000000000     x27: ffffffc6f9c60bf0 x26: ffffffc6f9c60bf0     x25: ffffffc6f9c60810 x24: 0000000000000000     x23: 00000000fffff000 x22: ffffff8012145000     x21: 0000000000000800 x20: ffffffbf029fffc8     x19: 0000000000000000 x18: ffffffc6f86c42c8     x17: 0000000000000000 x16: 0000000000000070     x15: 0000000000000003 x14: 0000000000000000     x13: ffffff801103d7f8 x12: 0000000000000028     x11: ffffff8011117604 x10: 0000000000009ad8     x9 : ffffff80110126d0 x8 : ffffffc6f7563000     x7 : 6b6b6b6b6b6b6b6b x6 : 0000000000000018     x5 : ffffff8011cf3cc8 x4 : 0000000000004000     x3 : 0000000000080000 x2 : 0000000000000001     x1 : 0000000000000000 x0 : ffffffbf029fffc8     Process bash (pid: 1098, stack limit = 0x00000000c38e3e32)     Call trace:      __free_pages+0x8/0x58      __dma_direct_free_pages+0x50/0x5c      arch_dma_free+0x1c/0x98      dma_direct_free+0x14/0x24      dma_free_attrs+0x9c/0xdc      dmam_release+0x18/0x20      release_nodes+0x25c/0x28c      devres_release_all+0x48/0x4c      device_release_driver_internal+0x184/0x1f0      device_release_driver+0x14/0x1c      unbind_store+0x70/0xb8      drv_attr_store+0x24/0x34      sysfs_kf_write+0x4c/0x64      kernfs_fop_write+0x154/0x1c4      __vfs_write+0x34/0x164      vfs_write+0xb4/0x16c      ksys_write+0x5c/0xbc      __arm64_sys_write+0x14/0x1c      el0_svc_common+0x98/0x114      el0_svc_handler+0x1c/0x24      el0_svc+0x8/0xc     Code: d51b4234 17fffffa a9bf7bfd 910003fd (b9403404)     ---[ end trace 8c564cdd3a1a840f ]--- While I've bisected this to commit e8e683ae9a736407 ("iommu/of: Fix probe-deferral"), and reverting that commit on post-v5.0-rc4 kernels does fix the problem, this turned out to be a red herring. On arm64, arch_teardown_dma_ops() resets dev->dma_ops to NULL. Hence if a driver has used a managed DMA allocation API, the allocated DMA memory will be freed using the direct DMA ops, while it may have been allocated using a custom DMA ops (iommu_dma_ops in this case). Fix this by reversing the order of the calls to devres_release_all() and arch_teardown_dma_ops(). Signed-off-by: Geert Uytterhoeven Acked-by: Christoph Hellwig Reviewed-by: Rafael J. Wysocki Cc: stable Reviewed-by: Robin Murphy [rm: backport for 4.12-4.19 - kernels before 5.0 will not see the crash above, but may get silent memory corruption instead] Signed-off-by: Robin Murphy Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 7caa1adaf62a..f5b74856784a 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -963,9 +963,9 @@ static void __device_release_driver(struct device *dev, struct device *parent) drv->remove(dev); device_links_driver_cleanup(dev); - dma_deconfigure(dev); devres_release_all(dev); + dma_deconfigure(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) -- cgit v1.2.3 From f99f7dae782e8fe1b5d3a93bd08d3073a67ebd12 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 5 Mar 2019 22:23:15 +0100 Subject: perf/x86/intel: Make cpuc allocations consistent commit d01b1f96a82e5dd7841a1d39db3abfdaf95f70ab upstream The cpuc data structure allocation is different between fake and real cpuc's; use the same code to init/free both. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/core.c | 13 +++++-------- arch/x86/events/intel/core.c | 29 ++++++++++++++++++----------- arch/x86/events/perf_event.h | 11 ++++++++--- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index c04a8813cff9..a41554350893 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1970,7 +1970,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) */ static void free_fake_cpuc(struct cpu_hw_events *cpuc) { - kfree(cpuc->shared_regs); + intel_cpuc_finish(cpuc); kfree(cpuc); } @@ -1982,14 +1982,11 @@ static struct cpu_hw_events *allocate_fake_cpuc(void) cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL); if (!cpuc) return ERR_PTR(-ENOMEM); - - /* only needed, if we have extra_regs */ - if (x86_pmu.extra_regs) { - cpuc->shared_regs = allocate_shared_regs(cpu); - if (!cpuc->shared_regs) - goto error; - } cpuc->is_fake = 1; + + if (intel_cpuc_prepare(cpuc, cpu)) + goto error; + return cpuc; error: free_fake_cpuc(cpuc); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fbd7551a8d44..1e357b597687 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3282,7 +3282,7 @@ ssize_t intel_event_sysfs_show(char *page, u64 config) return x86_event_sysfs_show(page, config, event); } -struct intel_shared_regs *allocate_shared_regs(int cpu) +static struct intel_shared_regs *allocate_shared_regs(int cpu) { struct intel_shared_regs *regs; int i; @@ -3314,10 +3314,9 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) return c; } -static int intel_pmu_cpu_prepare(int cpu) -{ - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); +int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) +{ if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { cpuc->shared_regs = allocate_shared_regs(cpu); if (!cpuc->shared_regs) @@ -3327,7 +3326,7 @@ static int intel_pmu_cpu_prepare(int cpu) if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); - cpuc->constraint_list = kzalloc(sz, GFP_KERNEL); + cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); if (!cpuc->constraint_list) goto err_shared_regs; @@ -3352,6 +3351,11 @@ err: return -ENOMEM; } +static int intel_pmu_cpu_prepare(int cpu) +{ + return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu); +} + static void flip_smm_bit(void *data) { unsigned long set = *(unsigned long *)data; @@ -3423,9 +3427,8 @@ static void intel_pmu_cpu_starting(int cpu) } } -static void free_excl_cntrs(int cpu) +static void free_excl_cntrs(struct cpu_hw_events *cpuc) { - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); struct intel_excl_cntrs *c; c = cpuc->excl_cntrs; @@ -3443,9 +3446,8 @@ static void intel_pmu_cpu_dying(int cpu) fini_debug_store_on_cpu(cpu); } -static void intel_pmu_cpu_dead(int cpu) +void intel_cpuc_finish(struct cpu_hw_events *cpuc) { - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); struct intel_shared_regs *pc; pc = cpuc->shared_regs; @@ -3455,7 +3457,12 @@ static void intel_pmu_cpu_dead(int cpu) cpuc->shared_regs = NULL; } - free_excl_cntrs(cpu); + free_excl_cntrs(cpuc); +} + +static void intel_pmu_cpu_dead(int cpu) +{ + intel_cpuc_finish(&per_cpu(cpu_hw_events, cpu)); } static void intel_pmu_sched_task(struct perf_event_context *ctx, @@ -4515,7 +4522,7 @@ static __init int fixup_ht_bug(void) hardlockup_detector_perf_restart(); for_each_online_cpu(c) - free_excl_cntrs(c); + free_excl_cntrs(&per_cpu(cpu_hw_events, c)); cpus_read_unlock(); pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n"); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 0ee3a441ad79..975095f750f6 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -887,7 +887,8 @@ struct event_constraint * x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event); -struct intel_shared_regs *allocate_shared_regs(int cpu); +extern int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu); +extern void intel_cpuc_finish(struct cpu_hw_events *cpuc); int intel_pmu_init(void); @@ -1023,9 +1024,13 @@ static inline int intel_pmu_init(void) return 0; } -static inline struct intel_shared_regs *allocate_shared_regs(int cpu) +static inline int intel_cpuc_prepare(struct cpu_hw_event *cpuc, int cpu) +{ + return 0; +} + +static inline void intel_cpuc_finish(struct cpu_hw_event *cpuc) { - return NULL; } static inline int is_ht_workaround_enabled(void) -- cgit v1.2.3 From 9e071aa6c28c6d8eda7e2ee3663abf3d561a66a8 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 5 Mar 2019 22:23:16 +0100 Subject: perf/x86/intel: Generalize dynamic constraint creation commit 11f8b2d65ca9029591c8df26bb6bd063c312b7fe upstream Such that we can re-use it. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 51 ++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 1e357b597687..1392c502e963 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2652,6 +2652,35 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc) raw_spin_unlock(&excl_cntrs->lock); } +static struct event_constraint * +dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx) +{ + WARN_ON_ONCE(!cpuc->constraint_list); + + if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { + struct event_constraint *cx; + + /* + * grab pre-allocated constraint entry + */ + cx = &cpuc->constraint_list[idx]; + + /* + * initialize dynamic constraint + * with static constraint + */ + *cx = *c; + + /* + * mark constraint as dynamic + */ + cx->flags |= PERF_X86_EVENT_DYNAMIC; + c = cx; + } + + return c; +} + static struct event_constraint * intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, int idx, struct event_constraint *c) @@ -2682,27 +2711,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * only needed when constraint has not yet * been cloned (marked dynamic) */ - if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { - struct event_constraint *cx; - - /* - * grab pre-allocated constraint entry - */ - cx = &cpuc->constraint_list[idx]; - - /* - * initialize dynamic constraint - * with static constraint - */ - *cx = *c; - - /* - * mark constraint as dynamic, so we - * can free it later on - */ - cx->flags |= PERF_X86_EVENT_DYNAMIC; - c = cx; - } + c = dyn_constraint(cpuc, c, idx); /* * From here on, the constraint is dynamic. -- cgit v1.2.3 From fdd820949a745930738720c518df38e5dc99b3c2 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 5 Mar 2019 22:23:17 +0100 Subject: x86: Add TSX Force Abort CPUID/MSR commit 52f64909409c17adf54fcf5f9751e0544ca3a6b4 upstream Skylake systems will receive a microcode update to address a TSX errata. This microcode will (by default) clobber PMC3 when TSX instructions are (speculatively or not) executed. It also provides an MSR to cause all TSX transaction to abort and preserve PMC3. Add the CPUID enumeration and MSR definition. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 89a048c2faec..7b31ee5223fc 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -340,6 +340,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1f9de7635bcb..f14ca0be1e3f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -629,6 +629,12 @@ #define MSR_IA32_TSC_DEADLINE 0x000006E0 + +#define MSR_TSX_FORCE_ABORT 0x0000010F + +#define MSR_TFA_RTM_FORCE_ABORT_BIT 0 +#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT) + /* P4/Xeon+ specific */ #define MSR_IA32_MCG_EAX 0x00000180 #define MSR_IA32_MCG_EBX 0x00000181 -- cgit v1.2.3 From 26b6e018e558e94de73b35b0c5e18c3efd3634c3 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 5 Mar 2019 22:23:18 +0100 Subject: perf/x86/intel: Implement support for TSX Force Abort commit 400816f60c543153656ac74eaf7f36f6b7202378 upstream Skylake (and later) will receive a microcode update to address a TSX errata. This microcode will, on execution of a TSX instruction (speculative or not) use (clobber) PMC3. This update will also provide a new MSR to change this behaviour along with a CPUID bit to enumerate the presence of this new MSR. When the MSR gets set; the microcode will no longer use PMC3 but will Force Abort every TSX transaction (upon executing COMMIT). When TSX Force Abort (TFA) is allowed (default); the MSR gets set when PMC3 gets scheduled and cleared when, after scheduling, PMC3 is unused. When TFA is not allowed; clear PMC3 from all constraints such that it will not get used. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 74 ++++++++++++++++++++++++++++++++++++++++++-- arch/x86/events/perf_event.h | 6 ++++ 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 1392c502e963..220b40b75e6f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1995,6 +1995,39 @@ static void intel_pmu_nhm_enable_all(int added) intel_pmu_enable_all(added); } +static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on) +{ + u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0; + + if (cpuc->tfa_shadow != val) { + cpuc->tfa_shadow = val; + wrmsrl(MSR_TSX_FORCE_ABORT, val); + } +} + +static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) +{ + /* + * We're going to use PMC3, make sure TFA is set before we touch it. + */ + if (cntr == 3 && !cpuc->is_fake) + intel_set_tfa(cpuc, true); +} + +static void intel_tfa_pmu_enable_all(int added) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * If we find PMC3 is no longer used when we enable the PMU, we can + * clear TFA. + */ + if (!test_bit(3, cpuc->active_mask)) + intel_set_tfa(cpuc, false); + + intel_pmu_enable_all(added); +} + static inline u64 intel_pmu_get_status(void) { u64 status; @@ -3238,6 +3271,26 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return c; } +static bool allow_tsx_force_abort = true; + +static struct event_constraint * +tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event); + + /* + * Without TFA we must not use PMC3. + */ + if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) { + c = dyn_constraint(cpuc, c, idx); + c->idxmsk64 &= ~(1ULL << 3); + c->weight--; + } + + return c; +} + /* * Broadwell: * @@ -3332,13 +3385,15 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) goto err; } - if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { + if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) { size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); if (!cpuc->constraint_list) goto err_shared_regs; + } + if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { cpuc->excl_cntrs = allocate_excl_cntrs(cpu); if (!cpuc->excl_cntrs) goto err_constraint_list; @@ -3445,9 +3500,10 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc) if (c->core_id == -1 || --c->refcnt == 0) kfree(c); cpuc->excl_cntrs = NULL; - kfree(cpuc->constraint_list); - cpuc->constraint_list = NULL; } + + kfree(cpuc->constraint_list); + cpuc->constraint_list = NULL; } static void intel_pmu_cpu_dying(int cpu) @@ -3933,8 +3989,11 @@ static struct attribute *intel_pmu_caps_attrs[] = { NULL }; +DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort); + static struct attribute *intel_pmu_attrs[] = { &dev_attr_freeze_on_smi.attr, + NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */ NULL, }; @@ -4390,6 +4449,15 @@ __init int intel_pmu_init(void) x86_pmu.cpu_events = get_hsw_events_attrs(); intel_pmu_pebs_data_source_skl( boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); + + if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { + x86_pmu.flags |= PMU_FL_TFA; + x86_pmu.get_event_constraints = tfa_get_event_constraints; + x86_pmu.enable_all = intel_tfa_pmu_enable_all; + x86_pmu.commit_scheduling = intel_tfa_commit_scheduling; + intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr; + } + pr_cont("Skylake events, "); name = "skylake"; break; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 975095f750f6..5c424009b71f 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -242,6 +242,11 @@ struct cpu_hw_events { struct intel_excl_cntrs *excl_cntrs; int excl_thread_id; /* 0 or 1 */ + /* + * SKL TSX_FORCE_ABORT shadow + */ + u64 tfa_shadow; + /* * AMD specific bits */ @@ -679,6 +684,7 @@ do { \ #define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */ #define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */ #define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */ +#define PMU_FL_TFA 0x20 /* deal with TSX force abort */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr -- cgit v1.2.3 From ce194fa2b267e2018f42442347d90df01c4071d6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Mar 2019 14:02:41 -0700 Subject: Linux 4.19.29 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c6ac023ba33a..6e526583291c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 28 +SUBLEVEL = 29 EXTRAVERSION = NAME = "People's Front" -- cgit v1.2.3