From c126a53c276048125b4a950072bab37ad0fea120 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:00 -0700 Subject: arch: remove GENERIC_FIND_FIRST_BIT entirely In 5.12 cycle we enabled GENERIC_FIND_FIRST_BIT config option for ARM64 and MIPS. It increased performance and shrunk .text size; and so far I didn't receive any negative feedback on the change. https://lore.kernel.org/linux-arch/20210225135700.1381396-1-yury.norov@gmail.com/ Now I think it's a good time to switch all architectures to use find_{first,last}_bit() unconditionally, and so remove corresponding config option. The patch does't introduce functioal changes for arc, arm, arm64, mips, m68k, s390 and x86, for other architectures I expect improvement both in performance and .text size. Signed-off-by: Yury Norov Tested-by: Alexander Lobakin (mips) Reviewed-by: Alexander Lobakin (mips) Reviewed-by: Andy Shevchenko Acked-by: Will Deacon Tested-by: Wolfram Sang --- lib/Kconfig | 3 --- 1 file changed, 3 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 5e7165e6a346..6a6ae5312fa0 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -65,9 +65,6 @@ config GENERIC_STRNLEN_USER config GENERIC_NET_UTILS bool -config GENERIC_FIND_FIRST_BIT - bool - source "lib/math/Kconfig" config NO_GENERIC_PCI_IOPORT_MAP -- cgit v1.2.3 From f68edc9297bf3f7c94abb54b9b0b053607f7587b Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:01 -0700 Subject: lib: add find_first_and_bit() Currently find_first_and_bit() is an alias to find_next_and_bit(). However, it is widely used in cpumask, so it worth to optimize it. This patch adds its own implementation for find_first_and_bit(). On x86_64 find_bit_benchmark says: Before (#define find_first_and_bit(...) find_next_and_bit(..., 0): Start testing find_bit() with random-filled bitmap [ 140.291468] find_first_and_bit: 46890919 ns, 32671 iterations Start testing find_bit() with sparse bitmap [ 140.295028] find_first_and_bit: 7103 ns, 1 iterations After: Start testing find_bit() with random-filled bitmap [ 162.574907] find_first_and_bit: 25045813 ns, 32846 iterations Start testing find_bit() with sparse bitmap [ 162.578458] find_first_and_bit: 4900 ns, 1 iterations (Thanks to Alexey Klimov for thorough testing.) Signed-off-by: Yury Norov Tested-by: Wolfram Sang Tested-by: Alexey Klimov --- include/linux/find.h | 27 +++++++++++++++++++++++++++ lib/find_bit.c | 21 +++++++++++++++++++++ lib/find_bit_benchmark.c | 21 +++++++++++++++++++++ 3 files changed, 69 insertions(+) (limited to 'lib') diff --git a/include/linux/find.h b/include/linux/find.h index ea57f7f38c49..6048f8c97418 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -12,6 +12,8 @@ extern unsigned long _find_next_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start, unsigned long invert, unsigned long le); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); @@ -123,6 +125,31 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) } #endif +#ifndef find_first_and_bit +/** + * find_first_and_bit - find the first set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_and_bit(addr1, addr2, size); +} +#endif + #ifndef find_first_zero_bit /** * find_first_zero_bit - find the first cleared bit in a memory region diff --git a/lib/find_bit.c b/lib/find_bit.c index 0f8e2e369b1d..1b8e4b2a9cba 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -89,6 +89,27 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size) EXPORT_SYMBOL(_find_first_bit); #endif +#ifndef find_first_and_bit +/* + * Find the first set bit in two memory regions. + */ +unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + unsigned long idx, val; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + val = addr1[idx] & addr2[idx]; + if (val) + return min(idx * BITS_PER_LONG + __ffs(val), size); + } + + return size; +} +EXPORT_SYMBOL(_find_first_and_bit); +#endif + #ifndef find_first_zero_bit /* * Find the first cleared bit in a memory region. diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index 5637c5711db9..db904b57d4b8 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -49,6 +49,25 @@ static int __init test_find_first_bit(void *bitmap, unsigned long len) return 0; } +static int __init test_find_first_and_bit(void *bitmap, const void *bitmap2, unsigned long len) +{ + static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata; + unsigned long i, cnt; + ktime_t time; + + bitmap_copy(cp, bitmap, BITMAP_LEN); + + time = ktime_get(); + for (cnt = i = 0; i < len; cnt++) { + i = find_first_and_bit(cp, bitmap2, len); + __clear_bit(i, cp); + } + time = ktime_get() - time; + pr_err("find_first_and_bit: %18llu ns, %6ld iterations\n", time, cnt); + + return 0; +} + static int __init test_find_next_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; @@ -129,6 +148,7 @@ static int __init find_bit_test(void) * traverse only part of bitmap to avoid soft lockup. */ test_find_first_bit(bitmap, BITMAP_LEN / 10); + test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN / 2); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); pr_err("\nStart testing find_bit() with sparse bitmap\n"); @@ -145,6 +165,7 @@ static int __init find_bit_test(void) test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); + test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); /* -- cgit v1.2.3 From b5c7e7ec7d3418af2544452b45cc67297c857a86 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:03 -0700 Subject: all: replace find_next{,_zero}_bit with find_first{,_zero}_bit where appropriate find_first{,_zero}_bit is a more effective analogue of 'next' version if start == 0. This patch replaces 'next' with 'first' where things look trivial. Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- arch/powerpc/platforms/pasemi/dma_lib.c | 4 ++-- arch/s390/kvm/kvm-s390.c | 2 +- drivers/block/rnbd/rnbd-clt.c | 2 +- drivers/dma/ti/edma.c | 2 +- drivers/iio/adc/ad7124.c | 2 +- drivers/infiniband/hw/irdma/hw.c | 16 ++++++++-------- drivers/media/cec/core/cec-core.c | 2 +- drivers/media/mc/mc-devnode.c | 2 +- drivers/pci/controller/dwc/pci-dra7xx.c | 2 +- drivers/scsi/lpfc/lpfc_sli.c | 10 +++++----- drivers/soc/ti/k3-ringacc.c | 4 ++-- drivers/tty/n_tty.c | 2 +- drivers/virt/acrn/ioreq.c | 3 +-- fs/f2fs/segment.c | 8 ++++---- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/ocfs2/dlm/dlmdomain.c | 4 ++-- fs/ocfs2/dlm/dlmmaster.c | 18 +++++++++--------- fs/ocfs2/dlm/dlmrecovery.c | 2 +- fs/ocfs2/dlm/dlmthread.c | 2 +- lib/genalloc.c | 2 +- net/ncsi/ncsi-manage.c | 4 ++-- 21 files changed, 47 insertions(+), 48 deletions(-) (limited to 'lib') diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index 270fa3c0d372..26427311fc72 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -375,7 +375,7 @@ int pasemi_dma_alloc_flag(void) int bit; retry: - bit = find_next_bit(flags_free, MAX_FLAGS, 0); + bit = find_first_bit(flags_free, MAX_FLAGS); if (bit >= MAX_FLAGS) return -ENOSPC; if (!test_and_clear_bit(bit, flags_free)) @@ -440,7 +440,7 @@ int pasemi_dma_alloc_fun(void) int bit; retry: - bit = find_next_bit(fun_free, MAX_FLAGS, 0); + bit = find_first_bit(fun_free, MAX_FLAGS); if (bit >= MAX_FLAGS) return -ENOSPC; if (!test_and_clear_bit(bit, fun_free)) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 14a18ba5ff2c..9aba96d621b9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2021,7 +2021,7 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, while ((slotidx > 0) && (ofs >= ms->npages)) { slotidx--; ms = slots->memslots + slotidx; - ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0); + ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages); } return ms->base_gfn + ofs; } diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 2df0657cdf00..cef1058ec2fd 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -196,7 +196,7 @@ rnbd_get_cpu_qlist(struct rnbd_clt_session *sess, int cpu) return per_cpu_ptr(sess->cpu_queues, bit); } else if (cpu != 0) { /* Search from 0 to cpu */ - bit = find_next_bit(sess->cpu_queues_bm, cpu, 0); + bit = find_first_bit(sess->cpu_queues_bm, cpu); if (bit < cpu) return per_cpu_ptr(sess->cpu_queues, bit); } diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 35d81bd857f1..caa4050ecc02 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -1681,7 +1681,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val); emr = val; - for (i = find_next_bit(&emr, 32, 0); i < 32; + for (i = find_first_bit(&emr, 32); i < 32; i = find_next_bit(&emr, 32, i + 1)) { int k = (j << 5) + i; diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index e45c600fccc0..bc2cfa5f9592 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -347,7 +347,7 @@ static int ad7124_find_free_config_slot(struct ad7124_state *st) { unsigned int free_cfg_slot; - free_cfg_slot = find_next_zero_bit(&st->cfg_slots_status, AD7124_MAX_CONFIGS, 0); + free_cfg_slot = find_first_zero_bit(&st->cfg_slots_status, AD7124_MAX_CONFIGS); if (free_cfg_slot == AD7124_MAX_CONFIGS) return -1; diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index b4c657f5f2f9..e2dbd6b1ffcd 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1709,14 +1709,14 @@ clean_msixtbl: */ static void irdma_get_used_rsrc(struct irdma_device *iwdev) { - iwdev->rf->used_pds = find_next_zero_bit(iwdev->rf->allocated_pds, - iwdev->rf->max_pd, 0); - iwdev->rf->used_qps = find_next_zero_bit(iwdev->rf->allocated_qps, - iwdev->rf->max_qp, 0); - iwdev->rf->used_cqs = find_next_zero_bit(iwdev->rf->allocated_cqs, - iwdev->rf->max_cq, 0); - iwdev->rf->used_mrs = find_next_zero_bit(iwdev->rf->allocated_mrs, - iwdev->rf->max_mr, 0); + iwdev->rf->used_pds = find_first_zero_bit(iwdev->rf->allocated_pds, + iwdev->rf->max_pd); + iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps, + iwdev->rf->max_qp); + iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs, + iwdev->rf->max_cq); + iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs, + iwdev->rf->max_mr); } void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf) diff --git a/drivers/media/cec/core/cec-core.c b/drivers/media/cec/core/cec-core.c index 551689d371a7..7322e7cd9753 100644 --- a/drivers/media/cec/core/cec-core.c +++ b/drivers/media/cec/core/cec-core.c @@ -106,7 +106,7 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode, /* Part 1: Find a free minor number */ mutex_lock(&cec_devnode_lock); - minor = find_next_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES, 0); + minor = find_first_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES); if (minor == CEC_NUM_DEVICES) { mutex_unlock(&cec_devnode_lock); pr_err("could not get a free minor\n"); diff --git a/drivers/media/mc/mc-devnode.c b/drivers/media/mc/mc-devnode.c index f11382afe23b..680fbb3a9340 100644 --- a/drivers/media/mc/mc-devnode.c +++ b/drivers/media/mc/mc-devnode.c @@ -217,7 +217,7 @@ int __must_check media_devnode_register(struct media_device *mdev, /* Part 1: Find a free minor number */ mutex_lock(&media_devnode_lock); - minor = find_next_zero_bit(media_devnode_nums, MEDIA_NUM_DEVICES, 0); + minor = find_first_zero_bit(media_devnode_nums, MEDIA_NUM_DEVICES); if (minor == MEDIA_NUM_DEVICES) { mutex_unlock(&media_devnode_lock); pr_err("could not get a free minor\n"); diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c index a4221f6f3629..279a6fa56584 100644 --- a/drivers/pci/controller/dwc/pci-dra7xx.c +++ b/drivers/pci/controller/dwc/pci-dra7xx.c @@ -213,7 +213,7 @@ static int dra7xx_pcie_handle_msi(struct pcie_port *pp, int index) if (!val) return 0; - pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, 0); + pos = find_first_bit(&val, MAX_MSI_IRQS_PER_CTRL); while (pos != MAX_MSI_IRQS_PER_CTRL) { generic_handle_domain_irq(pp->irq_domain, (index * MAX_MSI_IRQS_PER_CTRL) + pos); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 5dedb3de271d..77dfe293bf23 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -17990,8 +17990,8 @@ lpfc_sli4_alloc_xri(struct lpfc_hba *phba) * the driver starts at 0 each time. */ spin_lock_irq(&phba->hbalock); - xri = find_next_zero_bit(phba->sli4_hba.xri_bmask, - phba->sli4_hba.max_cfg_param.max_xri, 0); + xri = find_first_zero_bit(phba->sli4_hba.xri_bmask, + phba->sli4_hba.max_cfg_param.max_xri); if (xri >= phba->sli4_hba.max_cfg_param.max_xri) { spin_unlock_irq(&phba->hbalock); return NO_XRI; @@ -19668,7 +19668,7 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba) max_rpi = phba->sli4_hba.max_cfg_param.max_rpi; rpi_limit = phba->sli4_hba.next_rpi; - rpi = find_next_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit, 0); + rpi = find_first_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit); if (rpi >= rpi_limit) rpi = LPFC_RPI_ALLOC_ERROR; else { @@ -20311,8 +20311,8 @@ next_priority: * have been tested so that we can detect when we should * change the priority level. */ - next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask, - LPFC_SLI4_FCF_TBL_INDX_MAX, 0); + next_fcf_index = find_first_bit(phba->fcf.fcf_rr_bmask, + LPFC_SLI4_FCF_TBL_INDX_MAX); } diff --git a/drivers/soc/ti/k3-ringacc.c b/drivers/soc/ti/k3-ringacc.c index 312ba0f98ad7..573be88f8191 100644 --- a/drivers/soc/ti/k3-ringacc.c +++ b/drivers/soc/ti/k3-ringacc.c @@ -358,8 +358,8 @@ struct k3_ring *k3_ringacc_request_ring(struct k3_ringacc *ringacc, goto out; if (flags & K3_RINGACC_RING_USE_PROXY) { - proxy_id = find_next_zero_bit(ringacc->proxy_inuse, - ringacc->num_proxies, 0); + proxy_id = find_first_zero_bit(ringacc->proxy_inuse, + ringacc->num_proxies); if (proxy_id == ringacc->num_proxies) goto error; } diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 5be6d02dc690..9fc2319a394d 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1975,7 +1975,7 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty, more = n - (size - tail); if (eol == N_TTY_BUF_SIZE && more) { /* scan wrapped without finding set bit */ - eol = find_next_bit(ldata->read_flags, more, 0); + eol = find_first_bit(ldata->read_flags, more); found = eol != more; } else found = eol != size; diff --git a/drivers/virt/acrn/ioreq.c b/drivers/virt/acrn/ioreq.c index 80b2e3f0e276..5ff1c53740c0 100644 --- a/drivers/virt/acrn/ioreq.c +++ b/drivers/virt/acrn/ioreq.c @@ -246,8 +246,7 @@ void acrn_ioreq_request_clear(struct acrn_vm *vm) spin_lock_bh(&vm->ioreq_clients_lock); client = vm->default_client; if (client) { - vcpu = find_next_bit(client->ioreqs_map, - ACRN_IO_REQUEST_MAX, 0); + vcpu = find_first_bit(client->ioreqs_map, ACRN_IO_REQUEST_MAX); while (vcpu < ACRN_IO_REQUEST_MAX) { acrn_ioreq_complete_request(client, vcpu, NULL); vcpu = find_next_bit(client->ioreqs_map, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index df9ed75f0b7a..913552c98171 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2558,8 +2558,8 @@ find_other_zone: secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); if (secno >= MAIN_SECS(sbi)) { if (dir == ALLOC_RIGHT) { - secno = find_next_zero_bit(free_i->free_secmap, - MAIN_SECS(sbi), 0); + secno = find_first_zero_bit(free_i->free_secmap, + MAIN_SECS(sbi)); f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); } else { go_left = 1; @@ -2574,8 +2574,8 @@ find_other_zone: left_start--; continue; } - left_start = find_next_zero_bit(free_i->free_secmap, - MAIN_SECS(sbi), 0); + left_start = find_first_zero_bit(free_i->free_secmap, + MAIN_SECS(sbi)); f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); break; } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index f89ffcbd585f..a17be1618bf7 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -379,7 +379,7 @@ static void o2hb_nego_timeout(struct work_struct *work) o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); /* lowest node as master node to make negotiate decision. */ - master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0); + master_node = find_first_bit(live_node_bitmap, O2NM_MAX_NODES); if (master_node == o2nm_this_node()) { if (!test_bit(master_node, reg->hr_nego_node_bitmap)) { diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 9f90fc9551e1..c4eccd499db8 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1045,7 +1045,7 @@ static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) int status, ret = 0, i; char *p; - if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) + if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES) goto bail; qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL); @@ -1217,7 +1217,7 @@ static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) struct o2nm_node *node; int ret = 0, status, count, i; - if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) + if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES) goto bail; qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9b88219febb5..227da5b1b6ab 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -861,7 +861,7 @@ lookup: * to see if there are any nodes that still need to be * considered. these will not appear in the mle nodemap * but they might own this lockres. wait on them. */ - bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES); if (bit < O2NM_MAX_NODES) { mlog(0, "%s: res %.*s, At least one node (%d) " "to recover before lock mastery can begin\n", @@ -912,7 +912,7 @@ redo_request: dlm_wait_for_recovery(dlm); spin_lock(&dlm->spinlock); - bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES); if (bit < O2NM_MAX_NODES) { mlog(0, "%s: res %.*s, At least one node (%d) " "to recover before lock mastery can begin\n", @@ -1079,7 +1079,7 @@ recheck: sleep = 1; /* have all nodes responded? */ if (voting_done && !*blocked) { - bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES); if (dlm->node_num <= bit) { /* my node number is lowest. * now tell other nodes that I am @@ -1234,8 +1234,8 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, } else { mlog(ML_ERROR, "node down! %d\n", node); if (blocked) { - int lowest = find_next_bit(mle->maybe_map, - O2NM_MAX_NODES, 0); + int lowest = find_first_bit(mle->maybe_map, + O2NM_MAX_NODES); /* act like it was never there */ clear_bit(node, mle->maybe_map); @@ -1795,7 +1795,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, "MLE for it! (%.*s)\n", assert->node_idx, namelen, name); } else { - int bit = find_next_bit (mle->maybe_map, O2NM_MAX_NODES, 0); + int bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES); if (bit >= O2NM_MAX_NODES) { /* not necessarily an error, though less likely. * could be master just re-asserting. */ @@ -2521,7 +2521,7 @@ static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm, } if (!nonlocal) { - node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + node_ref = find_first_bit(res->refmap, O2NM_MAX_NODES); if (node_ref >= O2NM_MAX_NODES) return 0; } @@ -3303,7 +3303,7 @@ static void dlm_clean_block_mle(struct dlm_ctxt *dlm, BUG_ON(mle->type != DLM_MLE_BLOCK); spin_lock(&mle->spinlock); - bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES); if (bit != dead_node) { mlog(0, "mle found, but dead node %u would not have been " "master\n", dead_node); @@ -3542,7 +3542,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm) spin_lock(&dlm->master_lock); BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING); - BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES)); + BUG_ON((find_first_bit(dlm->domain_map, O2NM_MAX_NODES) < O2NM_MAX_NODES)); for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 5cd5f7511dac..52ad342fec3e 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -451,7 +451,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { int bit; - bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES); if (bit >= O2NM_MAX_NODES || bit < 0) dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); else diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index c350bd4df770..eedf07ca23ca 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -92,7 +92,7 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res) return 0; /* Another node has this resource with this node as the master */ - bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + bit = find_first_bit(res->refmap, O2NM_MAX_NODES); if (bit < O2NM_MAX_NODES) return 0; diff --git a/lib/genalloc.c b/lib/genalloc.c index 9a57257988c7..00fc50d0a640 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -251,7 +251,7 @@ void gen_pool_destroy(struct gen_pool *pool) list_del(&chunk->next_chunk); end_bit = chunk_size(chunk) >> order; - bit = find_next_bit(chunk->bits, end_bit, 0); + bit = find_first_bit(chunk->bits, end_bit); BUG_ON(bit < end_bit); vfree(chunk); diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 7121ce2a47c0..78814417d753 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -608,7 +608,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, bitmap = &ncf->bitmap; spin_lock_irqsave(&nc->lock, flags); - index = find_next_bit(bitmap, ncf->n_vids, 0); + index = find_first_bit(bitmap, ncf->n_vids); if (index >= ncf->n_vids) { spin_unlock_irqrestore(&nc->lock, flags); return -1; @@ -667,7 +667,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, return -1; } - index = find_next_zero_bit(bitmap, ncf->n_vids, 0); + index = find_first_zero_bit(bitmap, ncf->n_vids); if (index < 0 || index >= ncf->n_vids) { netdev_err(ndp->ndev.dev, "Channel %u already has all VLAN filters set\n", -- cgit v1.2.3 From db7313005e9c2d4e80888dd18d4a83926b920e8c Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:12 -0700 Subject: lib: bitmap: add performance test for bitmap_print_to_pagebuf Functional tests for bitmap_print_to_pagebuf() are provided in lib/test_printf.c. This patch adds performance test for a case of fully set bitmap. Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- lib/test_bitmap.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index d33fa5a61b95..0c82f07f74fc 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -446,6 +446,42 @@ static void __init test_bitmap_parselist(void) } } +static void __init test_bitmap_printlist(void) +{ + unsigned long *bmap = kmalloc(PAGE_SIZE, GFP_KERNEL); + char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + char expected[256]; + int ret, slen; + ktime_t time; + + if (!buf || !bmap) + goto out; + + memset(bmap, -1, PAGE_SIZE); + slen = snprintf(expected, 256, "0-%ld", PAGE_SIZE * 8 - 1); + if (slen < 0) + goto out; + + time = ktime_get(); + ret = bitmap_print_to_pagebuf(true, buf, bmap, PAGE_SIZE * 8); + time = ktime_get() - time; + + if (ret != slen + 1) { + pr_err("bitmap_print_to_pagebuf: result is %d, expected %d\n", ret, slen); + goto out; + } + + if (strncmp(buf, expected, slen)) { + pr_err("bitmap_print_to_pagebuf: result is %s, expected %s\n", buf, expected); + goto out; + } + + pr_err("bitmap_print_to_pagebuf: input is '%s', Time: %llu\n", buf, time); +out: + kfree(buf); + kfree(bmap); +} + static const unsigned long parse_test[] __initconst = { BITMAP_FROM_U64(0), BITMAP_FROM_U64(1), @@ -818,6 +854,7 @@ static void __init selftest(void) test_bitmap_arr32(); test_bitmap_parse(); test_bitmap_parselist(); + test_bitmap_printlist(); test_mem_optimisations(); test_for_each_set_clump8(); test_bitmap_cut(); -- cgit v1.2.3 From 15325b4f768f2b27b5765489eeab6ec0d6b5e902 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:13 -0700 Subject: vsprintf: rework bitmap_list_string bitmap_list_string() is very ineffective when printing bitmaps with long ranges of set bits because it calls find_next_bit for each bit in the bitmap. We can do better by detecting ranges of set bits. In my environment, before/after is 943008/31008 ns. Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- lib/vsprintf.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 58d5e567f836..d4b4e481045b 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1241,20 +1241,13 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, struct printf_spec spec, const char *fmt) { int nr_bits = max_t(int, spec.field_width, 0); - /* current bit is 'cur', most recently seen range is [rbot, rtop] */ - int cur, rbot, rtop; bool first = true; + int rbot, rtop; if (check_pointer(&buf, end, bitmap, spec)) return buf; - rbot = cur = find_first_bit(bitmap, nr_bits); - while (cur < nr_bits) { - rtop = cur; - cur = find_next_bit(bitmap, nr_bits, cur + 1); - if (cur < nr_bits && cur <= rtop + 1) - continue; - + for_each_set_bitrange(rbot, rtop, bitmap, nr_bits) { if (!first) { if (buf < end) *buf = ','; @@ -1263,15 +1256,12 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, first = false; buf = number(buf, end, rbot, default_dec_spec); - if (rbot < rtop) { - if (buf < end) - *buf = '-'; - buf++; - - buf = number(buf, end, rtop, default_dec_spec); - } + if (rtop == rbot + 1) + continue; - rbot = cur; + if (buf < end) + *buf = '-'; + buf = number(++buf, end, rtop - 1, default_dec_spec); } return buf; } -- cgit v1.2.3