From 221fb7268d67c0867a93f23586bd53c3c3969eee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 20 May 2019 14:22:25 -0700 Subject: Documentation/networking: fix af_xdp.rst Sphinx warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix Sphinx warnings in Documentation/networking/af_xdp.rst by adding indentation: Documentation/networking/af_xdp.rst:319: WARNING: Literal block expected; none found. Documentation/networking/af_xdp.rst:326: WARNING: Literal block expected; none found. Fixes: 0f4a9b7d4ecb ("xsk: add FAQ to facilitate for first time users") Signed-off-by: Randy Dunlap Cc: Magnus Karlsson Cc: Daniel Borkmann Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- Documentation/networking/af_xdp.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index e14d7d40fc75..50bccbf68308 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -316,16 +316,16 @@ A: When a netdev of a physical NIC is initialized, Linux usually all the traffic, you can force the netdev to only have 1 queue, queue id 0, and then bind to queue 0. You can use ethtool to do this:: - sudo ethtool -L combined 1 + sudo ethtool -L combined 1 If you want to only see part of the traffic, you can program the NIC through ethtool to filter out your traffic to a single queue id that you can bind your XDP socket to. Here is one example in which UDP traffic to and from port 4242 are sent to queue 2:: - sudo ethtool -N rx-flow-hash udp4 fn - sudo ethtool -N flow-type udp4 src-port 4242 dst-port \ - 4242 action 2 + sudo ethtool -N rx-flow-hash udp4 fn + sudo ethtool -N flow-type udp4 src-port 4242 dst-port \ + 4242 action 2 A number of other ways are possible all up to the capabilitites of the NIC you have. -- cgit v1.2.3 From 8cfeb385e9ebda784dccd447fe0f784464ca6ee1 Mon Sep 17 00:00:00 2001 From: Odin Ugedal Date: Thu, 30 May 2019 00:24:25 +0200 Subject: docs cgroups: add another example size for hugetlb Add another example to clarify that HugePages smaller than 1MB will be displayed using "KB", with an uppercased K (eg. 20KB), and not the normal SI prefix kilo (small k). Because of a misunderstanding/copy-paste error inside runc (see https://github.com/opencontainers/runc/pull/2065), it tried accessing the cgroup control file of a 64kB HugePage using "hugetlb.64kB._____" instead of the correct "hugetlb.64KB._____". Adding a new example will make it clear how sizes smaller than 1MB are handled. Signed-off-by: Odin Ugedal Signed-off-by: Tejun Heo --- Documentation/cgroup-v1/hugetlb.txt | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/cgroup-v1/hugetlb.txt b/Documentation/cgroup-v1/hugetlb.txt index 106245c3aecc..1260e5369b9b 100644 --- a/Documentation/cgroup-v1/hugetlb.txt +++ b/Documentation/cgroup-v1/hugetlb.txt @@ -32,14 +32,18 @@ Brief summary of control files hugetlb..usage_in_bytes # show current usage for "hugepagesize" hugetlb hugetlb..failcnt # show the number of allocation failure due to HugeTLB limit -For a system supporting two hugepage size (16M and 16G) the control +For a system supporting three hugepage sizes (64k, 32M and 1G), the control files include: -hugetlb.16GB.limit_in_bytes -hugetlb.16GB.max_usage_in_bytes -hugetlb.16GB.usage_in_bytes -hugetlb.16GB.failcnt -hugetlb.16MB.limit_in_bytes -hugetlb.16MB.max_usage_in_bytes -hugetlb.16MB.usage_in_bytes -hugetlb.16MB.failcnt +hugetlb.1GB.limit_in_bytes +hugetlb.1GB.max_usage_in_bytes +hugetlb.1GB.usage_in_bytes +hugetlb.1GB.failcnt +hugetlb.64KB.limit_in_bytes +hugetlb.64KB.max_usage_in_bytes +hugetlb.64KB.usage_in_bytes +hugetlb.64KB.failcnt +hugetlb.32MB.limit_in_bytes +hugetlb.32MB.max_usage_in_bytes +hugetlb.32MB.usage_in_bytes +hugetlb.32MB.failcnt -- cgit v1.2.3 From 0df82dcd55832a99363ab7f9fab954fcacdac3ae Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Tue, 7 May 2019 11:34:37 +0200 Subject: dt-bindings: can: mcp251x: add mcp25625 support Fully compatible with mcp2515, the mcp25625 have integrated transceiver. This patch add the mcp25625 to the device tree bindings documentation. Signed-off-by: Sean Nyekjaer Signed-off-by: Marc Kleine-Budde --- Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt b/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt index 188c8bd4eb67..5a0111d4de58 100644 --- a/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt +++ b/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt @@ -4,6 +4,7 @@ Required properties: - compatible: Should be one of the following: - "microchip,mcp2510" for MCP2510. - "microchip,mcp2515" for MCP2515. + - "microchip,mcp25625" for MCP25625. - reg: SPI chip select. - clocks: The clock feeding the CAN controller. - interrupts: Should contain IRQ line for the CAN controller. -- cgit v1.2.3 From bb2e05e0c8dcc2c93a97410ce362da998b83155f Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Wed, 12 Jun 2019 21:29:34 +0900 Subject: linux-next: DOC: RDS: Fix a typo in rds.txt This patch fixes a spelling typo in rds.txt Signed-off-by: Masanari Iida Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- Documentation/networking/rds.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt index 0235ae69af2a..f2a0147c933d 100644 --- a/Documentation/networking/rds.txt +++ b/Documentation/networking/rds.txt @@ -389,7 +389,7 @@ Multipath RDS (mprds) a common (to all paths) part, and a per-path struct rds_conn_path. All I/O workqs and reconnect threads are driven from the rds_conn_path. Transports such as TCP that are multipath capable may then set up a - TPC socket per rds_conn_path, and this is managed by the transport via + TCP socket per rds_conn_path, and this is managed by the transport via the transport privatee cp_transport_data pointer. Transports announce themselves as multipath capable by setting the -- cgit v1.2.3 From 8614b0085d98482a065f0a308d715b9d4212aebf Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 12 Jun 2019 08:50:09 +0200 Subject: block/switching-sched.txt: Update to blk-mq schedulers Remove references to CFQ and legacy block layer which are gone. Update example with what's available under blk-mq. Signed-off-by: Andreas Herrmann Signed-off-by: Jens Axboe --- Documentation/block/switching-sched.txt | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'Documentation') diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt index 3b2612e342f1..7977f6fb8b20 100644 --- a/Documentation/block/switching-sched.txt +++ b/Documentation/block/switching-sched.txt @@ -13,11 +13,9 @@ you can do so by typing: # mount none /sys -t sysfs -As of the Linux 2.6.10 kernel, it is now possible to change the -IO scheduler for a given block device on the fly (thus making it possible, -for instance, to set the CFQ scheduler for the system default, but -set a specific device to use the deadline or noop schedulers - which -can improve that device's throughput). +It is possible to change the IO scheduler for a given block device on +the fly to select one of mq-deadline, none, bfq, or kyber schedulers - +which can improve that device's throughput. To set a specific scheduler, simply do this: @@ -30,8 +28,8 @@ The list of defined schedulers can be found by simply doing a "cat /sys/block/DEV/queue/scheduler" - the list of valid names will be displayed, with the currently selected scheduler in brackets: -# cat /sys/block/hda/queue/scheduler -noop deadline [cfq] -# echo deadline > /sys/block/hda/queue/scheduler -# cat /sys/block/hda/queue/scheduler -noop [deadline] cfq +# cat /sys/block/sda/queue/scheduler +[mq-deadline] kyber bfq none +# echo none >/sys/block/sda/queue/scheduler +# cat /sys/block/sda/queue/scheduler +[none] mq-deadline kyber bfq -- cgit v1.2.3 From fb5772cbfe48575711bf789767d561582376f7f1 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 12 Jun 2019 08:17:32 +0200 Subject: blkio-controller.txt: Remove references to CFQ CFQ is gone. No need anymore to document its "proportional weight time based division of disk policy". Signed-off-by: Andreas Herrmann Signed-off-by: Jens Axboe --- Documentation/cgroup-v1/blkio-controller.txt | 96 ++-------------------------- 1 file changed, 7 insertions(+), 89 deletions(-) (limited to 'Documentation') diff --git a/Documentation/cgroup-v1/blkio-controller.txt b/Documentation/cgroup-v1/blkio-controller.txt index 673dc34d3f78..d1a1b7bdd03a 100644 --- a/Documentation/cgroup-v1/blkio-controller.txt +++ b/Documentation/cgroup-v1/blkio-controller.txt @@ -8,61 +8,13 @@ both at leaf nodes as well as at intermediate nodes in a storage hierarchy. Plan is to use the same cgroup based management interface for blkio controller and based on user options switch IO policies in the background. -Currently two IO control policies are implemented. First one is proportional -weight time based division of disk policy. It is implemented in CFQ. Hence -this policy takes effect only on leaf nodes when CFQ is being used. The second -one is throttling policy which can be used to specify upper IO rate limits -on devices. This policy is implemented in generic block layer and can be -used on leaf nodes as well as higher level logical devices like device mapper. +One IO control policy is throttling policy which can be used to +specify upper IO rate limits on devices. This policy is implemented in +generic block layer and can be used on leaf nodes as well as higher +level logical devices like device mapper. HOWTO ===== -Proportional Weight division of bandwidth ------------------------------------------ -You can do a very simple testing of running two dd threads in two different -cgroups. Here is what you can do. - -- Enable Block IO controller - CONFIG_BLK_CGROUP=y - -- Enable group scheduling in CFQ - CONFIG_CFQ_GROUP_IOSCHED=y - -- Compile and boot into kernel and mount IO controller (blkio); see - cgroups.txt, Why are cgroups needed?. - - mount -t tmpfs cgroup_root /sys/fs/cgroup - mkdir /sys/fs/cgroup/blkio - mount -t cgroup -o blkio none /sys/fs/cgroup/blkio - -- Create two cgroups - mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2 - -- Set weights of group test1 and test2 - echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight - echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight - -- Create two same size files (say 512MB each) on same disk (file1, file2) and - launch two dd threads in different cgroup to read those files. - - sync - echo 3 > /proc/sys/vm/drop_caches - - dd if=/mnt/sdb/zerofile1 of=/dev/null & - echo $! > /sys/fs/cgroup/blkio/test1/tasks - cat /sys/fs/cgroup/blkio/test1/tasks - - dd if=/mnt/sdb/zerofile2 of=/dev/null & - echo $! > /sys/fs/cgroup/blkio/test2/tasks - cat /sys/fs/cgroup/blkio/test2/tasks - -- At macro level, first dd should finish first. To get more precise data, keep - on looking at (with the help of script), at blkio.disk_time and - blkio.disk_sectors files of both test1 and test2 groups. This will tell how - much disk time (in milliseconds), each group got and how many sectors each - group dispatched to the disk. We provide fairness in terms of disk time, so - ideally io.disk_time of cgroups should be in proportion to the weight. - Throttling/Upper Limit policy ----------------------------- - Enable Block IO controller @@ -94,7 +46,7 @@ Throttling/Upper Limit policy Hierarchical Cgroups ==================== -Both CFQ and throttling implement hierarchy support; however, +Throttling implements hierarchy support; however, throttling's hierarchy support is enabled iff "sane_behavior" is enabled from cgroup side, which currently is a development option and not publicly available. @@ -107,9 +59,8 @@ If somebody created a hierarchy like as follows. | test3 -CFQ by default and throttling with "sane_behavior" will handle the -hierarchy correctly. For details on CFQ hierarchy support, refer to -Documentation/block/cfq-iosched.txt. For throttling, all limits apply +Throttling with "sane_behavior" will handle the +hierarchy correctly. For throttling, all limits apply to the whole subtree while all statistics are local to the IOs directly generated by tasks in that cgroup. @@ -130,10 +81,6 @@ CONFIG_DEBUG_BLK_CGROUP - Debug help. Right now some additional stats file show up in cgroup if this option is enabled. -CONFIG_CFQ_GROUP_IOSCHED - - Enables group scheduling in CFQ. Currently only 1 level of group - creation is allowed. - CONFIG_BLK_DEV_THROTTLING - Enable block device throttling support in block layer. @@ -344,32 +291,3 @@ Common files among various policies - blkio.reset_stats - Writing an int to this file will result in resetting all the stats for that cgroup. - -CFQ sysfs tunable -================= -/sys/block//queue/iosched/slice_idle ------------------------------------------- -On a faster hardware CFQ can be slow, especially with sequential workload. -This happens because CFQ idles on a single queue and single queue might not -drive deeper request queue depths to keep the storage busy. In such scenarios -one can try setting slice_idle=0 and that would switch CFQ to IOPS -(IO operations per second) mode on NCQ supporting hardware. - -That means CFQ will not idle between cfq queues of a cfq group and hence be -able to driver higher queue depth and achieve better throughput. That also -means that cfq provides fairness among groups in terms of IOPS and not in -terms of disk time. - -/sys/block//queue/iosched/group_idle ------------------------------------------- -If one disables idling on individual cfq queues and cfq service trees by -setting slice_idle=0, group_idle kicks in. That means CFQ will still idle -on the group in an attempt to provide fairness among groups. - -By default group_idle is same as slice_idle and does not do anything if -slice_idle is enabled. - -One can experience an overall throughput drop if you have created multiple -groups and put applications in that group which are not driving enough -IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle -on individual groups and throughput should improve. -- cgit v1.2.3 From 41040cf7c5f0f26c368bc5d3016fed3a9ca6dba4 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 12 Jun 2019 17:00:32 +0100 Subject: arm64/sve: Fix missing SVE/FPSIMD endianness conversions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The in-memory representation of SVE and FPSIMD registers is different: the FPSIMD V-registers are stored as single 128-bit host-endian values, whereas SVE registers are stored in an endianness-invariant byte order. This means that the two representations differ when running on a big-endian host. But we blindly copy data from one representation to another when converting between the two, resulting in the register contents being unintentionally byteswapped in certain situations. Currently this can be triggered by the first SVE instruction after a syscall, for example (though the potential trigger points may vary in future). So, fix the conversion functions fpsimd_to_sve(), sve_to_fpsimd() and sve_sync_from_fpsimd_zeropad() to swab where appropriate. There is no common swahl128() or swab128() that we could use here. Maybe it would be worth making this generic, but for now add a simple local hack. Since the byte order differences are exposed in ABI, also clarify the documentation. Cc: Alex Bennée Cc: Peter Maydell Cc: Alan Hayward Cc: Julien Grall Fixes: bc0ee4760364 ("arm64/sve: Core task context handling") Fixes: 8cd969d28fd2 ("arm64/sve: Signal handling support") Fixes: 43d4da2c45b2 ("arm64/sve: ptrace and ELF coredump support") Signed-off-by: Dave Martin [will: Fix typos in comments and docs spotted by Julien] Signed-off-by: Will Deacon --- Documentation/arm64/sve.txt | 16 ++++++++++++ arch/arm64/include/uapi/asm/kvm.h | 7 ++++++ arch/arm64/include/uapi/asm/ptrace.h | 4 +++ arch/arm64/include/uapi/asm/sigcontext.h | 14 +++++++++++ arch/arm64/kernel/fpsimd.c | 42 +++++++++++++++++++++++++------- 5 files changed, 74 insertions(+), 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt index 9940e924a47e..5689fc9a976a 100644 --- a/Documentation/arm64/sve.txt +++ b/Documentation/arm64/sve.txt @@ -56,6 +56,18 @@ model features for SVE is included in Appendix A. is to connect to a target process first and then attempt a ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov). +* Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory + between userspace and the kernel, the register value is encoded in memory in + an endianness-invariant layout, with bits [(8 * i + 7) : (8 * i)] encoded at + byte offset i from the start of the memory representation. This affects for + example the signal frame (struct sve_context) and ptrace interface + (struct user_sve_header) and associated data. + + Beware that on big-endian systems this results in a different byte order than + for the FPSIMD V-registers, which are stored as single host-endian 128-bit + values, with bits [(127 - 8 * i) : (120 - 8 * i)] of the register encoded at + byte offset i. (struct fpsimd_context, struct user_fpsimd_state). + 2. Vector length terminology ----------------------------- @@ -124,6 +136,10 @@ the SVE instruction set architecture. size and layout. Macros SVE_SIG_* are defined [1] to facilitate access to the members. +* Each scalable register (Zn, Pn, FFR) is stored in an endianness-invariant + layout, with bits [(8 * i + 7) : (8 * i)] stored at byte offset i from the + start of the register's representation in memory. + * If the SVE context is too big to fit in sigcontext.__reserved[], then extra space is allocated on the stack, an extra_context record is written in __reserved[] referencing this space. sve_context is then written in the diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 7b7ac0f6cec9..d819a3e8b552 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -260,6 +260,13 @@ struct kvm_vcpu_events { KVM_REG_SIZE_U256 | \ ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) +/* + * Register values for KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() and + * KVM_REG_ARM64_SVE_FFR() are represented in memory in an endianness- + * invariant layout which differs from the layout used for the FPSIMD + * V-registers on big-endian systems: see sigcontext.h for more explanation. + */ + #define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN #define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index d78623acb649..97c53203150b 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -176,6 +176,10 @@ struct user_sve_header { * FPCR uint32_t FPCR * * Additional data might be appended in the future. + * + * The Z-, P- and FFR registers are represented in memory in an endianness- + * invariant layout which differs from the layout used for the FPSIMD + * V-registers on big-endian systems: see sigcontext.h for more explanation. */ #define SVE_PT_SVE_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 5f3c0cec5af9..3d448a0bb225 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -77,6 +77,15 @@ struct fpsimd_context { __uint128_t vregs[32]; }; +/* + * Note: similarly to all other integer fields, each V-register is stored in an + * endianness-dependent format, with the byte at offset i from the start of the + * in-memory representation of the register value containing + * + * bits [(7 + 8 * i) : (8 * i)] of the register on little-endian hosts; or + * bits [(127 - 8 * i) : (120 - 8 * i)] on big-endian hosts. + */ + /* ESR_EL1 context */ #define ESR_MAGIC 0x45535201 @@ -204,6 +213,11 @@ struct sve_context { * FFR uint16_t[vq] first-fault status register * * Additional data might be appended in the future. + * + * Unlike vregs[] in fpsimd_context, each SVE scalable register (Z-, P- or FFR) + * is encoded in memory in an endianness-invariant format, with the byte at + * offset i from the start of the in-memory representation containing bits + * [(7 + 8 * i) : (8 * i)] of the register value. */ #define SVE_SIG_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a38bf74bcca8..bb42cd04baec 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -352,6 +353,23 @@ static int __init sve_sysctl_init(void) { return 0; } #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) +#ifdef CONFIG_CPU_BIG_ENDIAN +static __uint128_t arm64_cpu_to_le128(__uint128_t x) +{ + u64 a = swab64(x); + u64 b = swab64(x >> 64); + + return ((__uint128_t)a << 64) | b; +} +#else +static __uint128_t arm64_cpu_to_le128(__uint128_t x) +{ + return x; +} +#endif + +#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x) + /* * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to * task->thread.sve_state. @@ -369,14 +387,16 @@ static void fpsimd_to_sve(struct task_struct *task) void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; unsigned int i; + __uint128_t *p; if (!system_supports_sve()) return; vq = sve_vq_from_vl(task->thread.sve_vl); - for (i = 0; i < 32; ++i) - memcpy(ZREG(sst, vq, i), &fst->vregs[i], - sizeof(fst->vregs[i])); + for (i = 0; i < 32; ++i) { + p = (__uint128_t *)ZREG(sst, vq, i); + *p = arm64_cpu_to_le128(fst->vregs[i]); + } } /* @@ -395,14 +415,16 @@ static void sve_to_fpsimd(struct task_struct *task) void const *sst = task->thread.sve_state; struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; unsigned int i; + __uint128_t const *p; if (!system_supports_sve()) return; vq = sve_vq_from_vl(task->thread.sve_vl); - for (i = 0; i < 32; ++i) - memcpy(&fst->vregs[i], ZREG(sst, vq, i), - sizeof(fst->vregs[i])); + for (i = 0; i < 32; ++i) { + p = (__uint128_t const *)ZREG(sst, vq, i); + fst->vregs[i] = arm64_le128_to_cpu(*p); + } } #ifdef CONFIG_ARM64_SVE @@ -491,6 +513,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; unsigned int i; + __uint128_t *p; if (!test_tsk_thread_flag(task, TIF_SVE)) return; @@ -499,9 +522,10 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); - for (i = 0; i < 32; ++i) - memcpy(ZREG(sst, vq, i), &fst->vregs[i], - sizeof(fst->vregs[i])); + for (i = 0; i < 32; ++i) { + p = (__uint128_t *)ZREG(sst, vq, i); + *p = arm64_cpu_to_le128(fst->vregs[i]); + } } int sve_set_vector_length(struct task_struct *task, -- cgit v1.2.3 From 36815b416fa48766ac5a98e4b2dc3ebc5887222e Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Wed, 12 Jun 2019 19:03:50 +0200 Subject: qmi_wwan: extend permitted QMAP mux_id value range Permit mux_id values up to 254 to be used in qmimux_register_device() for compatibility with ip(8) and the rmnet driver. Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support") Cc: Daniele Palmas Signed-off-by: Reinhard Speyerer Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net-qmi | 4 ++-- drivers/net/usb/qmi_wwan.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-class-net-qmi b/Documentation/ABI/testing/sysfs-class-net-qmi index 7122d6264c49..c310db4ccbc2 100644 --- a/Documentation/ABI/testing/sysfs-class-net-qmi +++ b/Documentation/ABI/testing/sysfs-class-net-qmi @@ -29,7 +29,7 @@ Contact: Bjørn Mork Description: Unsigned integer. - Write a number ranging from 1 to 127 to add a qmap mux + Write a number ranging from 1 to 254 to add a qmap mux based network device, supported by recent Qualcomm based modems. @@ -46,5 +46,5 @@ Contact: Bjørn Mork Description: Unsigned integer. - Write a number ranging from 1 to 127 to delete a previously + Write a number ranging from 1 to 254 to delete a previously created qmap mux based network device. diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c6fbc2a2a785..780c10ee359b 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -429,8 +429,8 @@ static ssize_t add_mux_store(struct device *d, struct device_attribute *attr, c if (kstrtou8(buf, 0, &mux_id)) return -EINVAL; - /* mux_id [1 - 0x7f] range empirically found */ - if (mux_id < 1 || mux_id > 0x7f) + /* mux_id [1 - 254] for compatibility with ip(8) and the rmnet driver */ + if (mux_id < 1 || mux_id > 254) return -EINVAL; if (!rtnl_trylock()) -- cgit v1.2.3 From ede61ca474a0348b975d9824565b66c7595461de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Jun 2019 16:22:19 -0700 Subject: tcp: add tcp_rx_skb_cache sysctl Instead of relying on rps_needed, it is safer to use a separate static key, since we do not want to enable TCP rx_skb_cache by default. This feature can cause huge increase of memory usage on hosts with millions of sockets. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++++++++ include/net/sock.h | 6 ++---- net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ 3 files changed, 19 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 14fe93049d28..288aa264ac26 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -772,6 +772,14 @@ tcp_challenge_ack_limit - INTEGER in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks) Default: 100 +tcp_rx_skb_cache - BOOLEAN + Controls a per TCP socket cache of one skb, that might help + performance of some workloads. This might be dangerous + on systems with a lot of TCP sockets, since it increases + memory usage. + + Default: 0 (disabled) + UDP variables: udp_l3mdev_accept - BOOLEAN diff --git a/include/net/sock.h b/include/net/sock.h index e9d769c04637..b02645e2dfad 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2433,13 +2433,11 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) * This routine must be called with interrupts disabled or with the socket * locked so that the sk_buff queue operation is ok. */ +DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); - if ( -#ifdef CONFIG_RPS - !static_branch_unlikely(&rps_needed) && -#endif + if (static_branch_unlikely(&tcp_rx_skb_cache_key) && !sk->sk_rx_skb_cache) { sk->sk_rx_skb_cache = skb; skb_orphan(skb); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 875867b64d6a..886b58d31351 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -51,6 +51,9 @@ static int comp_sack_nr_max = 255; static u32 u32_max_div_HZ = UINT_MAX / HZ; static int one_day_secs = 24 * 3600; +DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); +EXPORT_SYMBOL(tcp_rx_skb_cache_key); + /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -559,6 +562,12 @@ static struct ctl_table ipv4_table[] = { .extra1 = &sysctl_fib_sync_mem_min, .extra2 = &sysctl_fib_sync_mem_max, }, + { + .procname = "tcp_rx_skb_cache", + .data = &tcp_rx_skb_cache_key.key, + .mode = 0644, + .proc_handler = proc_do_static_key, + }, { } }; -- cgit v1.2.3 From 5f3e2bf008c2221478101ee72f5cb4654b9fc363 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Jun 2019 09:15:31 -0700 Subject: tcp: add tcp_min_snd_mss sysctl Some TCP peers announce a very small MSS option in their SYN and/or SYN/ACK messages. This forces the stack to send packets with a very high network/cpu overhead. Linux has enforced a minimal value of 48. Since this value includes the size of TCP options, and that the options can consume up to 40 bytes, this means that each segment can include only 8 bytes of payload. In some cases, it can be useful to increase the minimal value to a saner value. We still let the default to 48 (TCP_MIN_SND_MSS), for compatibility reasons. Note that TCP_MAXSEG socket option enforces a minimal value of (TCP_MIN_MSS). David Miller increased this minimal value in commit c39508d6f118 ("tcp: Make TCP_MAXSEG minimum more correct.") from 64 to 88. We might in the future merge TCP_MIN_SND_MSS and TCP_MIN_MSS. CVE-2019-11479 -- tcp mss hardcoded to 48 Signed-off-by: Eric Dumazet Suggested-by: Jonathan Looney Acked-by: Neal Cardwell Cc: Yuchung Cheng Cc: Tyler Hicks Cc: Bruce Curtis Cc: Jonathan Lemon Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 11 +++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 3 +-- 5 files changed, 22 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 288aa264ac26..22f6b8b1110a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -255,6 +255,14 @@ tcp_base_mss - INTEGER Path MTU discovery (MTU probing). If MTU probing is enabled, this is the initial MSS used by the connection. +tcp_min_snd_mss - INTEGER + TCP SYN and SYNACK messages usually advertise an ADVMSS option, + as described in RFC 1122 and RFC 6691. + If this ADVMSS option is smaller than tcp_min_snd_mss, + it is silently capped to tcp_min_snd_mss. + + Default : 48 (at least 8 bytes of payload per segment) + tcp_congestion_control - STRING Set the congestion control algorithm to be used for new connections. The algorithm "reno" is always available, but diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7698460a3dd1..623cfbb7b8dc 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -117,6 +117,7 @@ struct netns_ipv4 { #endif int sysctl_tcp_mtu_probing; int sysctl_tcp_base_mss; + int sysctl_tcp_min_snd_mss; int sysctl_tcp_probe_threshold; u32 sysctl_tcp_probe_interval; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 08a428a7b274..9e5257251163 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -39,6 +39,8 @@ static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; +static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS; +static int tcp_min_snd_mss_max = 65535; static int ip_privileged_port_min; static int ip_privileged_port_max = 65535; static int ip_ttl_min = 1; @@ -774,6 +776,15 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_min_snd_mss", + .data = &init_net.ipv4.sysctl_tcp_min_snd_mss, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_min_snd_mss_min, + .extra2 = &tcp_min_snd_mss_max, + }, { .procname = "tcp_probe_threshold", .data = &init_net.ipv4.sysctl_tcp_probe_threshold, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bc86f9735f45..cfa81190a1b1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2628,6 +2628,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_ecn_fallback = 1; net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; + net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS; net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1bb1c46b4aba..00c01a01b547 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1459,8 +1459,7 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ - if (mss_now < TCP_MIN_SND_MSS) - mss_now = TCP_MIN_SND_MSS; + mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); return mss_now; } -- cgit v1.2.3 From c7af55981789299547c51caf5db38e6872e7841f Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 20 May 2019 09:19:40 -0700 Subject: dt-bindings: riscv: sifive: add YAML documentation for the SiFive FU540 Add YAML DT binding documentation for the SiFive FU540 SoC. This SoC is documented at: https://static.dev.sifive.com/FU540-C000-v1.0.pdf Passes dt-doc-validate, as of yaml-bindings commit 4c79d42e9216. Signed-off-by: Paul Walmsley Signed-off-by: Paul Walmsley Reviewed-by: Rob Herring Cc: Rob Herring Cc: Mark Rutland Cc: Palmer Dabbelt Cc: Albert Ou Cc: devicetree@vger.kernel.org Cc: linux-riscv@lists.infradead.org Cc: linux-kernel@vger.kernel.org --- .../devicetree/bindings/riscv/sifive.yaml | 25 ++++++++++++++++++++++ MAINTAINERS | 9 ++++++++ 2 files changed, 34 insertions(+) create mode 100644 Documentation/devicetree/bindings/riscv/sifive.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/riscv/sifive.yaml b/Documentation/devicetree/bindings/riscv/sifive.yaml new file mode 100644 index 000000000000..9d17dc2f3f84 --- /dev/null +++ b/Documentation/devicetree/bindings/riscv/sifive.yaml @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: (GPL-2.0 OR MIT) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/riscv/sifive.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: SiFive SoC-based boards + +maintainers: + - Paul Walmsley + - Palmer Dabbelt + +description: + SiFive SoC-based boards + +properties: + $nodename: + const: '/' + compatible: + items: + - enum: + - sifive,freedom-unleashed-a00 + - const: sifive,fu540-c000 + - const: sifive,fu540 +... diff --git a/MAINTAINERS b/MAINTAINERS index 5cfbea4ce575..8a64051cf5fc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14322,6 +14322,15 @@ S: Supported K: sifive N: sifive +SIFIVE FU540 SYSTEM-ON-CHIP +M: Paul Walmsley +M: Palmer Dabbelt +L: linux-riscv@lists.infradead.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pjw/sifive.git +S: Supported +K: fu540 +N: fu540 + SILEAD TOUCHSCREEN DRIVER M: Hans de Goede L: linux-input@vger.kernel.org -- cgit v1.2.3 From 4fd669a8c4873b6ad54e77bc45aa37b03edc5587 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 20 May 2019 09:19:40 -0700 Subject: dt-bindings: riscv: convert cpu binding to json-schema At Rob's request, we're starting to migrate our DT binding documentation to json-schema YAML format. Start by converting our cpu binding documentation. While doing so, document more properties and nodes. This includes adding binding documentation support for the E51 and U54 CPU cores ("harts") that are present on this SoC. These cores are described in: https://static.dev.sifive.com/FU540-C000-v1.0.pdf This cpus.yaml file is intended to be a starting point and to evolve over time. It passes dt-doc-validate as of the yaml-bindings commit 4c79d42e9216. This patch was originally based on the ARM json-schema binding documentation as added by commit 672951cbd1b7 ("dt-bindings: arm: Convert cpu binding to json-schema"). Signed-off-by: Paul Walmsley Signed-off-by: Paul Walmsley Reviewed-by: Rob Herring Cc: Mark Rutland Cc: Lorenzo Pieralisi Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-riscv@lists.infradead.org --- Documentation/devicetree/bindings/riscv/cpus.yaml | 168 ++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 Documentation/devicetree/bindings/riscv/cpus.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml new file mode 100644 index 000000000000..27f02ec4bb45 --- /dev/null +++ b/Documentation/devicetree/bindings/riscv/cpus.yaml @@ -0,0 +1,168 @@ +# SPDX-License-Identifier: (GPL-2.0 OR MIT) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/riscv/cpus.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: RISC-V bindings for 'cpus' DT nodes + +maintainers: + - Paul Walmsley + - Palmer Dabbelt + +allOf: + - $ref: /schemas/cpus.yaml# + +properties: + $nodename: + const: cpus + description: Container of cpu nodes + + '#address-cells': + const: 1 + description: | + A single unsigned 32-bit integer uniquely identifies each RISC-V + hart in a system. (See the "reg" node under the "cpu" node, + below). + + '#size-cells': + const: 0 + +patternProperties: + '^cpu@[0-9a-f]+$': + properties: + compatible: + type: array + items: + - enum: + - sifive,rocket0 + - sifive,e5 + - sifive,e51 + - sifive,u54-mc + - sifive,u54 + - sifive,u5 + - const: riscv + description: + Identifies that the hart uses the RISC-V instruction set + and identifies the type of the hart. + + mmu-type: + allOf: + - $ref: "/schemas/types.yaml#/definitions/string" + - enum: + - riscv,sv32 + - riscv,sv39 + - riscv,sv48 + description: + Identifies the MMU address translation mode used on this + hart. These values originate from the RISC-V Privileged + Specification document, available from + https://riscv.org/specifications/ + + riscv,isa: + allOf: + - $ref: "/schemas/types.yaml#/definitions/string" + - enum: + - rv64imac + - rv64imafdc + description: + Identifies the specific RISC-V instruction set architecture + supported by the hart. These are documented in the RISC-V + User-Level ISA document, available from + https://riscv.org/specifications/ + + timebase-frequency: + type: integer + minimum: 1 + description: + Specifies the clock frequency of the system timer in Hz. + This value is common to all harts on a single system image. + + interrupt-controller: + type: object + description: Describes the CPU's local interrupt controller + + properties: + '#interrupt-cells': + const: 1 + + compatible: + const: riscv,cpu-intc + + interrupt-controller: true + + required: + - '#interrupt-cells' + - compatible + - interrupt-controller + + required: + - riscv,isa + - timebase-frequency + - interrupt-controller + +examples: + - | + // Example 1: SiFive Freedom U540G Development Kit + cpus { + #address-cells = <1>; + #size-cells = <0>; + timebase-frequency = <1000000>; + cpu@0 { + clock-frequency = <0>; + compatible = "sifive,rocket0", "riscv"; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <128>; + i-cache-size = <16384>; + reg = <0>; + riscv,isa = "rv64imac"; + cpu_intc0: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + cpu@1 { + clock-frequency = <0>; + compatible = "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <1>; + riscv,isa = "rv64imafdc"; + tlb-split; + cpu_intc1: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + }; + + - | + // Example 2: Spike ISA Simulator with 1 Hart + cpus { + cpu@0 { + device_type = "cpu"; + reg = <0>; + compatible = "riscv"; + riscv,isa = "rv64imafdc"; + mmu-type = "riscv,sv48"; + interrupt-controller { + #interrupt-cells = <1>; + interrupt-controller; + compatible = "riscv,cpu-intc"; + }; + }; + }; +... -- cgit v1.2.3