From 2e54dc3c7dadd4d012f132b1a2b3ab89d9a48cc2 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Fri, 12 Jun 2015 10:57:40 +0200
Subject: s390/kernel: move EX_TABLE macros to linkage.h header file

Move the EX_TABLE macro definitions from the processor.h header file to the
linkage.h header file.  This helps to reduce circular header file dependencies.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/linkage.h   | 22 ++++++++++++++++++++++
 arch/s390/include/asm/processor.h | 19 -------------------
 2 files changed, 22 insertions(+), 19 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index fc8a8284778e..27da78cf416d 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -6,4 +6,26 @@
 #define __ALIGN .align 4, 0x07
 #define __ALIGN_STR __stringify(__ALIGN)
 
+#ifndef __ASSEMBLY__
+
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target)	\
+	".section __ex_table,\"a\"\n"	\
+	".align	4\n"			\
+	".long	(" #_fault ") - .\n"	\
+	".long	(" #_target ") - .\n"	\
+	".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target)	\
+	.section __ex_table,"a"	;	\
+	.align	4 ;			\
+	.long	(_fault) - . ;		\
+	.long	(_target) - . ;		\
+	.previous
+
+#endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index dedb6218544b..f4d9f741421d 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -336,25 +336,6 @@ extern void memcpy_absolute(void *, void *, size_t);
 	memcpy_absolute(&(dest), &__tmp, sizeof(__tmp));	\
 }
 
-/*
- * Helper macro for exception table entries
- */
-#define EX_TABLE(_fault, _target)	\
-	".section __ex_table,\"a\"\n"	\
-	".align	4\n"			\
-	".long	(" #_fault ") - .\n"	\
-	".long	(" #_target ") - .\n"	\
-	".previous\n"
-
-#else /* __ASSEMBLY__ */
-
-#define EX_TABLE(_fault, _target)	\
-	.section __ex_table,"a"	;	\
-	.align	4 ;			\
-	.long	(_fault) - . ;		\
-	.long	(_target) - . ;		\
-	.previous
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* __ASM_S390_PROCESSOR_H */
-- 
cgit v1.2.3


From 4084eb7767418861a81d9e24d222f2536537f58e Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Tue, 28 Apr 2015 11:30:40 +0200
Subject: s390/kernel: use test_fp_ctl() to verify the floating-point control
 word

Use test_fp_ctl() to test the floating-point control word for validity
and use restore_fp_ctl() to set it in load_sigregs().  The exception table
entry in restore_fp_ctl() is removed as well.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/switch_to.h | 1 -
 arch/s390/kernel/compat_signal.c  | 6 +++---
 arch/s390/kernel/signal.c         | 6 +++---
 3 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index d62e7a69605f..b304031d9d11 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -45,7 +45,6 @@ static inline int restore_fp_ctl(u32 *fpc)
 		"	lfpc    %1\n"
 		"0:	la	%0,0\n"
 		"1:\n"
-		EX_TABLE(0b,1b)
 		: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
 	return rc;
 }
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index fe8d6924efaa..b55d2063a23d 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -172,7 +172,7 @@ static void load_sigregs(void)
 	int i;
 
 	restore_access_regs(current->thread.acrs);
-	/* restore_fp_ctl is done in restore_sigregs */
+	restore_fp_ctl(&current->thread.fp_regs.fpc);
 	if (current->thread.vxrs) {
 		for (i = 0; i < __NUM_FPRS; i++)
 			*(freg_t *)(current->thread.vxrs + i) =
@@ -217,8 +217,8 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
 		return -EINVAL;
 
-	/* Loading the floating-point-control word can fail. Do that first. */
-	if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+	/* Test the floating-point-control word. */
+	if (test_fp_ctl(user_sregs.fpregs.fpc))
 		return -EINVAL;
 
 	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index c551f22ce066..98520c8ae0ee 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -121,7 +121,7 @@ static void store_sigregs(void)
 static void load_sigregs(void)
 {
 	restore_access_regs(current->thread.acrs);
-	/* restore_fp_ctl is done in restore_sigregs */
+	restore_fp_ctl(&current->thread.fp_regs.fpc);
 	if (current->thread.vxrs) {
 		int i;
 
@@ -166,8 +166,8 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
 		return -EINVAL;
 
-	/* Loading the floating-point-control word can fail. Do that first. */
-	if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+	/* Test the floating-point-control word. */
+	if (test_fp_ctl(user_sregs.fpregs.fpc))
 		return -EINVAL;
 
 	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
-- 
cgit v1.2.3


From 96b2d7a83a27fbae10fc57c39577a7e2689d9f0a Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Fri, 12 Jun 2015 13:53:51 +0200
Subject: s390/kvm: validate the floating-point control before restoring it

kvm_arch_vcpu_load() does not validate the floating-point control (FPC)
value.  Furthermore, the return code of the restore is not checked either.
If the FPC is invalid, the restore fails and the host FPC value might
remain.  The correct behavior would be to clear the FPC if it is not valid.
Hence, validate the FPC value and, if it is invalid, reset it to zero
before restoring it.

Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2078f92d15ac..fc7bc7118b23 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1200,6 +1200,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+	__u32 fpc;
+
 	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
 	if (test_kvm_facility(vcpu->kvm, 129))
 		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
@@ -1207,12 +1209,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		save_fp_regs(vcpu->arch.host_fpregs.fprs);
 	save_access_regs(vcpu->arch.host_acrs);
 	if (test_kvm_facility(vcpu->kvm, 129)) {
-		restore_fp_ctl(&vcpu->run->s.regs.fpc);
+		fpc = vcpu->run->s.regs.fpc;
 		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
 	} else {
-		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+		fpc = vcpu->arch.guest_fpregs.fpc;
 		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
 	}
+	if (test_fp_ctl(fpc))
+		/* User space provided an invalid FPC, let's clear it */
+		fpc = 0;
+	restore_fp_ctl(&fpc);
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	gmap_enable(vcpu->arch.gmap);
 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
-- 
cgit v1.2.3


From 904818e2f229f3d94ec95f6932a6358c81e73d78 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 11 Jun 2015 15:33:54 +0200
Subject: s390/kernel: introduce fpu-internal.h with fpu helper functions

Introduce a new structure to manage FP and VX registers. Refactor the
save and restore of floating point and vector registers with a set
of helper functions in fpu-internal.h.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/fpu-internal.h | 191 +++++++++++++++++++++++++++++++++++
 arch/s390/include/asm/processor.h    |   4 +-
 arch/s390/include/asm/switch_to.h    | 134 +-----------------------
 arch/s390/kernel/compat_signal.c     |  43 ++------
 arch/s390/kernel/nmi.c               |   1 +
 arch/s390/kernel/process.c           |  14 +--
 arch/s390/kernel/ptrace.c            | 131 ++++++++++++------------
 arch/s390/kernel/signal.c            |  42 ++------
 arch/s390/kernel/traps.c             |  24 ++---
 9 files changed, 299 insertions(+), 285 deletions(-)
 create mode 100644 arch/s390/include/asm/fpu-internal.h

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu-internal.h
new file mode 100644
index 000000000000..04b4cfc08fb5
--- /dev/null
+++ b/arch/s390/include/asm/fpu-internal.h
@@ -0,0 +1,191 @@
+/*
+ * General floating point and vector register helpers
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_INTERNAL_H
+#define _ASM_S390_FPU_INTERNAL_H
+
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <asm/linkage.h>
+#include <asm/ctl_reg.h>
+#include <asm/sigcontext.h>
+
+struct fpu {
+	__u32 fpc;			/* Floating-point control */
+	__u32 pad;
+	freg_t fprs[__NUM_FPRS];	/* Floating-point register save area */
+	__vector128 *vxrs;		/* Vector register save area */
+};
+
+#define is_vx_fpu(fpu) (!!(fpu)->vxrs)
+#define is_vx_task(tsk) (!!(tsk)->thread.fpu.vxrs)
+
+static inline int test_fp_ctl(u32 fpc)
+{
+	u32 orig_fpc;
+	int rc;
+
+	asm volatile(
+		"	efpc    %1\n"
+		"	sfpc	%2\n"
+		"0:	sfpc	%1\n"
+		"	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc), "=d" (orig_fpc)
+		: "d" (fpc), "0" (-EINVAL));
+	return rc;
+}
+
+static inline void save_fp_ctl(u32 *fpc)
+{
+	asm volatile(
+		"       stfpc   %0\n"
+		: "+Q" (*fpc));
+}
+
+static inline int restore_fp_ctl(u32 *fpc)
+{
+	int rc;
+
+	asm volatile(
+		"	lfpc    %1\n"
+		"0:	la	%0,0\n"
+		"1:\n"
+		: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
+	return rc;
+}
+
+static inline void save_fp_regs(freg_t *fprs)
+{
+	asm volatile("std 0,%0" : "=Q" (fprs[0]));
+	asm volatile("std 2,%0" : "=Q" (fprs[2]));
+	asm volatile("std 4,%0" : "=Q" (fprs[4]));
+	asm volatile("std 6,%0" : "=Q" (fprs[6]));
+	asm volatile("std 1,%0" : "=Q" (fprs[1]));
+	asm volatile("std 3,%0" : "=Q" (fprs[3]));
+	asm volatile("std 5,%0" : "=Q" (fprs[5]));
+	asm volatile("std 7,%0" : "=Q" (fprs[7]));
+	asm volatile("std 8,%0" : "=Q" (fprs[8]));
+	asm volatile("std 9,%0" : "=Q" (fprs[9]));
+	asm volatile("std 10,%0" : "=Q" (fprs[10]));
+	asm volatile("std 11,%0" : "=Q" (fprs[11]));
+	asm volatile("std 12,%0" : "=Q" (fprs[12]));
+	asm volatile("std 13,%0" : "=Q" (fprs[13]));
+	asm volatile("std 14,%0" : "=Q" (fprs[14]));
+	asm volatile("std 15,%0" : "=Q" (fprs[15]));
+}
+
+static inline void restore_fp_regs(freg_t *fprs)
+{
+	asm volatile("ld 0,%0" : : "Q" (fprs[0]));
+	asm volatile("ld 2,%0" : : "Q" (fprs[2]));
+	asm volatile("ld 4,%0" : : "Q" (fprs[4]));
+	asm volatile("ld 6,%0" : : "Q" (fprs[6]));
+	asm volatile("ld 1,%0" : : "Q" (fprs[1]));
+	asm volatile("ld 3,%0" : : "Q" (fprs[3]));
+	asm volatile("ld 5,%0" : : "Q" (fprs[5]));
+	asm volatile("ld 7,%0" : : "Q" (fprs[7]));
+	asm volatile("ld 8,%0" : : "Q" (fprs[8]));
+	asm volatile("ld 9,%0" : : "Q" (fprs[9]));
+	asm volatile("ld 10,%0" : : "Q" (fprs[10]));
+	asm volatile("ld 11,%0" : : "Q" (fprs[11]));
+	asm volatile("ld 12,%0" : : "Q" (fprs[12]));
+	asm volatile("ld 13,%0" : : "Q" (fprs[13]));
+	asm volatile("ld 14,%0" : : "Q" (fprs[14]));
+	asm volatile("ld 15,%0" : : "Q" (fprs[15]));
+}
+
+static inline void save_vx_regs(__vector128 *vxrs)
+{
+	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
+
+	asm volatile(
+		"	la	1,%0\n"
+		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		: "=Q" (*(addrtype *) vxrs) : : "1");
+}
+
+static inline void save_vx_regs_safe(__vector128 *vxrs)
+{
+	unsigned long cr0, flags;
+
+	flags = arch_local_irq_save();
+	__ctl_store(cr0, 0, 0);
+	__ctl_set_bit(0, 17);
+	__ctl_set_bit(0, 18);
+	save_vx_regs(vxrs);
+	__ctl_load(cr0, 0, 0);
+	arch_local_irq_restore(flags);
+}
+
+static inline void restore_vx_regs(__vector128 *vxrs)
+{
+	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
+
+	asm volatile(
+		"	la	1,%0\n"
+		"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
+		: : "Q" (*(addrtype *) vxrs) : "1");
+}
+
+static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		fprs[i] = *(freg_t *)(vxrs + i);
+}
+
+static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
+{
+	int i;
+
+	for (i = 0; i < __NUM_FPRS; i++)
+		*(freg_t *)(vxrs + i) = fprs[i];
+}
+
+static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	fpregs->pad = 0;
+	if (is_vx_fpu(fpu))
+		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
+	else
+		memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
+{
+	if (is_vx_fpu(fpu))
+		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
+	else
+		memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
+		       sizeof(fpregs->fprs));
+}
+
+static inline void save_fpu_regs(struct fpu *fpu)
+{
+	save_fp_ctl(&fpu->fpc);
+	if (is_vx_fpu(fpu))
+		save_vx_regs(fpu->vxrs);
+	else
+		save_fp_regs(fpu->fprs);
+}
+
+static inline void restore_fpu_regs(struct fpu *fpu)
+{
+	restore_fp_ctl(&fpu->fpc);
+	if (is_vx_fpu(fpu))
+		restore_vx_regs(fpu->vxrs);
+	else
+		restore_fp_regs(fpu->fprs);
+}
+
+#endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index f4d9f741421d..19f51db7c5e6 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -28,6 +28,7 @@
 #include <asm/ptrace.h>
 #include <asm/setup.h>
 #include <asm/runtime_instr.h>
+#include <asm/fpu-internal.h>
 
 static inline void set_cpu_flag(int flag)
 {
@@ -85,7 +86,7 @@ typedef struct {
  * Thread structure
  */
 struct thread_struct {
-	s390_fp_regs fp_regs;
+	struct fpu fpu;			/* FP and VX register save area */
 	unsigned int  acrs[NUM_ACRS];
         unsigned long ksp;              /* kernel stack pointer             */
 	mm_segment_t mm_segment;
@@ -101,7 +102,6 @@ struct thread_struct {
 	struct runtime_instr_cb *ri_cb;
 	int ri_signum;
 	unsigned char trap_tdb[256];	/* Transaction abort diagnose block */
-	__vector128 *vxrs;		/* Vector register save area */
 };
 
 /* Flag to disable transactions. */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index b304031d9d11..caf4f23462b0 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -8,138 +8,12 @@
 #define __ASM_SWITCH_TO_H
 
 #include <linux/thread_info.h>
+#include <asm/fpu-internal.h>
 #include <asm/ptrace.h>
 
 extern struct task_struct *__switch_to(void *, void *);
 extern void update_cr_regs(struct task_struct *task);
 
-static inline int test_fp_ctl(u32 fpc)
-{
-	u32 orig_fpc;
-	int rc;
-
-	asm volatile(
-		"	efpc    %1\n"
-		"	sfpc	%2\n"
-		"0:	sfpc	%1\n"
-		"	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc), "=d" (orig_fpc)
-		: "d" (fpc), "0" (-EINVAL));
-	return rc;
-}
-
-static inline void save_fp_ctl(u32 *fpc)
-{
-	asm volatile(
-		"       stfpc   %0\n"
-		: "+Q" (*fpc));
-}
-
-static inline int restore_fp_ctl(u32 *fpc)
-{
-	int rc;
-
-	asm volatile(
-		"	lfpc    %1\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
-	return rc;
-}
-
-static inline void save_fp_regs(freg_t *fprs)
-{
-	asm volatile("std 0,%0" : "=Q" (fprs[0]));
-	asm volatile("std 2,%0" : "=Q" (fprs[2]));
-	asm volatile("std 4,%0" : "=Q" (fprs[4]));
-	asm volatile("std 6,%0" : "=Q" (fprs[6]));
-	asm volatile("std 1,%0" : "=Q" (fprs[1]));
-	asm volatile("std 3,%0" : "=Q" (fprs[3]));
-	asm volatile("std 5,%0" : "=Q" (fprs[5]));
-	asm volatile("std 7,%0" : "=Q" (fprs[7]));
-	asm volatile("std 8,%0" : "=Q" (fprs[8]));
-	asm volatile("std 9,%0" : "=Q" (fprs[9]));
-	asm volatile("std 10,%0" : "=Q" (fprs[10]));
-	asm volatile("std 11,%0" : "=Q" (fprs[11]));
-	asm volatile("std 12,%0" : "=Q" (fprs[12]));
-	asm volatile("std 13,%0" : "=Q" (fprs[13]));
-	asm volatile("std 14,%0" : "=Q" (fprs[14]));
-	asm volatile("std 15,%0" : "=Q" (fprs[15]));
-}
-
-static inline void restore_fp_regs(freg_t *fprs)
-{
-	asm volatile("ld 0,%0" : : "Q" (fprs[0]));
-	asm volatile("ld 2,%0" : : "Q" (fprs[2]));
-	asm volatile("ld 4,%0" : : "Q" (fprs[4]));
-	asm volatile("ld 6,%0" : : "Q" (fprs[6]));
-	asm volatile("ld 1,%0" : : "Q" (fprs[1]));
-	asm volatile("ld 3,%0" : : "Q" (fprs[3]));
-	asm volatile("ld 5,%0" : : "Q" (fprs[5]));
-	asm volatile("ld 7,%0" : : "Q" (fprs[7]));
-	asm volatile("ld 8,%0" : : "Q" (fprs[8]));
-	asm volatile("ld 9,%0" : : "Q" (fprs[9]));
-	asm volatile("ld 10,%0" : : "Q" (fprs[10]));
-	asm volatile("ld 11,%0" : : "Q" (fprs[11]));
-	asm volatile("ld 12,%0" : : "Q" (fprs[12]));
-	asm volatile("ld 13,%0" : : "Q" (fprs[13]));
-	asm volatile("ld 14,%0" : : "Q" (fprs[14]));
-	asm volatile("ld 15,%0" : : "Q" (fprs[15]));
-}
-
-static inline void save_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		: "=Q" (*(addrtype *) vxrs) : : "1");
-}
-
-static inline void save_vx_regs_safe(__vector128 *vxrs)
-{
-	unsigned long cr0, flags;
-
-	flags = arch_local_irq_save();
-	__ctl_store(cr0, 0, 0);
-	__ctl_set_bit(0, 17);
-	__ctl_set_bit(0, 18);
-	save_vx_regs(vxrs);
-	__ctl_load(cr0, 0, 0);
-	arch_local_irq_restore(flags);
-}
-
-static inline void restore_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-		: : "Q" (*(addrtype *) vxrs) : "1");
-}
-
-static inline void save_fp_vx_regs(struct task_struct *task)
-{
-	if (task->thread.vxrs)
-		save_vx_regs(task->thread.vxrs);
-	else
-		save_fp_regs(task->thread.fp_regs.fprs);
-}
-
-static inline void restore_fp_vx_regs(struct task_struct *task)
-{
-	if (task->thread.vxrs)
-		restore_vx_regs(task->thread.vxrs);
-	else
-		restore_fp_regs(task->thread.fp_regs.fprs);
-}
-
 static inline void save_access_regs(unsigned int *acrs)
 {
 	typedef struct { int _[NUM_ACRS]; } acrstype;
@@ -156,15 +30,13 @@ static inline void restore_access_regs(unsigned int *acrs)
 
 #define switch_to(prev,next,last) do {					\
 	if (prev->mm) {							\
-		save_fp_ctl(&prev->thread.fp_regs.fpc);			\
-		save_fp_vx_regs(prev);					\
+		save_fpu_regs(&prev->thread.fpu);			\
 		save_access_regs(&prev->thread.acrs[0]);		\
 		save_ri_cb(prev->thread.ri_cb);				\
 	}								\
 	if (next->mm) {							\
 		update_cr_regs(next);					\
-		restore_fp_ctl(&next->thread.fp_regs.fpc);		\
-		restore_fp_vx_regs(next);				\
+		restore_fpu_regs(&next->thread.fpu);			\
 		restore_access_regs(&next->thread.acrs[0]);		\
 		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
 	}								\
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index b55d2063a23d..452995137a69 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -153,33 +153,15 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 /* Store registers needed to create the signal frame */
 static void store_sigregs(void)
 {
-	int i;
-
 	save_access_regs(current->thread.acrs);
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	if (current->thread.vxrs) {
-		save_vx_regs(current->thread.vxrs);
-		for (i = 0; i < __NUM_FPRS; i++)
-			current->thread.fp_regs.fprs[i] =
-				*(freg_t *)(current->thread.vxrs + i);
-	} else
-		save_fp_regs(current->thread.fp_regs.fprs);
+	save_fpu_regs(&current->thread.fpu);
 }
 
 /* Load registers after signal return */
 static void load_sigregs(void)
 {
-	int i;
-
 	restore_access_regs(current->thread.acrs);
-	restore_fp_ctl(&current->thread.fp_regs.fpc);
-	if (current->thread.vxrs) {
-		for (i = 0; i < __NUM_FPRS; i++)
-			*(freg_t *)(current->thread.vxrs + i) =
-				current->thread.fp_regs.fprs[i];
-		restore_vx_regs(current->thread.vxrs);
-	} else
-		restore_fp_regs(current->thread.fp_regs.fprs);
+	restore_fpu_regs(&current->thread.fpu);
 }
 
 static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
@@ -196,8 +178,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
 		user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
-	       sizeof(user_sregs.fpregs));
+	fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
 		return -EFAULT;
 	return 0;
@@ -235,9 +216,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 		regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
-
-	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
-	       sizeof(current->thread.fp_regs));
+	fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu);
 
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
@@ -258,13 +237,13 @@ static int save_sigregs_ext32(struct pt_regs *regs,
 		return -EFAULT;
 
 	/* Save vector registers to signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
 				   sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_to_user(&sregs_ext->vxrs_high,
-				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				   sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 	}
@@ -286,15 +265,15 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];
 
 	/* Restore vector registers from signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
-		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				     &sregs_ext->vxrs_high,
 				     sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+			*((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
 	}
 	return 0;
 }
@@ -472,7 +451,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
 	 */
 	uc_flags = UC_GPRS_HIGH;
 	if (MACHINE_HAS_VX) {
-		if (current->thread.vxrs)
+		if (is_vx_task(current))
 			uc_flags |= UC_VXRS;
 	} else
 		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 56b550893593..e66141c6696a 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -21,6 +21,7 @@
 #include <asm/nmi.h>
 #include <asm/crw.h>
 #include <asm/switch_to.h>
+#include <asm/fpu-internal.h>
 #include <asm/ctl_reg.h>
 
 struct mcck_struct {
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 8f587d871b9f..61795bc2fff4 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -81,8 +81,8 @@ void release_thread(struct task_struct *dead_task)
 
 void arch_release_task_struct(struct task_struct *tsk)
 {
-	if (tsk->thread.vxrs)
-		kfree(tsk->thread.vxrs);
+	if (is_vx_task(tsk))
+		kfree(tsk->thread.fpu.vxrs);
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
@@ -143,10 +143,10 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 	frame->childregs.psw.mask &= ~PSW_MASK_RI;
 
 	/* Save the fpu registers to new thread structure. */
-	save_fp_ctl(&p->thread.fp_regs.fpc);
-	save_fp_regs(p->thread.fp_regs.fprs);
-	p->thread.fp_regs.pad = 0;
-	p->thread.vxrs = NULL;
+	save_fp_ctl(&p->thread.fpu.fpc);
+	save_fp_regs(p->thread.fpu.fprs);
+	p->thread.fpu.pad = 0;
+	p->thread.fpu.vxrs = NULL;
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS) {
 		unsigned long tls = frame->childregs.gprs[6];
@@ -162,7 +162,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 
 asmlinkage void execve_tail(void)
 {
-	current->thread.fp_regs.fpc = 0;
+	current->thread.fpu.fpc = 0;
 	asm volatile("sfpc %0" : : "d" (0));
 }
 
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index d363c9c322a1..52e2e1dd919d 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -59,7 +59,7 @@ void update_cr_regs(struct task_struct *task)
 		if (MACHINE_HAS_VX) {
 			/* Enable/disable of vector extension */
 			cr_new &= ~(1UL << 17);
-			if (task->thread.vxrs)
+			if (task->thread.fpu.vxrs)
 				cr_new |= (1UL << 17);
 		}
 		if (cr_new != cr)
@@ -242,21 +242,21 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		tmp = child->thread.fp_regs.fpc;
+		tmp = child->thread.fpu.fpc;
 		tmp <<= BITS_PER_LONG - 32;
 
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu.fprs
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			tmp = *(addr_t *)
-			       ((addr_t) child->thread.vxrs + 2*offset);
+			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(addr_t *)
-			       ((addr_t) &child->thread.fp_regs.fprs + offset);
+			       ((addr_t) &child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -387,20 +387,20 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		if ((unsigned int) data != 0 ||
 		    test_fp_ctl(data >> (BITS_PER_LONG - 32)))
 			return -EINVAL;
-		child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32);
+		child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32);
 
 	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu.fprs
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			*(addr_t *)((addr_t)
-				child->thread.vxrs + 2*offset) = data;
+				child->thread.fpu.vxrs + 2*offset) = data;
 		else
 			*(addr_t *)((addr_t)
-				&child->thread.fp_regs.fprs + offset) = data;
+				&child->thread.fpu.fprs + offset) = data;
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -621,20 +621,20 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 		/*
 		 * floating point control reg. is in the thread structure
 		 */
-		tmp = child->thread.fp_regs.fpc;
+		tmp = child->thread.fpu.fpc;
 
 	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu.fprs
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			tmp = *(__u32 *)
-			       ((addr_t) child->thread.vxrs + 2*offset);
+			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(__u32 *)
-			       ((addr_t) &child->thread.fp_regs.fprs + offset);
+			       ((addr_t) &child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -746,20 +746,20 @@ static int __poke_user_compat(struct task_struct *child,
 		 */
 		if (test_fp_ctl(tmp))
 			return -EINVAL;
-		child->thread.fp_regs.fpc = data;
+		child->thread.fpu.fpc = data;
 
 	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
 		/*
-		 * floating point regs. are either in child->thread.fp_regs
-		 * or the child->thread.vxrs array
+		 * floating point regs. are either in child->thread.fpu.fprs
+		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (child->thread.vxrs)
+		if (is_vx_task(child))
 			*(__u32 *)((addr_t)
-				child->thread.vxrs + 2*offset) = tmp;
+				child->thread.fpu.vxrs + 2*offset) = tmp;
 		else
 			*(__u32 *)((addr_t)
-				&child->thread.fp_regs.fprs + offset) = tmp;
+				&child->thread.fpu.fprs + offset) = tmp;
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -952,18 +952,16 @@ static int s390_fpregs_get(struct task_struct *target,
 			   const struct user_regset *regset, unsigned int pos,
 			   unsigned int count, void *kbuf, void __user *ubuf)
 {
-	if (target == current) {
-		save_fp_ctl(&target->thread.fp_regs.fpc);
-		save_fp_regs(target->thread.fp_regs.fprs);
-	} else if (target->thread.vxrs) {
-		int i;
+	_s390_fp_regs fp_regs;
+
+	if (target == current)
+		save_fpu_regs(&target->thread.fpu);
+
+	fp_regs.fpc = target->thread.fpu.fpc;
+	fpregs_store(&fp_regs, &target->thread.fpu);
 
-		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			target->thread.fp_regs.fprs[i] =
-				*(freg_t *)(target->thread.vxrs + i);
-	}
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &target->thread.fp_regs, 0, -1);
+				   &fp_regs, 0, -1);
 }
 
 static int s390_fpregs_set(struct task_struct *target,
@@ -972,41 +970,36 @@ static int s390_fpregs_set(struct task_struct *target,
 			   const void __user *ubuf)
 {
 	int rc = 0;
+	freg_t fprs[__NUM_FPRS];
 
-	if (target == current) {
-		save_fp_ctl(&target->thread.fp_regs.fpc);
-		save_fp_regs(target->thread.fp_regs.fprs);
-	}
+	if (target == current)
+		save_fpu_regs(&target->thread.fpu);
 
 	/* If setting FPC, must validate it first. */
 	if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
-		u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 };
+		u32 ufpc[2] = { target->thread.fpu.fpc, 0 };
 		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
 					0, offsetof(s390_fp_regs, fprs));
 		if (rc)
 			return rc;
 		if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
 			return -EINVAL;
-		target->thread.fp_regs.fpc = ufpc[0];
+		target->thread.fpu.fpc = ufpc[0];
 	}
 
 	if (rc == 0 && count > 0)
 		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					target->thread.fp_regs.fprs,
-					offsetof(s390_fp_regs, fprs), -1);
+					fprs, offsetof(s390_fp_regs, fprs), -1);
+	if (rc)
+		return rc;
 
-	if (rc == 0) {
-		if (target == current) {
-			restore_fp_ctl(&target->thread.fp_regs.fpc);
-			restore_fp_regs(target->thread.fp_regs.fprs);
-		} else if (target->thread.vxrs) {
-			int i;
-
-			for (i = 0; i < __NUM_VXRS_LOW; i++)
-				*(freg_t *)(target->thread.vxrs + i) =
-					target->thread.fp_regs.fprs[i];
-		}
-	}
+	if (is_vx_task(target))
+		convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
+	else
+		memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
+
+	if (target == current)
+		restore_fpu_regs(&target->thread.fpu);
 
 	return rc;
 }
@@ -1069,11 +1062,11 @@ static int s390_vxrs_low_get(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (target->thread.vxrs) {
+	if (is_vx_task(target)) {
 		if (target == current)
-			save_vx_regs(target->thread.vxrs);
+			save_fpu_regs(&target->thread.fpu);
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
 	} else
 		memset(vxrs, 0, sizeof(vxrs));
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
@@ -1089,19 +1082,19 @@ static int s390_vxrs_low_set(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!target->thread.vxrs) {
+	if (!is_vx_task(target)) {
 		rc = alloc_vector_registers(target);
 		if (rc)
 			return rc;
 	} else if (target == current)
-		save_vx_regs(target->thread.vxrs);
+		save_fpu_regs(&target->thread.fpu);
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
 	if (rc == 0) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i];
+			*((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
 		if (target == current)
-			restore_vx_regs(target->thread.vxrs);
+			restore_fpu_regs(&target->thread.fpu);
 	}
 
 	return rc;
@@ -1116,10 +1109,10 @@ static int s390_vxrs_high_get(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (target->thread.vxrs) {
+	if (is_vx_task(target)) {
 		if (target == current)
-			save_vx_regs(target->thread.vxrs);
-		memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW,
+			save_fpu_regs(&target->thread.fpu);
+		memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
 		       sizeof(vxrs));
 	} else
 		memset(vxrs, 0, sizeof(vxrs));
@@ -1135,17 +1128,17 @@ static int s390_vxrs_high_set(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!target->thread.vxrs) {
+	if (!is_vx_task(target)) {
 		rc = alloc_vector_registers(target);
 		if (rc)
 			return rc;
 	} else if (target == current)
-		save_vx_regs(target->thread.vxrs);
+		save_fpu_regs(&target->thread.fpu);
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				target->thread.vxrs + __NUM_VXRS_LOW, 0, -1);
+				target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
 	if (rc == 0 && target == current)
-		restore_vx_regs(target->thread.vxrs);
+		restore_vx_regs(target->thread.fpu.vxrs);
 
 	return rc;
 }
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 98520c8ae0ee..49c259cd5a33 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -105,32 +105,14 @@ struct rt_sigframe
 static void store_sigregs(void)
 {
 	save_access_regs(current->thread.acrs);
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	if (current->thread.vxrs) {
-		int i;
-
-		save_vx_regs(current->thread.vxrs);
-		for (i = 0; i < __NUM_FPRS; i++)
-			current->thread.fp_regs.fprs[i] =
-				*(freg_t *)(current->thread.vxrs + i);
-	} else
-		save_fp_regs(current->thread.fp_regs.fprs);
+	save_fpu_regs(&current->thread.fpu);
 }
 
 /* Load registers after signal return */
 static void load_sigregs(void)
 {
 	restore_access_regs(current->thread.acrs);
-	restore_fp_ctl(&current->thread.fp_regs.fpc);
-	if (current->thread.vxrs) {
-		int i;
-
-		for (i = 0; i < __NUM_FPRS; i++)
-			*(freg_t *)(current->thread.vxrs + i) =
-				current->thread.fp_regs.fprs[i];
-		restore_vx_regs(current->thread.vxrs);
-	} else
-		restore_fp_regs(current->thread.fp_regs.fprs);
+	restore_fpu_regs(&current->thread.fpu);
 }
 
 /* Returns non-zero on fault. */
@@ -146,8 +128,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
-	       sizeof(user_sregs.fpregs));
+	fpregs_store(&user_sregs.fpregs, &current->thread.fpu);
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
 		return -EFAULT;
 	return 0;
@@ -185,8 +166,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
 
-	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
-	       sizeof(current->thread.fp_regs));
+	fpregs_load(&user_sregs.fpregs, &current->thread.fpu);
 
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
@@ -200,13 +180,13 @@ static int save_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Save vector registers to signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
 				   sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_to_user(&sregs_ext->vxrs_high,
-				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				   sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 	}
@@ -220,15 +200,15 @@ static int restore_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Restore vector registers from signal stack */
-	if (current->thread.vxrs) {
+	if (is_vx_task(current)) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
-		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
 				     &sregs_ext->vxrs_high,
 				     sizeof(sregs_ext->vxrs_high)))
 			return -EFAULT;
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+			*((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i];
 	}
 	return 0;
 }
@@ -400,7 +380,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	uc_flags = 0;
 	if (MACHINE_HAS_VX) {
 		frame_size += sizeof(_sigregs_ext);
-		if (current->thread.vxrs)
+		if (is_vx_task(current))
 			uc_flags |= UC_VXRS;
 	}
 	frame = get_sigframe(&ksig->ka, regs, frame_size);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 7bea81d8a363..97598d1876c7 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -19,7 +19,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <asm/switch_to.h>
+#include <asm/fpu-internal.h>
 #include "entry.h"
 
 int show_unhandled_signals = 1;
@@ -227,7 +227,6 @@ DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
 int alloc_vector_registers(struct task_struct *tsk)
 {
 	__vector128 *vxrs;
-	int i;
 
 	/* Allocate vector register save area. */
 	vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
@@ -236,11 +235,10 @@ int alloc_vector_registers(struct task_struct *tsk)
 		return -ENOMEM;
 	preempt_disable();
 	if (tsk == current)
-		save_fp_regs(tsk->thread.fp_regs.fprs);
+		save_fp_regs(tsk->thread.fpu.fprs);
 	/* Copy the 16 floating point registers */
-	for (i = 0; i < 16; i++)
-		*(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i];
-	tsk->thread.vxrs = vxrs;
+	convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
+	tsk->thread.fpu.vxrs = vxrs;
 	if (tsk == current) {
 		__ctl_set_bit(0, 17);
 		restore_vx_regs(vxrs);
@@ -259,8 +257,8 @@ void vector_exception(struct pt_regs *regs)
 	}
 
 	/* get vector interrupt code from fpc */
-	asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc));
-	vic = (current->thread.fp_regs.fpc & 0xf00) >> 8;
+	asm volatile("stfpc %0" : "=Q" (current->thread.fpu.fpc));
+	vic = (current->thread.fpu.fpc & 0xf00) >> 8;
 	switch (vic) {
 	case 1: /* invalid vector operation */
 		si_code = FPE_FLTINV;
@@ -297,22 +295,22 @@ void data_exception(struct pt_regs *regs)
 
 	location = get_trap_ip(regs);
 
-	asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc));
+	asm volatile("stfpc %0" : "=Q" (current->thread.fpu.fpc));
 	/* Check for vector register enablement */
-	if (MACHINE_HAS_VX && !current->thread.vxrs &&
-	    (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) {
+	if (MACHINE_HAS_VX && !is_vx_task(current) &&
+	    (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
 		alloc_vector_registers(current);
 		/* Vector data exception is suppressing, rewind psw. */
 		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
 		clear_pt_regs_flag(regs, PIF_PER_TRAP);
 		return;
 	}
-	if (current->thread.fp_regs.fpc & FPC_DXC_MASK)
+	if (current->thread.fpu.fpc & FPC_DXC_MASK)
 		signal = SIGFPE;
 	else
 		signal = SIGILL;
 	if (signal == SIGFPE)
-		do_fp_trap(regs, current->thread.fp_regs.fpc);
+		do_fp_trap(regs, current->thread.fpu.fpc);
 	else if (signal)
 		do_trap(regs, signal, ILL_ILLOPN, "data exception");
 }
-- 
cgit v1.2.3


From 155e839a814834a3b4b31e729f4716e59d3d2dd4 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 11 Jun 2015 16:57:20 +0200
Subject: s390/kernel: dynamically allocate FP register save area

Make the floating-point save area dynamically allocated and use a flag
to distinguish whether a task uses floating-point or vector registers.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/fpu-internal.h | 19 ++++++++++++++-----
 arch/s390/kernel/process.c           | 27 ++++++++++++++++++++-------
 arch/s390/kernel/traps.c             |  4 ++++
 3 files changed, 38 insertions(+), 12 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu-internal.h
index 04b4cfc08fb5..cc44c75fc4f7 100644
--- a/arch/s390/include/asm/fpu-internal.h
+++ b/arch/s390/include/asm/fpu-internal.h
@@ -8,6 +8,10 @@
 #ifndef _ASM_S390_FPU_INTERNAL_H
 #define _ASM_S390_FPU_INTERNAL_H
 
+#define FPU_USE_VX		1	/* Vector extension is active */
+
+#ifndef __ASSEMBLY__
+
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <asm/linkage.h>
@@ -16,13 +20,16 @@
 
 struct fpu {
 	__u32 fpc;			/* Floating-point control */
-	__u32 pad;
-	freg_t fprs[__NUM_FPRS];	/* Floating-point register save area */
-	__vector128 *vxrs;		/* Vector register save area */
+	__u32 flags;
+	union {
+		void *regs;
+		freg_t *fprs;		/* Floating-point register save area */
+		__vector128 *vxrs;	/* Vector register save area */
+	};
 };
 
-#define is_vx_fpu(fpu) (!!(fpu)->vxrs)
-#define is_vx_task(tsk) (!!(tsk)->thread.fpu.vxrs)
+#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
+#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
 
 static inline int test_fp_ctl(u32 fpc)
 {
@@ -188,4 +195,6 @@ static inline void restore_fpu_regs(struct fpu *fpu)
 		restore_fp_regs(fpu->fprs);
 }
 
+#endif
+
 #endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 61795bc2fff4..56949c9cda97 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -81,8 +81,26 @@ void release_thread(struct task_struct *dead_task)
 
 void arch_release_task_struct(struct task_struct *tsk)
 {
-	if (is_vx_task(tsk))
-		kfree(tsk->thread.fpu.vxrs);
+	/* Free either the floating-point or the vector register save area */
+	kfree(tsk->thread.fpu.regs);
+}
+
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	*dst = *src;
+
+	/* Set up a new floating-point register save area */
+	dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+				       GFP_KERNEL|__GFP_REPEAT);
+	if (!dst->thread.fpu.fprs)
+		return -ENOMEM;
+
+	/* Save the fpu registers to new thread structure. */
+	save_fp_ctl(&dst->thread.fpu.fpc);
+	save_fp_regs(dst->thread.fpu.fprs);
+	dst->thread.fpu.flags = 0;     /* Always start with VX disabled */
+
+	return 0;
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
@@ -142,11 +160,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 	p->thread.ri_signum = 0;
 	frame->childregs.psw.mask &= ~PSW_MASK_RI;
 
-	/* Save the fpu registers to new thread structure. */
-	save_fp_ctl(&p->thread.fpu.fpc);
-	save_fp_regs(p->thread.fpu.fprs);
-	p->thread.fpu.pad = 0;
-	p->thread.fpu.vxrs = NULL;
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS) {
 		unsigned long tls = frame->childregs.gprs[6];
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 97598d1876c7..7b09224c05a3 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -227,6 +227,7 @@ DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
 int alloc_vector_registers(struct task_struct *tsk)
 {
 	__vector128 *vxrs;
+	freg_t *fprs;
 
 	/* Allocate vector register save area. */
 	vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
@@ -238,7 +239,10 @@ int alloc_vector_registers(struct task_struct *tsk)
 		save_fp_regs(tsk->thread.fpu.fprs);
 	/* Copy the 16 floating point registers */
 	convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
+	fprs = tsk->thread.fpu.fprs;
 	tsk->thread.fpu.vxrs = vxrs;
+	tsk->thread.fpu.flags |= FPU_USE_VX;
+	kfree(fprs);
 	if (tsk == current) {
 		__ctl_set_bit(0, 17);
 		restore_vx_regs(vxrs);
-- 
cgit v1.2.3


From bd550337f61b6f10dee8c60d0bd17ac02367b56d Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Mon, 30 Mar 2015 17:32:52 +0200
Subject: s390/vx: add vector instruction support for older binutils versions

Older binutils versions do not include support for the vector instruction
formats.  Add assembler macros for vector instruction mnemonics to easily
encode and generate vector instructions.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/vx-insn.h | 480 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 480 insertions(+)
 create mode 100644 arch/s390/include/asm/vx-insn.h

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
new file mode 100644
index 000000000000..4a3135620f5e
--- /dev/null
+++ b/arch/s390/include/asm/vx-insn.h
@@ -0,0 +1,480 @@
+/*
+ * Support for Vector Instructions
+ *
+ * Assembler macros to generate .byte/.word code for particular
+ * vector instructions that are supported by recent binutils (>= 2.26) only.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_VX_INSN_H
+#define __ASM_S390_VX_INSN_H
+
+#ifdef __ASSEMBLY__
+
+
+/* Macros to generate vector instruction byte code */
+
+#define REG_NUM_INVALID	       255
+
+/* GR_NUM - Retrieve general-purpose register number
+ *
+ * @opd:	Operand to store register number
+ * @gr:	String designating the register in the format "%rN"
+ */
+.macro	GR_NUM	opd gr
+	\opd = REG_NUM_INVALID
+	.ifc \gr,%r0
+		\opd = 0
+	.endif
+	.ifc \gr,%r1
+		\opd = 1
+	.endif
+	.ifc \gr,%r2
+		\opd = 2
+	.endif
+	.ifc \gr,%r3
+		\opd = 3
+	.endif
+	.ifc \gr,%r4
+		\opd = 4
+	.endif
+	.ifc \gr,%r5
+		\opd = 5
+	.endif
+	.ifc \gr,%r6
+		\opd = 6
+	.endif
+	.ifc \gr,%r7
+		\opd = 7
+	.endif
+	.ifc \gr,%r8
+		\opd = 8
+	.endif
+	.ifc \gr,%r9
+		\opd = 9
+	.endif
+	.ifc \gr,%r10
+		\opd = 10
+	.endif
+	.ifc \gr,%r11
+		\opd = 11
+	.endif
+	.ifc \gr,%r12
+		\opd = 12
+	.endif
+	.ifc \gr,%r13
+		\opd = 13
+	.endif
+	.ifc \gr,%r14
+		\opd = 14
+	.endif
+	.ifc \gr,%r15
+		\opd = 15
+	.endif
+	.if \opd == REG_NUM_INVALID
+		.error "Invalid general-purpose register designation: \gr"
+	.endif
+.endm
+
+/* VX_R() - Macro to encode the VX_NUM into the instruction */
+#define VX_R(v)		(v & 0x0F)
+
+/* VX_NUM - Retrieve vector register number
+ *
+ * @opd:	Operand to store register number
+ * @vxr:	String designating the register in the format "%vN"
+ *
+ * The vector register number is used both as an operand of the
+ * instruction and to compute the RXB field of the instruction.
+ * To encode the register number into the instruction, use the
+ * VX_R(v) macro, which extracts its low four bits.
+ */
+.macro	VX_NUM	opd vxr
+	\opd = REG_NUM_INVALID
+	.ifc \vxr,%v0
+		\opd = 0
+	.endif
+	.ifc \vxr,%v1
+		\opd = 1
+	.endif
+	.ifc \vxr,%v2
+		\opd = 2
+	.endif
+	.ifc \vxr,%v3
+		\opd = 3
+	.endif
+	.ifc \vxr,%v4
+		\opd = 4
+	.endif
+	.ifc \vxr,%v5
+		\opd = 5
+	.endif
+	.ifc \vxr,%v6
+		\opd = 6
+	.endif
+	.ifc \vxr,%v7
+		\opd = 7
+	.endif
+	.ifc \vxr,%v8
+		\opd = 8
+	.endif
+	.ifc \vxr,%v9
+		\opd = 9
+	.endif
+	.ifc \vxr,%v10
+		\opd = 10
+	.endif
+	.ifc \vxr,%v11
+		\opd = 11
+	.endif
+	.ifc \vxr,%v12
+		\opd = 12
+	.endif
+	.ifc \vxr,%v13
+		\opd = 13
+	.endif
+	.ifc \vxr,%v14
+		\opd = 14
+	.endif
+	.ifc \vxr,%v15
+		\opd = 15
+	.endif
+	.ifc \vxr,%v16
+		\opd = 16
+	.endif
+	.ifc \vxr,%v17
+		\opd = 17
+	.endif
+	.ifc \vxr,%v18
+		\opd = 18
+	.endif
+	.ifc \vxr,%v19
+		\opd = 19
+	.endif
+	.ifc \vxr,%v20
+		\opd = 20
+	.endif
+	.ifc \vxr,%v21
+		\opd = 21
+	.endif
+	.ifc \vxr,%v22
+		\opd = 22
+	.endif
+	.ifc \vxr,%v23
+		\opd = 23
+	.endif
+	.ifc \vxr,%v24
+		\opd = 24
+	.endif
+	.ifc \vxr,%v25
+		\opd = 25
+	.endif
+	.ifc \vxr,%v26
+		\opd = 26
+	.endif
+	.ifc \vxr,%v27
+		\opd = 27
+	.endif
+	.ifc \vxr,%v28
+		\opd = 28
+	.endif
+	.ifc \vxr,%v29
+		\opd = 29
+	.endif
+	.ifc \vxr,%v30
+		\opd = 30
+	.endif
+	.ifc \vxr,%v31
+		\opd = 31
+	.endif
+	.if \opd == REG_NUM_INVALID
+		.error "Invalid vector register designation: \vxr"
+	.endif
+.endm
+
+/* RXB - Compute the RXB field from the most significant bits of the used vector registers
+ *
+ * @rxb:	Operand to store computed RXB value
+ * @v1:		First vector register designated operand
+ * @v2:		Second vector register designated operand
+ * @v3:		Third vector register designated operand
+ * @v4:		Fourth vector register designated operand
+ */
+.macro	RXB	rxb v1 v2=0 v3=0 v4=0
+	\rxb = 0
+	.if \v1 & 0x10
+		\rxb = \rxb | 0x08
+	.endif
+	.if \v2 & 0x10
+		\rxb = \rxb | 0x04
+	.endif
+	.if \v3 & 0x10
+		\rxb = \rxb | 0x02
+	.endif
+	.if \v4 & 0x10
+		\rxb = \rxb | 0x01
+	.endif
+.endm
+
+/* MRXB - Generate Element Size Control and RXB value
+ *
+ * @m:		Element size control
+ * @v1:		First vector register designated operand (for RXB)
+ * @v2:		Second vector register designated operand (for RXB)
+ * @v3:		Third vector register designated operand (for RXB)
+ * @v4:		Fourth vector register designated operand (for RXB)
+ */
+.macro	MRXB	m v1 v2=0 v3=0 v4=0
+	rxb = 0
+	RXB	rxb, \v1, \v2, \v3, \v4
+	.byte	(\m << 4) | rxb
+.endm
+
+/* MRXBOPC - Generate Element Size Control, RXB, and final Opcode fields
+ *
+ * @m:		Element size control
+ * @opc:	Opcode
+ * @v1:		First vector register designated operand (for RXB)
+ * @v2:		Second vector register designated operand (for RXB)
+ * @v3:		Third vector register designated operand (for RXB)
+ * @v4:		Fourth vector register designated operand (for RXB)
+ */
+.macro	MRXBOPC	m opc v1 v2=0 v3=0 v4=0
+	MRXB	\m, \v1, \v2, \v3, \v4
+	.byte	\opc
+.endm
+
+/* Vector support instructions */
+
+/* VECTOR GENERATE BYTE MASK */
+.macro	VGBM	vr imm2
+	VX_NUM	v1, \vr
+	.word	(0xE700 | (VX_R(v1) << 4))
+	.word	\imm2
+	MRXBOPC	0, 0x44, v1
+.endm
+.macro	VZERO	vxr
+	VGBM	\vxr, 0
+.endm
+.macro	VONE	vxr
+	VGBM	\vxr, 0xFFFF
+.endm
+
+/* VECTOR LOAD VR ELEMENT FROM GR */
+.macro	VLVG	v, gr, disp, m
+	VX_NUM	v1, \v
+	GR_NUM	b2, "%r0"
+	GR_NUM	r3, \gr
+	.word	0xE700 | (VX_R(v1) << 4) | r3
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m, 0x22, v1
+.endm
+.macro	VLVGB	v, gr, index
+	VLVG	\v, \gr, \index, 0	/* VLVG takes (v, gr, disp, m); m=0 selects bytes */
+.endm
+.macro	VLVGH	v, gr, index
+	VLVG	\v, \gr, \index, 1
+.endm
+.macro	VLVGF	v, gr, index
+	VLVG	\v, \gr, \index, 2
+.endm
+.macro	VLVGG	v, gr, index
+	VLVG	\v, \gr, \index, 3
+.endm
+
+/* VECTOR LOAD */
+.macro	VL	v, disp, index="%r0", base
+	VX_NUM	v1, \v
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC 0, 0x06, v1
+.endm
+
+/* VECTOR LOAD ELEMENT */
+.macro	VLEx	vr1, disp, index="%r0", base, m3, opc
+	VX_NUM	v1, \vr1
+	GR_NUM	x2, \index
+	GR_NUM	b2, \base
+	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m3, \opc, v1
+.endm
+.macro	VLEB	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x00
+.endm
+.macro	VLEH	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x01
+.endm
+.macro	VLEF	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x03
+.endm
+.macro	VLEG	vr1, disp, index="%r0", base, m3
+	VLEx	\vr1, \disp, \index, \base, \m3, 0x02
+.endm
+
+/* VECTOR LOAD ELEMENT IMMEDIATE */
+.macro	VLEIx	vr1, imm2, m3, opc
+	VX_NUM	v1, \vr1
+	.word	0xE700 | (VX_R(v1) << 4)
+	.word	\imm2
+	MRXBOPC	\m3, \opc, v1
+.endm
+.macro	VLEIB	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x40
+.endm
+.macro	VLEIH	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x41
+.endm
+.macro	VLEIF	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x43
+.endm
+.macro	VLEIG	vr1, imm2, index
+	VLEIx	\vr1, \imm2, \index, 0x42
+.endm
+
+/* VECTOR LOAD GR FROM VR ELEMENT */
+.macro	VLGV	gr, vr, disp, base="%r0", m
+	GR_NUM	r1, \gr
+	GR_NUM	b2, \base
+	VX_NUM	v3, \vr
+	.word	0xE700 | (r1 << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	\m, 0x21, 0, v3	    /* v3 is in the 2nd register field: 2nd RXB slot */
+.endm
+.macro	VLGVB	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 0
+.endm
+.macro	VLGVH	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 1
+.endm
+.macro	VLGVF	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 2
+.endm
+.macro	VLGVG	gr, vr, disp, base="%r0"
+	VLGV	\gr, \vr, \disp, \base, 3
+.endm
+
+/* VECTOR LOAD MULTIPLE */
+.macro	VLM	vfrom, vto, disp, base
+	VX_NUM	v1, \vfrom
+	VX_NUM	v3, \vto
+	GR_NUM	b2, \base	    /* Base register */
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	0, 0x36, v1, v3
+.endm
+
+/* VECTOR STORE MULTIPLE */
+.macro	VSTM	vfrom, vto, disp, base
+	VX_NUM	v1, \vfrom
+	VX_NUM	v3, \vto
+	GR_NUM	b2, \base	    /* Base register */
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	(b2 << 12) | (\disp)
+	MRXBOPC	0, 0x3E, v1, v3
+.endm
+
+/* VECTOR PERMUTE */
+.macro	VPERM	vr1, vr2, vr3, vr4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	VX_NUM	v4, \vr4
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	VX_R(v4), 0x8C, v1, v2, v3, v4
+.endm
+
+/* VECTOR UNPACK LOGICAL LOW */
+.macro	VUPLL	vr1, vr2, m3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	0x0000
+	MRXBOPC	\m3, 0xD4, v1, v2
+.endm
+.macro	VUPLLB	vr1, vr2
+	VUPLL	\vr1, \vr2, 0
+.endm
+.macro	VUPLLH	vr1, vr2
+	VUPLL	\vr1, \vr2, 1
+.endm
+.macro	VUPLLF	vr1, vr2
+	VUPLL	\vr1, \vr2, 2
+.endm
+
+
+/* Vector integer instructions */
+
+/* VECTOR EXCLUSIVE OR */
+.macro	VX	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	0, 0x6D, v1, v2, v3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM */
+.macro	VGFM	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	\m4, 0xB4, v1, v2, v3
+.endm
+.macro	VGFMB	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VGFMH	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VGFMF	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VGFMG	vr1, vr2, vr3
+	VGFM	\vr1, \vr2, \vr3, 3
+.endm
+
+/* VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE */
+.macro	VGFMA	vr1, vr2, vr3, vr4, m5
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	VX_NUM	v4, \vr4
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12) | (\m5 << 8)
+	MRXBOPC	VX_R(v4), 0xBC, v1, v2, v3, v4
+.endm
+.macro	VGFMAB	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 0
+.endm
+.macro	VGFMAH	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 1
+.endm
+.macro	VGFMAF	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 2
+.endm
+.macro	VGFMAG	vr1, vr2, vr3, vr4
+	VGFMA	\vr1, \vr2, \vr3, \vr4, 3
+.endm
+
+/* VECTOR SHIFT RIGHT LOGICAL BY BYTE */
+.macro	VSRLB	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	(VX_R(v3) << 12)
+	MRXBOPC	0, 0x7D, v1, v2, v3
+.endm
+
+
+#endif	/* __ASSEMBLY__ */
+#endif	/* __ASM_S390_VX_INSN_H */
-- 
cgit v1.2.3


From 9977e886cbbc758b4b601a160b5825ba573b5ca8 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Wed, 10 Jun 2015 12:53:42 +0200
Subject: s390/kernel: lazy restore fpu registers

Improve the save and restore behavior of FPU register contents to use the
vector extension within the kernel.

The kernel does not use floating-point or vector registers and, therefore,
saving and restoring the FPU register contents are performed for handling
signals or switching processes only.  To prepare for using vector
instructions and vector registers within the kernel, enhance the save
behavior and implement a lazy restore at return to user space from a
system call or interrupt.

To implement the lazy restore, the save_fpu_regs() sets a CPU information
flag, CIF_FPU, to indicate that the FPU registers must be restored.
Saving and setting CIF_FPU is performed in an atomic fashion to be
interrupt-safe.  When the kernel wants to use the vector extension or
wants to change the FPU register state for a task during signal handling,
the save_fpu_regs() must be called first.  The CIF_FPU flag is also set at
process switch.  At return to user space, the FPU state is restored.  In
particular, the FPU state includes the floating-point or vector register
contents, as well as the vector-enablement and floating-point control.  The
FPU state restore and clearing CIF_FPU is also performed in an atomic
fashion.

For KVM, the restore of the FPU register state is performed when restoring
the general-purpose guest registers before the SIE instruction is started.
Because the path towards the SIE instruction is interruptible, the CIF_FPU
flag must be checked again right before going into SIE.  If set, the guest
registers must be reloaded again by re-entering the outer SIE loop.  This
is the same behavior as if the SIE critical section is interrupted.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/ctl_reg.h      |   2 +
 arch/s390/include/asm/fpu-internal.h | 110 ++-----------
 arch/s390/include/asm/kvm_host.h     |   6 +-
 arch/s390/include/asm/processor.h    |   2 +
 arch/s390/include/asm/switch_to.h    |   2 +-
 arch/s390/kernel/asm-offsets.c       |   5 +
 arch/s390/kernel/compat_signal.c     |   3 +-
 arch/s390/kernel/entry.S             | 311 ++++++++++++++++++++++++++++++++++-
 arch/s390/kernel/nmi.c               |   8 +-
 arch/s390/kernel/process.c           |  33 +++-
 arch/s390/kernel/ptrace.c            |  53 ++----
 arch/s390/kernel/s390_ksyms.c        |   3 +
 arch/s390/kernel/signal.c            |   3 +-
 arch/s390/kernel/traps.c             |  12 +-
 arch/s390/kvm/kvm-s390.c             | 132 ++++++++++-----
 15 files changed, 482 insertions(+), 203 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index d7697ab802f6..17a373576868 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -46,6 +46,8 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
 	__ctl_load(reg, cr, cr);
 }
 
+void __ctl_set_vx(void);
+
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
 
diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu-internal.h
index cc44c75fc4f7..237f8fcbe46b 100644
--- a/arch/s390/include/asm/fpu-internal.h
+++ b/arch/s390/include/asm/fpu-internal.h
@@ -28,9 +28,14 @@ struct fpu {
 	};
 };
 
+void save_fpu_regs(struct fpu *fpu);
+
 #define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
 #define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
 
+/* VX array structure for address operand constraints in inline assemblies */
+struct vx_array { __vector128 _[__NUM_VXRS]; };
+
 static inline int test_fp_ctl(u32 fpc)
 {
 	u32 orig_fpc;
@@ -48,76 +53,6 @@ static inline int test_fp_ctl(u32 fpc)
 	return rc;
 }
 
-static inline void save_fp_ctl(u32 *fpc)
-{
-	asm volatile(
-		"       stfpc   %0\n"
-		: "+Q" (*fpc));
-}
-
-static inline int restore_fp_ctl(u32 *fpc)
-{
-	int rc;
-
-	asm volatile(
-		"	lfpc    %1\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
-	return rc;
-}
-
-static inline void save_fp_regs(freg_t *fprs)
-{
-	asm volatile("std 0,%0" : "=Q" (fprs[0]));
-	asm volatile("std 2,%0" : "=Q" (fprs[2]));
-	asm volatile("std 4,%0" : "=Q" (fprs[4]));
-	asm volatile("std 6,%0" : "=Q" (fprs[6]));
-	asm volatile("std 1,%0" : "=Q" (fprs[1]));
-	asm volatile("std 3,%0" : "=Q" (fprs[3]));
-	asm volatile("std 5,%0" : "=Q" (fprs[5]));
-	asm volatile("std 7,%0" : "=Q" (fprs[7]));
-	asm volatile("std 8,%0" : "=Q" (fprs[8]));
-	asm volatile("std 9,%0" : "=Q" (fprs[9]));
-	asm volatile("std 10,%0" : "=Q" (fprs[10]));
-	asm volatile("std 11,%0" : "=Q" (fprs[11]));
-	asm volatile("std 12,%0" : "=Q" (fprs[12]));
-	asm volatile("std 13,%0" : "=Q" (fprs[13]));
-	asm volatile("std 14,%0" : "=Q" (fprs[14]));
-	asm volatile("std 15,%0" : "=Q" (fprs[15]));
-}
-
-static inline void restore_fp_regs(freg_t *fprs)
-{
-	asm volatile("ld 0,%0" : : "Q" (fprs[0]));
-	asm volatile("ld 2,%0" : : "Q" (fprs[2]));
-	asm volatile("ld 4,%0" : : "Q" (fprs[4]));
-	asm volatile("ld 6,%0" : : "Q" (fprs[6]));
-	asm volatile("ld 1,%0" : : "Q" (fprs[1]));
-	asm volatile("ld 3,%0" : : "Q" (fprs[3]));
-	asm volatile("ld 5,%0" : : "Q" (fprs[5]));
-	asm volatile("ld 7,%0" : : "Q" (fprs[7]));
-	asm volatile("ld 8,%0" : : "Q" (fprs[8]));
-	asm volatile("ld 9,%0" : : "Q" (fprs[9]));
-	asm volatile("ld 10,%0" : : "Q" (fprs[10]));
-	asm volatile("ld 11,%0" : : "Q" (fprs[11]));
-	asm volatile("ld 12,%0" : : "Q" (fprs[12]));
-	asm volatile("ld 13,%0" : : "Q" (fprs[13]));
-	asm volatile("ld 14,%0" : : "Q" (fprs[14]));
-	asm volatile("ld 15,%0" : : "Q" (fprs[15]));
-}
-
-static inline void save_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
-	asm volatile(
-		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		: "=Q" (*(addrtype *) vxrs) : : "1");
-}
-
 static inline void save_vx_regs_safe(__vector128 *vxrs)
 {
 	unsigned long cr0, flags;
@@ -126,20 +61,13 @@ static inline void save_vx_regs_safe(__vector128 *vxrs)
 	__ctl_store(cr0, 0, 0);
 	__ctl_set_bit(0, 17);
 	__ctl_set_bit(0, 18);
-	save_vx_regs(vxrs);
-	__ctl_load(cr0, 0, 0);
-	arch_local_irq_restore(flags);
-}
-
-static inline void restore_vx_regs(__vector128 *vxrs)
-{
-	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
-
 	asm volatile(
 		"	la	1,%0\n"
-		"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-		: : "Q" (*(addrtype *) vxrs) : "1");
+		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		: "=Q" (*(struct vx_array *) vxrs) : : "1");
+	__ctl_load(cr0, 0, 0);
+	arch_local_irq_restore(flags);
 }
 
 static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
@@ -177,24 +105,6 @@ static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
 		       sizeof(fpregs->fprs));
 }
 
-static inline void save_fpu_regs(struct fpu *fpu)
-{
-	save_fp_ctl(&fpu->fpc);
-	if (is_vx_fpu(fpu))
-		save_vx_regs(fpu->vxrs);
-	else
-		save_fp_regs(fpu->fprs);
-}
-
-static inline void restore_fpu_regs(struct fpu *fpu)
-{
-	restore_fp_ctl(&fpu->fpc);
-	if (is_vx_fpu(fpu))
-		restore_vx_regs(fpu->vxrs);
-	else
-		restore_fp_regs(fpu->fprs);
-}
-
 #endif
 
 #endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3024acbe1f9d..c4f4c52aaa23 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
 #include <linux/kvm.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
+#include <asm/fpu-internal.h>
 #include <asm/isc.h>
 
 #define KVM_MAX_VCPUS 64
@@ -498,10 +499,9 @@ struct kvm_guestdbg_info_arch {
 
 struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *sie_block;
-	s390_fp_regs      host_fpregs;
 	unsigned int      host_acrs[NUM_ACRS];
-	s390_fp_regs      guest_fpregs;
-	struct kvm_s390_vregs	*host_vregs;
+	struct fpu	  host_fpregs;
+	struct fpu	  guest_fpregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 19f51db7c5e6..c417015c5304 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,10 +14,12 @@
 #define CIF_MCCK_PENDING	0	/* machine check handling is pending */
 #define CIF_ASCE		1	/* user asce needs fixup / uaccess */
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
+#define CIF_FPU			3	/* restore vector registers */
 
 #define _CIF_MCCK_PENDING	(1<<CIF_MCCK_PENDING)
 #define _CIF_ASCE		(1<<CIF_ASCE)
 #define _CIF_NOHZ_DELAY		(1<<CIF_NOHZ_DELAY)
+#define _CIF_FPU		(1<<CIF_FPU)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index caf4f23462b0..0a4a3150b7d7 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -36,7 +36,7 @@ static inline void restore_access_regs(unsigned int *acrs)
 	}								\
 	if (next->mm) {							\
 		update_cr_regs(next);					\
-		restore_fpu_regs(&next->thread.fpu);			\
+		set_cpu_flag(CIF_FPU);					\
 		restore_access_regs(&next->thread.acrs[0]);		\
 		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
 	}								\
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index a2da259d9327..6bc42c08be09 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -28,11 +28,16 @@ int main(void)
 	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
 	BLANK();
 	DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
+	DEFINE(__THREAD_fpu, offsetof(struct task_struct, thread.fpu));
 	DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
 	DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
 	DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));
 	DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb));
 	BLANK();
+	DEFINE(__FPU_fpc, offsetof(struct fpu, fpc));
+	DEFINE(__FPU_flags, offsetof(struct fpu, flags));
+	DEFINE(__FPU_regs, offsetof(struct fpu, regs));
+	BLANK();
 	DEFINE(__TI_task, offsetof(struct thread_info, task));
 	DEFINE(__TI_flags, offsetof(struct thread_info, flags));
 	DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 452995137a69..0b46fd4aa31e 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -161,7 +161,6 @@ static void store_sigregs(void)
 static void load_sigregs(void)
 {
 	restore_access_regs(current->thread.acrs);
-	restore_fpu_regs(&current->thread.fpu);
 }
 
 static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
@@ -287,6 +286,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
 		goto badframe;
 	set_current_blocked(&set);
+	save_fpu_regs(&current->thread.fpu);
 	if (restore_sigregs32(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->sregs_ext))
@@ -309,6 +309,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
+	save_fpu_regs(&current->thread.fpu);
 	if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 84062e7a77da..05ea485156ee 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -20,6 +20,8 @@
 #include <asm/page.h>
 #include <asm/sigp.h>
 #include <asm/irq.h>
+#include <asm/fpu-internal.h>
+#include <asm/vx-insn.h>
 
 __PT_R0      =	__PT_GPRS
 __PT_R1      =	__PT_GPRS + 8
@@ -46,10 +48,10 @@ _TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		   _TIF_UPROBE)
 _TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		   _TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE)
+_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE | _CIF_FPU)
 _PIF_WORK	= (_PIF_PER_TRAP)
 
-#define BASED(name) name-system_call(%r13)
+#define BASED(name) name-cleanup_critical(%r13)
 
 	.macro	TRACE_IRQS_ON
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -280,6 +282,8 @@ ENTRY(system_call)
 	jo	.Lsysc_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lsysc_notify_resume
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lsysc_vxrs
 	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	.Lsysc_uaccess
 	j	.Lsysc_return		# beware of critical section cleanup
@@ -306,6 +310,13 @@ ENTRY(system_call)
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	j	.Lsysc_return
 
+#
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lsysc_vxrs:
+	larl	%r14,.Lsysc_return
+	jg	load_fpu_regs
+
 #
 # _TIF_SIGPENDING is set, call do_signal
 #
@@ -405,7 +416,7 @@ ENTRY(pgm_check_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_PGM_OLD_PSW
 	HANDLE_SIE_INTERCEPT %r14,1
 	tmhh	%r8,0x0001		# test problem state bit
@@ -483,7 +494,7 @@ ENTRY(io_int_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_IO_OLD_PSW
 	HANDLE_SIE_INTERCEPT %r14,2
 	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
@@ -587,6 +598,8 @@ ENTRY(io_int_handler)
 	jo	.Lio_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lio_notify_resume
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lio_vxrs
 	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	.Lio_uaccess
 	j	.Lio_return		# beware of critical section cleanup
@@ -608,6 +621,13 @@ ENTRY(io_int_handler)
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	j	.Lio_return
 
+#
+# CIF_FPU is set, restore floating-point controls and floating-point registers.
+#
+.Lio_vxrs:
+	larl	%r14,.Lio_return
+	jg	load_fpu_regs
+
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
@@ -652,7 +672,7 @@ ENTRY(ext_int_handler)
 	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_EXT_OLD_PSW
 	HANDLE_SIE_INTERCEPT %r14,3
 	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
@@ -690,6 +710,121 @@ ENTRY(psw_idle)
 	br	%r14
 .Lpsw_idle_end:
 
+/* Store floating-point controls and floating-point or vector extension
+ * registers.  A critical section cleanup ensures that the registers
+ * are stored even if interrupted for some other work.	The register %r2
+ * designates a struct fpu to store register contents.	If the specified
+ * structure does not contain a register save area, the register store is
+ * omitted (see also comments in arch_dup_task_struct()).
+ *
+ * The CIF_FPU flag is set in any case.  This flag triggers a lazy restore
+ * of the register contents at system call or io return.
+ */
+ENTRY(save_fpu_regs)
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bor	%r14
+	stfpc	__FPU_fpc(%r2)
+.Lsave_fpu_regs_fpc_end:
+	lg	%r3,__FPU_regs(%r2)
+	ltgr	%r3,%r3
+	jz	.Lsave_fpu_regs_done	  # no save area -> set CIF_FPU
+	tm	__FPU_flags+3(%r2),FPU_USE_VX
+	jz	.Lsave_fpu_regs_fp	  # no -> store FP regs
+.Lsave_fpu_regs_vx_low:
+	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
+.Lsave_fpu_regs_vx_high:
+	VSTM	%v16,%v31,256,%r3	  # vstm 16,31,256(3)
+	j	.Lsave_fpu_regs_done	  # -> set CIF_FPU flag
+.Lsave_fpu_regs_fp:
+	std	0,0(%r3)
+	std	1,8(%r3)
+	std	2,16(%r3)
+	std	3,24(%r3)
+	std	4,32(%r3)
+	std	5,40(%r3)
+	std	6,48(%r3)
+	std	7,56(%r3)
+	std	8,64(%r3)
+	std	9,72(%r3)
+	std	10,80(%r3)
+	std	11,88(%r3)
+	std	12,96(%r3)
+	std	13,104(%r3)
+	std	14,112(%r3)
+	std	15,120(%r3)
+.Lsave_fpu_regs_done:
+	oi	__LC_CPU_FLAGS+7,_CIF_FPU
+	br	%r14
+.Lsave_fpu_regs_end:
+
+/* Load floating-point controls and floating-point or vector extension
+ * registers.  A critical section cleanup assures that the register contents
+ * are loaded even if interrupted for some other work.	Depending on the saved
+ * FP/VX state, the vector-enablement control, CR0.46, is either set or cleared.
+ *
+ * There are special calling conventions to fit into sysc and io return work:
+ *	%r12:	__LC_THREAD_INFO
+ *	%r15:	<kernel stack>
+ * The function requires:
+ *	%r4 and __SF_EMPTY+32(%r15)
+ */
+load_fpu_regs:
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bnor	%r14
+	lg	%r4,__TI_task(%r12)
+	la	%r4,__THREAD_fpu(%r4)
+	lfpc	__FPU_fpc(%r4)
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	lg	%r4,__FPU_regs(%r4)		# %r4 <- reg save area
+	jz	.Lload_fpu_regs_fp_ctl		# -> no VX, load FP regs
+.Lload_fpu_regs_vx_ctl:
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jo	.Lload_fpu_regs_vx
+	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+.Lload_fpu_regs_vx:
+	VLM	%v0,%v15,0,%r4
+.Lload_fpu_regs_vx_high:
+	VLM	%v16,%v31,256,%r4
+	j	.Lload_fpu_regs_done
+.Lload_fpu_regs_fp_ctl:
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jz	.Lload_fpu_regs_fp
+	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+.Lload_fpu_regs_fp:
+	ld	0,0(%r4)
+	ld	1,8(%r4)
+	ld	2,16(%r4)
+	ld	3,24(%r4)
+	ld	4,32(%r4)
+	ld	5,40(%r4)
+	ld	6,48(%r4)
+	ld	7,56(%r4)
+	ld	8,64(%r4)
+	ld	9,72(%r4)
+	ld	10,80(%r4)
+	ld	11,88(%r4)
+	ld	12,96(%r4)
+	ld	13,104(%r4)
+	ld	14,112(%r4)
+	ld	15,120(%r4)
+.Lload_fpu_regs_done:
+	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
+	br	%r14
+.Lload_fpu_regs_end:
+
+/* Test and set the vector enablement control in CR0.46 */
+ENTRY(__ctl_set_vx)
+	stctg	%c0,%c0,__SF_EMPTY(%r15)
+	tm	__SF_EMPTY+5(%r15),2
+	bor	%r14
+	oi	__SF_EMPTY+5(%r15),2
+	lctlg	%c0,%c0,__SF_EMPTY(%r15)
+	br	%r14
+.L__ctl_set_vx_end:
+
 .L__critical_end:
 
 /*
@@ -702,7 +837,7 @@ ENTRY(mcck_int_handler)
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
-	larl	%r13,system_call
+	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_MCK_OLD_PSW
 	HANDLE_SIE_INTERCEPT %r14,4
 	tm	__LC_MCCK_CODE,0x80	# system damage?
@@ -831,6 +966,12 @@ stack_overflow:
 	.quad	.Lio_done
 	.quad	psw_idle
 	.quad	.Lpsw_idle_end
+	.quad	save_fpu_regs
+	.quad	.Lsave_fpu_regs_end
+	.quad	load_fpu_regs
+	.quad	.Lload_fpu_regs_end
+	.quad	__ctl_set_vx
+	.quad	.L__ctl_set_vx_end
 
 cleanup_critical:
 	clg	%r9,BASED(.Lcleanup_table)	# system_call
@@ -853,6 +994,18 @@ cleanup_critical:
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+72)	# .Lpsw_idle_end
 	jl	.Lcleanup_idle
+	clg	%r9,BASED(.Lcleanup_table+80)	# save_fpu_regs
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+88)	# .Lsave_fpu_regs_end
+	jl	.Lcleanup_save_fpu_regs
+	clg	%r9,BASED(.Lcleanup_table+96)	# load_fpu_regs
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+104)	# .Lload_fpu_regs_end
+	jl	.Lcleanup_load_fpu_regs
+	clg	%r9,BASED(.Lcleanup_table+112)	# __ctl_set_vx
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+120)	# .L__ctl_set_vx_end
+	jl	.Lcleanup___ctl_set_vx
 0:	br	%r14
 
 
@@ -981,6 +1134,145 @@ cleanup_critical:
 .Lcleanup_idle_insn:
 	.quad	.Lpsw_idle_lpsw
 
+.Lcleanup_save_fpu_regs:
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bor	%r14
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_done)
+	jhe	5f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_fp)
+	jhe	4f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
+	jhe	3f
+	clg	%r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
+	jhe	2f
+	clg	%r9,BASED(.Lcleanup_save_fpu_fpc_end)
+	jhe	1f
+0:	# Store floating-point controls
+	stfpc	__FPU_fpc(%r2)
+1:	# Load register save area and check if VX is active
+	lg	%r3,__FPU_regs(%r2)
+	ltgr	%r3,%r3
+	jz	5f			  # no save area -> set CIF_FPU
+	tm	__FPU_flags+3(%r2),FPU_USE_VX
+	jz	4f			  # no VX -> store FP regs
+2:	# Store vector registers (V0-V15)
+	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
+3:	# Store vector registers (V16-V31)
+	VSTM	%v16,%v31,256,%r3	  # vstm 16,31,256(3)
+	j	5f			  # -> done, set CIF_FPU flag
+4:	# Store floating-point registers
+	std	0,0(%r3)
+	std	1,8(%r3)
+	std	2,16(%r3)
+	std	3,24(%r3)
+	std	4,32(%r3)
+	std	5,40(%r3)
+	std	6,48(%r3)
+	std	7,56(%r3)
+	std	8,64(%r3)
+	std	9,72(%r3)
+	std	10,80(%r3)
+	std	11,88(%r3)
+	std	12,96(%r3)
+	std	13,104(%r3)
+	std	14,112(%r3)
+	std	15,120(%r3)
+5:	# Set CIF_FPU flag
+	oi	__LC_CPU_FLAGS+7,_CIF_FPU
+	lg	%r9,48(%r11)		# return from save_fpu_regs
+	br	%r14
+.Lcleanup_save_fpu_fpc_end:
+	.quad	.Lsave_fpu_regs_fpc_end
+.Lcleanup_save_fpu_regs_vx_low:
+	.quad	.Lsave_fpu_regs_vx_low
+.Lcleanup_save_fpu_regs_vx_high:
+	.quad	.Lsave_fpu_regs_vx_high
+.Lcleanup_save_fpu_regs_fp:
+	.quad	.Lsave_fpu_regs_fp
+.Lcleanup_save_fpu_regs_done:
+	.quad	.Lsave_fpu_regs_done
+
+.Lcleanup_load_fpu_regs:
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	bnor	%r14
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_done)
+	jhe	1f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp)
+	jhe	2f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl)
+	jhe	3f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
+	jhe	4f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx)
+	jhe	5f
+	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
+	jhe	6f
+	lg	%r4,__TI_task(%r12)
+	la	%r4,__THREAD_fpu(%r4)
+	lfpc	__FPU_fpc(%r4)
+	tm	__FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	lg	%r4,__FPU_regs(%r4)		# %r4 <- reg save area
+	jz	3f				# -> no VX, load FP regs
+6:	# Set VX-enablement control
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jo	5f
+	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+5:	# Load V0 ..V15 registers
+	VLM	%v0,%v15,0,%r4
+4:	# Load V16..V31 registers
+	VLM	%v16,%v31,256,%r4
+	j	1f
+3:	# Clear VX-enablement control for FP
+	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
+	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
+	jz	2f
+	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
+	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+2:	# Load floating-point registers
+	ld	0,0(%r4)
+	ld	1,8(%r4)
+	ld	2,16(%r4)
+	ld	3,24(%r4)
+	ld	4,32(%r4)
+	ld	5,40(%r4)
+	ld	6,48(%r4)
+	ld	7,56(%r4)
+	ld	8,64(%r4)
+	ld	9,72(%r4)
+	ld	10,80(%r4)
+	ld	11,88(%r4)
+	ld	12,96(%r4)
+	ld	13,104(%r4)
+	ld	14,112(%r4)
+	ld	15,120(%r4)
+1:	# Clear CIF_FPU bit
+	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
+	lg	%r9,48(%r11)		# return from load_fpu_regs
+	br	%r14
+.Lcleanup_load_fpu_regs_vx_ctl:
+	.quad	.Lload_fpu_regs_vx_ctl
+.Lcleanup_load_fpu_regs_vx:
+	.quad	.Lload_fpu_regs_vx
+.Lcleanup_load_fpu_regs_vx_high:
+	.quad	.Lload_fpu_regs_vx_high
+.Lcleanup_load_fpu_regs_fp_ctl:
+	.quad	.Lload_fpu_regs_fp_ctl
+.Lcleanup_load_fpu_regs_fp:
+	.quad	.Lload_fpu_regs_fp
+.Lcleanup_load_fpu_regs_done:
+	.quad	.Lload_fpu_regs_done
+
+.Lcleanup___ctl_set_vx:
+	stctg	%c0,%c0,__SF_EMPTY(%r15)
+	tm	__SF_EMPTY+5(%r15),2
+	bor	%r14
+	oi	__SF_EMPTY+5(%r15),2
+	lctlg	%c0,%c0,__SF_EMPTY(%r15)
+	lg	%r9,48(%r11)		# return from __ctl_set_vx
+	br	%r14
+
 /*
  * Integer constants
  */
@@ -1002,6 +1294,11 @@ ENTRY(sie64a)
 	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
 	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
 	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU	# load guest fp/vx registers ?
+	jno	.Lsie_load_guest_gprs
+	lg	%r12,__LC_THREAD_INFO		# load fp/vx regs save area
+	brasl	%r14,load_fpu_regs		# load guest fp/vx regs
+.Lsie_load_guest_gprs:
 	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
 	lg	%r14,__LC_GMAP			# get gmap pointer
 	ltgr	%r14,%r14
@@ -1012,6 +1309,8 @@ ENTRY(sie64a)
 	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
 	tm	__SIE_PROG20+3(%r14),3		# last exit...
 	jnz	.Lsie_done
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lsie_done			# exit if fp/vx regs changed
 	LPP	__SF_EMPTY(%r15)		# set guest id
 	sie	0(%r14)
 .Lsie_done:
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index e66141c6696a..cbdd94c8ba18 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -165,8 +165,12 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 		cr0.val = S390_lowcore.cregs_save_area[0];
 		cr0.afp = cr0.vx = 1;
 		__ctl_load(cr0.val, 0, 0);
-		restore_vx_regs((__vector128 *)
-				&S390_lowcore.vector_save_area);
+		asm volatile(
+			"	la	1,%0\n"
+			"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+			"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
+			: : "Q" (*(struct vx_array *)
+				 &S390_lowcore.vector_save_area) : "1");
 		__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
 	}
 	/* Revalidate access registers */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 56949c9cda97..9cf0063f920e 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -90,16 +90,28 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	*dst = *src;
 
 	/* Set up a new floating-point register save area */
+	dst->thread.fpu.fpc = 0;
+	dst->thread.fpu.flags = 0;	/* Always start with VX disabled */
 	dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
 				       GFP_KERNEL|__GFP_REPEAT);
 	if (!dst->thread.fpu.fprs)
 		return -ENOMEM;
 
-	/* Save the fpu registers to new thread structure. */
-	save_fp_ctl(&dst->thread.fpu.fpc);
-	save_fp_regs(dst->thread.fpu.fprs);
-	dst->thread.fpu.flags = 0;     /* Always start with VX disabled */
-
+	/*
+	 * Save the floating-point or vector register state of the current
+	 * task.  The state is not saved for early kernel threads, for example,
+	 * the init_task, which do not have an allocated save area.
+	 * The CIF_FPU flag is set in any case to lazy clear or restore a saved
+	 * state when switching to a different task or returning to user space.
+	 */
+	save_fpu_regs(&current->thread.fpu);
+	dst->thread.fpu.fpc = current->thread.fpu.fpc;
+	if (is_vx_task(current))
+		convert_vx_to_fp(dst->thread.fpu.fprs,
+				 current->thread.fpu.vxrs);
+	else
+		memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs,
+		       sizeof(freg_t) * __NUM_FPRS);
 	return 0;
 }
 
@@ -184,8 +196,15 @@ asmlinkage void execve_tail(void)
  */
 int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
 {
-	save_fp_ctl(&fpregs->fpc);
-	save_fp_regs(fpregs->fprs);
+	save_fpu_regs(&current->thread.fpu);
+	fpregs->fpc = current->thread.fpu.fpc;
+	fpregs->pad = 0;
+	if (is_vx_task(current))
+		convert_vx_to_fp((freg_t *)&fpregs->fprs,
+				 current->thread.fpu.vxrs);
+	else
+		memcpy(&fpregs->fprs, current->thread.fpu.fprs,
+		       sizeof(fpregs->fprs));
 	return 1;
 }
 EXPORT_SYMBOL(dump_fpu);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 52e2e1dd919d..8c525880a3ff 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -45,39 +45,27 @@ void update_cr_regs(struct task_struct *task)
 	struct per_regs old, new;
 
 	/* Take care of the enable/disable of transactional execution. */
-	if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
+	if (MACHINE_HAS_TE) {
 		unsigned long cr, cr_new;
 
 		__ctl_store(cr, 0, 0);
-		cr_new = cr;
-		if (MACHINE_HAS_TE) {
-			/* Set or clear transaction execution TXC bit 8. */
-			cr_new |= (1UL << 55);
-			if (task->thread.per_flags & PER_FLAG_NO_TE)
-				cr_new &= ~(1UL << 55);
-		}
-		if (MACHINE_HAS_VX) {
-			/* Enable/disable of vector extension */
-			cr_new &= ~(1UL << 17);
-			if (task->thread.fpu.vxrs)
-				cr_new |= (1UL << 17);
-		}
+		/* Set or clear transaction execution TXC bit 8. */
+		cr_new = cr | (1UL << 55);
+		if (task->thread.per_flags & PER_FLAG_NO_TE)
+			cr_new &= ~(1UL << 55);
 		if (cr_new != cr)
 			__ctl_load(cr_new, 0, 0);
-		if (MACHINE_HAS_TE) {
-			/* Set/clear transaction execution TDC bits 62/63. */
-			__ctl_store(cr, 2, 2);
-			cr_new = cr & ~3UL;
-			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
-				if (task->thread.per_flags &
-				    PER_FLAG_TE_ABORT_RAND_TEND)
-					cr_new |= 1UL;
-				else
-					cr_new |= 2UL;
-			}
-			if (cr_new != cr)
-				__ctl_load(cr_new, 2, 2);
+		/* Set or clear transaction execution TDC bits 62 and 63. */
+		__ctl_store(cr, 2, 2);
+		cr_new = cr & ~3UL;
+		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
+				cr_new |= 1UL;
+			else
+				cr_new |= 2UL;
 		}
+		if (cr_new != cr)
+			__ctl_load(cr_new, 2, 2);
 	}
 	/* Copy user specified PER registers */
 	new.control = thread->per_user.control;
@@ -998,9 +986,6 @@ static int s390_fpregs_set(struct task_struct *target,
 	else
 		memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
 
-	if (target == current)
-		restore_fpu_regs(&target->thread.fpu);
-
 	return rc;
 }
 
@@ -1090,12 +1075,9 @@ static int s390_vxrs_low_set(struct task_struct *target,
 		save_fpu_regs(&target->thread.fpu);
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
-	if (rc == 0) {
+	if (rc == 0)
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 			*((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i];
-		if (target == current)
-			restore_fpu_regs(&target->thread.fpu);
-	}
 
 	return rc;
 }
@@ -1137,9 +1119,6 @@ static int s390_vxrs_high_set(struct task_struct *target,
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
-	if (rc == 0 && target == current)
-		restore_vx_regs(target->thread.fpu.vxrs);
-
 	return rc;
 }
 
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 9f60467938d1..5090d3dad10b 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -1,5 +1,6 @@
 #include <linux/module.h>
 #include <linux/kvm_host.h>
+#include <asm/fpu-internal.h>
 #include <asm/ftrace.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
@@ -8,6 +9,8 @@ EXPORT_SYMBOL(_mcount);
 #if IS_ENABLED(CONFIG_KVM)
 EXPORT_SYMBOL(sie64a);
 EXPORT_SYMBOL(sie_exit);
+EXPORT_SYMBOL(save_fpu_regs);
+EXPORT_SYMBOL(__ctl_set_vx);
 #endif
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 49c259cd5a33..2f4c7e2638c9 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -112,7 +112,6 @@ static void store_sigregs(void)
 static void load_sigregs(void)
 {
 	restore_access_regs(current->thread.acrs);
-	restore_fpu_regs(&current->thread.fpu);
 }
 
 /* Returns non-zero on fault. */
@@ -223,6 +222,7 @@ SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
 		goto badframe;
 	set_current_blocked(&set);
+	save_fpu_regs(&current->thread.fpu);
 	if (restore_sigregs(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->sregs_ext))
@@ -246,6 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
+	save_fpu_regs(&current->thread.fpu);
 	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 7b09224c05a3..76f76932ccb9 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -151,7 +151,7 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
 DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
 	      "transaction constraint exception")
 
-static inline void do_fp_trap(struct pt_regs *regs, int fpc)
+static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc)
 {
 	int si_code = 0;
 	/* FPC[2] is Data Exception Code */
@@ -236,17 +236,13 @@ int alloc_vector_registers(struct task_struct *tsk)
 		return -ENOMEM;
 	preempt_disable();
 	if (tsk == current)
-		save_fp_regs(tsk->thread.fpu.fprs);
+		save_fpu_regs(&tsk->thread.fpu);
 	/* Copy the 16 floating point registers */
 	convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
 	fprs = tsk->thread.fpu.fprs;
 	tsk->thread.fpu.vxrs = vxrs;
 	tsk->thread.fpu.flags |= FPU_USE_VX;
 	kfree(fprs);
-	if (tsk == current) {
-		__ctl_set_bit(0, 17);
-		restore_vx_regs(vxrs);
-	}
 	preempt_enable();
 	return 0;
 }
@@ -261,7 +257,7 @@ void vector_exception(struct pt_regs *regs)
 	}
 
 	/* get vector interrupt code from fpc */
-	asm volatile("stfpc %0" : "=Q" (current->thread.fpu.fpc));
+	save_fpu_regs(&current->thread.fpu);
 	vic = (current->thread.fpu.fpc & 0xf00) >> 8;
 	switch (vic) {
 	case 1: /* invalid vector operation */
@@ -299,7 +295,7 @@ void data_exception(struct pt_regs *regs)
 
 	location = get_trap_ip(regs);
 
-	asm volatile("stfpc %0" : "=Q" (current->thread.fpu.fpc));
+	save_fpu_regs(&current->thread.fpu);
 	/* Check for vector register enablement */
 	if (MACHINE_HAS_VX && !is_vx_task(current) &&
 	    (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fc7bc7118b23..c0cceaf4a92e 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1198,27 +1198,54 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+/*
+ * Backs up the current FP/VX register save area on a particular
+ * destination.  Used to switch between different register save
+ * areas.
+ */
+static inline void save_fpu_to(struct fpu *dst)
+{
+	dst->fpc = current->thread.fpu.fpc;
+	dst->flags = current->thread.fpu.flags;
+	dst->regs = current->thread.fpu.regs;
+}
+
+/*
+ * Switches the FP/VX register save area from which to lazy
+ * restore register contents.
+ */
+static inline void load_fpu_from(struct fpu *from)
+{
+	current->thread.fpu.fpc = from->fpc;
+	current->thread.fpu.flags = from->flags;
+	current->thread.fpu.regs = from->regs;
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	__u32 fpc;
+	/* Save host register state */
+	save_fpu_regs(&current->thread.fpu);
+	save_fpu_to(&vcpu->arch.host_fpregs);
 
-	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-	if (test_kvm_facility(vcpu->kvm, 129))
-		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
-	else
-		save_fp_regs(vcpu->arch.host_fpregs.fprs);
-	save_access_regs(vcpu->arch.host_acrs);
 	if (test_kvm_facility(vcpu->kvm, 129)) {
-		fpc = vcpu->run->s.regs.fpc;
-		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
-	} else {
-		fpc = vcpu->arch.guest_fpregs.fpc;
-		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	}
-	if (test_fp_ctl(fpc))
+		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+		current->thread.fpu.flags = FPU_USE_VX;
+		/*
+		 * Use the register save area in the SIE-control block
+		 * for register restore and save in kvm_arch_vcpu_put()
+		 */
+		current->thread.fpu.vxrs =
+			(__vector128 *)&vcpu->run->s.regs.vrs;
+		/* Always enable the vector extension for KVM */
+		__ctl_set_vx();
+	} else
+		load_fpu_from(&vcpu->arch.guest_fpregs);
+
+	if (test_fp_ctl(current->thread.fpu.fpc))
 		/* User space provided an invalid FPC, let's clear it */
-		fpc = 0;
-	restore_fp_ctl(&fpc);
+		current->thread.fpu.fpc = 0;
+
+	save_access_regs(vcpu->arch.host_acrs);
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	gmap_enable(vcpu->arch.gmap);
 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1228,19 +1255,22 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
-	if (test_kvm_facility(vcpu->kvm, 129)) {
-		save_fp_ctl(&vcpu->run->s.regs.fpc);
-		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
-	} else {
-		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	}
-	save_access_regs(vcpu->run->s.regs.acrs);
-	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
+
+	save_fpu_regs(&current->thread.fpu);
+
 	if (test_kvm_facility(vcpu->kvm, 129))
-		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+		/*
+		 * kvm_arch_vcpu_load() set up the register save area to
+		 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
+		 * are already saved.  Only the floating-point control must be
+		 * copied.
+		 */
+		vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
 	else
-		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+		save_fpu_to(&vcpu->arch.guest_fpregs);
+	load_fpu_from(&vcpu->arch.host_fpregs);
+
+	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_access_regs(vcpu->arch.host_acrs);
 }
 
@@ -1383,7 +1413,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	vcpu->arch.sie_block = &sie_page->sie_block;
 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
-	vcpu->arch.host_vregs = &sie_page->vregs;
 
 	vcpu->arch.sie_block->icpua = id;
 	if (!kvm_is_ucontrol(kvm)) {
@@ -1405,6 +1434,19 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
 
+	/*
+	 * Allocate a save area for floating-point registers.  If the vector
+	 * extension is available, register contents are saved in the SIE
+	 * control block.  The allocated save area is still required in
+	 * particular places, for example, in kvm_s390_vcpu_store_status().
+	 */
+	vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+					       GFP_KERNEL);
+	if (!vcpu->arch.guest_fpregs.fprs) {
+		rc = -ENOMEM;
+		goto out_free_sie_block;
+	}
+
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
 		goto out_free_sie_block;
@@ -1627,16 +1669,16 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	if (test_fp_ctl(fpu->fpc))
 		return -EINVAL;
-	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_fpu_regs(&current->thread.fpu);
+	load_fpu_from(&vcpu->arch.guest_fpregs);
 	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+	memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
 	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
 	return 0;
 }
@@ -2199,8 +2241,21 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	 * copying in vcpu load/put. Lets update our copies before we save
 	 * it into the save area
 	 */
-	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_fpu_regs(&current->thread.fpu);
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		/*
+		 * If the vector extension is available, the vector registers
+		 * which overlap with floating-point registers are saved in
+		 * the SIE-control block.  Hence, extract the floating-point
+		 * registers and the FPC value and store them in the
+		 * guest_fpregs structure.
+		 */
+		WARN_ON(!is_vx_task(current));	  /* XXX remove later */
+		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
+		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
+				 current->thread.fpu.vxrs);
+	} else
+		save_fpu_to(&vcpu->arch.guest_fpregs);
 	save_access_regs(vcpu->run->s.regs.acrs);
 
 	return kvm_s390_store_status_unloaded(vcpu, addr);
@@ -2227,10 +2282,13 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
 
 	/*
 	 * The guest VXRS are in the host VXRs due to the lazy
-	 * copying in vcpu load/put. Let's update our copies before we save
-	 * it into the save area.
+	 * copying in vcpu load/put. We can simply call save_fpu_regs()
+	 * to save the current register state because we are in the
+	 * middle of a load/put cycle.
+	 *
+	 * Let's update our copies before we save it into the save area.
 	 */
-	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	save_fpu_regs(&current->thread.fpu);
 
 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
 }
-- 
cgit v1.2.3


From 8f00b3e28f73e712a2f82a15f66acd852f60e3ba Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 19 Feb 2015 12:22:02 +0100
Subject: s390/module: enable generic CPU feature modalias using s390 ELF
 hwcaps

Add support for the generic CPU feature modalias implementation that wires
up optional CPU features to udev-based module autoprobing.

The <asm/cpufeature.h> file provides definitions to map CPU features to
s390 ELF hardware capabilities.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/Kconfig                  |  1 +
 arch/s390/include/asm/cpufeature.h | 29 +++++++++++++++++++++++++++++
 arch/s390/kernel/processor.c       |  9 +++++++++
 arch/s390/kernel/setup.c           |  2 +-
 4 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 arch/s390/include/asm/cpufeature.h

(limited to 'arch/s390')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b06dc3839268..91e8954f1237 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -105,6 +105,7 @@ config S390
 	select CLONE_BACKWARDS2
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_DEVICES if !SMP
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_SMP_IDLE_THREAD
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
new file mode 100644
index 000000000000..fa7e69b7c299
--- /dev/null
+++ b/arch/s390/include/asm/cpufeature.h
@@ -0,0 +1,29 @@
+/*
+ * Module interface for CPU features
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_S390_CPUFEATURE_H
+#define __ASM_S390_CPUFEATURE_H
+
+#include <asm/elf.h>
+
+/* Hardware features on Linux on z Systems are indicated by facility bits that
+ * are mapped to the so-called machine flags.  Particular machine flags are
+ * then used to define ELF hardware capabilities; most notably hardware flags
+ * that are essential for user space / glibc.
+ *
+ * Restrict the set of exposed CPU features to ELF hardware capabilities for
+ * now.  Additional machine flags can be indicated by values larger than
+ * MAX_ELF_HWCAP_FEATURES.
+ */
+#define MAX_ELF_HWCAP_FEATURES	(8 * sizeof(elf_hwcap))
+#define MAX_CPU_FEATURES	MAX_ELF_HWCAP_FEATURES
+
+#define cpu_feature(feat)	ilog2(HWCAP_S390_ ## feat)
+
+int cpu_have_feature(unsigned int nr);
+
+#endif /* __ASM_S390_CPUFEATURE_H */
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index dc488e13b7e3..e6e077ae3990 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -40,6 +40,15 @@ void cpu_init(void)
 	enter_lazy_tlb(&init_mm, current);
 }
 
+/*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+	return elf_hwcap & (1UL << num);
+}
+EXPORT_SYMBOL(cpu_have_feature);
+
 /*
  * show_cpuinfo - Get information on one CPU for use by procfs.
  */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index ca070d260af2..85a1d4770c9c 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -76,7 +76,7 @@ EXPORT_SYMBOL(console_devno);
 unsigned int console_irq = -1;
 EXPORT_SYMBOL(console_irq);
 
-unsigned long elf_hwcap = 0;
+unsigned long elf_hwcap __read_mostly = 0;
 char elf_platform[ELF_PLATFORM_SIZE];
 
 int __initdata memory_end_set;
-- 
cgit v1.2.3


From d05377c12ae2ac88e747a28ae1e23f556d549592 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 19 Feb 2015 17:34:07 +0100
Subject: s390/crypto: add cpu feature modaliases for crypto modules

Use the module_cpu_feature_match() module init function to add a
module alias based on required CPU features.  The modules are
automatically loaded on hardware that supports the required CPU features.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/crypto/aes_s390.c    | 3 ++-
 arch/s390/crypto/des_s390.c    | 3 ++-
 arch/s390/crypto/ghash_s390.c  | 3 ++-
 arch/s390/crypto/prng.c        | 4 ++--
 arch/s390/crypto/sha1_s390.c   | 3 ++-
 arch/s390/crypto/sha256_s390.c | 3 ++-
 arch/s390/crypto/sha512_s390.c | 3 ++-
 7 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 5566ce80abdb..0b9b95f3c703 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -24,6 +24,7 @@
 #include <crypto/algapi.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include "crypt_s390.h"
@@ -976,7 +977,7 @@ static void __exit aes_s390_fini(void)
 	crypto_unregister_alg(&aes_alg);
 }
 
-module_init(aes_s390_init);
+module_cpu_feature_match(MSA, aes_s390_init);
 module_exit(aes_s390_fini);
 
 MODULE_ALIAS_CRYPTO("aes-all");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 9e05cc453a40..fba1c10a2dd0 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -16,6 +16,7 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <crypto/algapi.h>
 #include <crypto/des.h>
@@ -616,7 +617,7 @@ static void __exit des_s390_exit(void)
 	crypto_unregister_alg(&des_alg);
 }
 
-module_init(des_s390_init);
+module_cpu_feature_match(MSA, des_s390_init);
 module_exit(des_s390_exit);
 
 MODULE_ALIAS_CRYPTO("des");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index b258110da952..26e14efd30a7 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -9,6 +9,7 @@
 
 #include <crypto/internal/hash.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 
 #include "crypt_s390.h"
 
@@ -158,7 +159,7 @@ static void __exit ghash_mod_exit(void)
 	crypto_unregister_shash(&ghash_alg);
 }
 
-module_init(ghash_mod_init);
+module_cpu_feature_match(MSA, ghash_mod_init);
 module_exit(ghash_mod_exit);
 
 MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 9d5192c94963..b8045b97f4fb 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
+#include <linux/cpufeature.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <asm/debug.h>
@@ -914,6 +915,5 @@ static void __exit prng_exit(void)
 	}
 }
 
-
-module_init(prng_init);
+module_cpu_feature_match(MSA, prng_init);
 module_exit(prng_exit);
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 5b2bee323694..9208eadae9f0 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -26,6 +26,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <crypto/sha.h>
 
 #include "crypt_s390.h"
@@ -100,7 +101,7 @@ static void __exit sha1_s390_fini(void)
 	crypto_unregister_shash(&alg);
 }
 
-module_init(sha1_s390_init);
+module_cpu_feature_match(MSA, sha1_s390_init);
 module_exit(sha1_s390_fini);
 
 MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index b74ff158108c..667888f5c964 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -16,6 +16,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 #include <crypto/sha.h>
 
 #include "crypt_s390.h"
@@ -140,7 +141,7 @@ static void __exit sha256_s390_fini(void)
 	crypto_unregister_shash(&sha256_alg);
 }
 
-module_init(sha256_s390_init);
+module_cpu_feature_match(MSA, sha256_s390_init);
 module_exit(sha256_s390_fini);
 
 MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 0c36989ba182..2ba66b1518f0 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/cpufeature.h>
 
 #include "sha.h"
 #include "crypt_s390.h"
@@ -148,7 +149,7 @@ static void __exit fini(void)
 	crypto_unregister_shash(&sha384_alg);
 }
 
-module_init(init);
+module_cpu_feature_match(MSA, init);
 module_exit(fini);
 
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From dcd2a9aaa0e0331ba0c4d7a64830788f22f26aa4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 22 Jun 2015 17:14:14 +0200
Subject: s390/kvm: fix interrupt race with HANDLE_SIE_INTERCEPT

The HANDLE_SIE_INTERCEPT macro is used in the interrupt handlers
and the program check handler to undo a few changes done by sie64a.
Among them are guest vs host LPP, the gmap ASCE vs kernel ASCE and
the bit that indicates that SIE is currently running on the CPU.

There is a race of a voluntary SIE exit vs asynchronous interrupts.
If the CPU completed the SIE instruction and the TM instruction of
the LPP macro at the time it receives an interrupt, the interrupt
handler will run while the LPP, the ASCE and the SIE bit are still
set up for guest execution. This might result in wrong sampling data,
but it will not cause data corruption or lockups.

The critical section in sie64a needs to be enlarged to include all
instructions that undo the changes required for guest execution.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry.S | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 05ea485156ee..391db6412b85 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -90,14 +90,7 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	lgr	\scratch,%r9
 	slg	\scratch,BASED(.Lsie_critical)
 	clg	\scratch,BASED(.Lsie_critical_length)
-	.if	\reason==1
-	# Some program interrupts are suppressing (e.g. protection).
-	# We must also check the instruction after SIE in that case.
-	# do_protection_exception will rewind to .Lrewind_pad
-	jh	.+42
-	.else
 	jhe	.+42
-	.endif
 	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
 	LPP	__SF_EMPTY+16(%r15)		# set host id
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
@@ -1308,15 +1301,16 @@ ENTRY(sie64a)
 	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
 	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
 	tm	__SIE_PROG20+3(%r14),3		# last exit...
-	jnz	.Lsie_done
+	jnz	.Lsie_skip
 	tm	__LC_CPU_FLAGS+7,_CIF_FPU
-	jo	.Lsie_done			# exit if fp/vx regs changed
+	jo	.Lsie_skip			# exit if fp/vx regs changed
 	LPP	__SF_EMPTY(%r15)		# set guest id
 	sie	0(%r14)
-.Lsie_done:
 	LPP	__SF_EMPTY+16(%r15)		# set host id
+.Lsie_skip:
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+.Lsie_done:
 # some program checks are suppressing. C code (e.g. do_protection_exception)
 # will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
 # instructions between sie64a and .Lsie_done should not cause program
-- 
cgit v1.2.3


From d0fc41071a6884d0a10077bb6dc87f9267f32dd6 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 22 Jun 2015 17:26:40 +0200
Subject: s390/kvm: integrate HANDLE_SIE_INTERCEPT into cleanup_critical

Currently there are two mechanisms to deal with cleanup work due to
interrupts. The HANDLE_SIE_INTERCEPT macro is used to undo the changes
required to enter SIE in sie64a. If the SIE instruction causes a program
check, or an asynchronous interrupt is received, the HANDLE_SIE_INTERCEPT
code forwards the program execution to sie_exit.

All the other critical sections in entry.S are handled by the code in
cleanup_critical that is called by the SWITCH_ASYNC macro.

Move the sie64a function to the beginning of the critical section and
add the code from HANDLE_SIE_INTERCEPT to cleanup_critical. Add a special
case for the sie64a cleanup to the program check handler.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry.S | 241 ++++++++++++++++++++++++-----------------------
 1 file changed, 123 insertions(+), 118 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 391db6412b85..9406e7a62d2f 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -75,31 +75,6 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #endif
 	.endm
 
-	.macro LPP newpp
-#if IS_ENABLED(CONFIG_KVM)
-	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
-	jz	.+8
-	.insn	s,0xb2800000,\newpp
-#endif
-	.endm
-
-	.macro	HANDLE_SIE_INTERCEPT scratch,reason
-#if IS_ENABLED(CONFIG_KVM)
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jnz	.+62
-	lgr	\scratch,%r9
-	slg	\scratch,BASED(.Lsie_critical)
-	clg	\scratch,BASED(.Lsie_critical_length)
-	jhe	.+42
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
-	LPP	__SF_EMPTY+16(%r15)		# set host id
-	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-	larl	%r9,sie_exit			# skip forward to sie_exit
-	mvi	__SF_EMPTY+31(%r15),\reason	# set exit reason
-#endif
-	.endm
-
 	.macro	CHECK_STACK stacksize,savearea
 #ifdef CONFIG_CHECK_STACK
 	tml	%r15,\stacksize - CONFIG_STACK_GUARD
@@ -192,6 +167,70 @@ ENTRY(__switch_to)
 	br	%r14
 
 .L__critical_start:
+
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ENTRY(sie64a)
+	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
+	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
+	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
+	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU	# load guest fp/vx registers ?
+	jno	.Lsie_load_guest_gprs
+	lg	%r12,__LC_THREAD_INFO		# load fp/vx regs save area
+	brasl	%r14,load_fpu_regs		# load guest fp/vx regs
+.Lsie_load_guest_gprs:
+	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
+	lg	%r14,__LC_GMAP			# get gmap pointer
+	ltgr	%r14,%r14
+	jz	.Lsie_gmap
+	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
+.Lsie_gmap:
+	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
+	tm	__SIE_PROG20+3(%r14),3		# last exit...
+	jnz	.Lsie_skip
+	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	jo	.Lsie_skip			# exit if fp/vx regs changed
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	.Lsie_enter
+	.insn	s,0xb2800000,__SF_EMPTY(%r15)	# set guest id
+.Lsie_enter:
+	sie	0(%r14)
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	.Lsie_skip
+	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
+.Lsie_skip:
+	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+.Lsie_done:
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# See also .Lcleanup_sie
+.Lrewind_pad:
+	nop	0
+	.globl sie_exit
+sie_exit:
+	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
+	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
+	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
+	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
+	br	%r14
+.Lsie_fault:
+	lghi	%r14,-EFAULT
+	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
+	j	sie_exit
+
+	EX_TABLE(.Lrewind_pad,.Lsie_fault)
+	EX_TABLE(sie_exit,.Lsie_fault)
+#endif
+
 /*
  * SVC interrupt handler routine. System calls are synchronous events and
  * are executed with interrupts enabled.
@@ -411,26 +450,33 @@ ENTRY(pgm_check_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_PGM_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,1
 	tmhh	%r8,0x0001		# test problem state bit
-	jnz	1f			# -> fault in user space
-	tmhh	%r8,0x4000		# PER bit set in old PSW ?
-	jnz	0f			# -> enabled, can't be a double fault
+	jnz	2f			# -> fault in user space
+#if IS_ENABLED(CONFIG_KVM)
+	# cleanup critical section for sie64a
+	lgr	%r14,%r9
+	slg	%r14,BASED(.Lsie_critical_start)
+	clg	%r14,BASED(.Lsie_critical_length)
+	jhe	0f
+	brasl	%r14,.Lcleanup_sie
+#endif
+0:	tmhh	%r8,0x4000		# PER bit set in old PSW ?
+	jnz	1f			# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
 	jnz	.Lpgm_svcper		# -> single stepped svc
-0:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+1:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	2f
-1:	UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
+	j	3f
+2:	UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
 	LAST_BREAK %r14
 	lg	%r15,__LC_KERNEL_STACK
 	lg	%r14,__TI_task(%r12)
 	aghi	%r14,__TASK_thread	# pointer to thread_struct
 	lghi	%r13,__LC_PGM_TDB
 	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
-	jz	2f
+	jz	3f
 	mvc	__THREAD_trap_tdb(256,%r14),0(%r13)
-2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -439,14 +485,14 @@ ENTRY(pgm_check_handler)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	stg	%r10,__PT_ARGS(%r11)
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
-	jz	0f
+	jz	4f
 	tmhh	%r8,0x0001		# kernel per event ?
 	jz	.Lpgm_kprobe
 	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
 	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-0:	REENABLE_IRQS
+4:	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	larl	%r1,pgm_check_table
 	llgh	%r10,__PT_INT_CODE+2(%r11)
@@ -489,7 +535,6 @@ ENTRY(io_int_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_IO_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,2
 	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
 	tmhh	%r8,0x0001		# interrupting from user?
 	jz	.Lio_skip
@@ -667,7 +712,6 @@ ENTRY(ext_int_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_EXT_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,3
 	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
 	tmhh	%r8,0x0001		# interrupting from user ?
 	jz	.Lext_skip
@@ -832,7 +876,6 @@ ENTRY(mcck_int_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_MCK_OLD_PSW
-	HANDLE_SIE_INTERCEPT %r14,4
 	tm	__LC_MCCK_CODE,0x80	# system damage?
 	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
 	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
@@ -947,26 +990,13 @@ stack_overflow:
 	jg	kernel_stack_overflow
 #endif
 
-	.align	8
-.Lcleanup_table:
-	.quad	system_call
-	.quad	.Lsysc_do_svc
-	.quad	.Lsysc_tif
-	.quad	.Lsysc_restore
-	.quad	.Lsysc_done
-	.quad	.Lio_tif
-	.quad	.Lio_restore
-	.quad	.Lio_done
-	.quad	psw_idle
-	.quad	.Lpsw_idle_end
-	.quad	save_fpu_regs
-	.quad	.Lsave_fpu_regs_end
-	.quad	load_fpu_regs
-	.quad	.Lload_fpu_regs_end
-	.quad	__ctl_set_vx
-	.quad	.L__ctl_set_vx_end
-
 cleanup_critical:
+#if IS_ENABLED(CONFIG_KVM)
+	clg	%r9,BASED(.Lcleanup_table_sie)	# .Lsie_gmap
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done
+	jl	.Lcleanup_sie
+#endif
 	clg	%r9,BASED(.Lcleanup_table)	# system_call
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_do_svc
@@ -1001,6 +1031,40 @@ cleanup_critical:
 	jl	.Lcleanup___ctl_set_vx
 0:	br	%r14
 
+	.align	8
+.Lcleanup_table:
+	.quad	system_call
+	.quad	.Lsysc_do_svc
+	.quad	.Lsysc_tif
+	.quad	.Lsysc_restore
+	.quad	.Lsysc_done
+	.quad	.Lio_tif
+	.quad	.Lio_restore
+	.quad	.Lio_done
+	.quad	psw_idle
+	.quad	.Lpsw_idle_end
+	.quad	save_fpu_regs
+	.quad	.Lsave_fpu_regs_end
+	.quad	load_fpu_regs
+	.quad	.Lload_fpu_regs_end
+	.quad	__ctl_set_vx
+	.quad	.L__ctl_set_vx_end
+
+#if IS_ENABLED(CONFIG_KVM)
+.Lcleanup_table_sie:
+	.quad	.Lsie_gmap
+	.quad	.Lsie_done
+
+.Lcleanup_sie:
+	lg	%r9,__SF_EMPTY(%r15)		# get control block pointer
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	0f
+	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
+0:	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+	larl	%r9,sie_exit			# skip forward to sie_exit
+	br	%r14
+#endif
 
 .Lcleanup_system_call:
 	# check if stpt has been executed
@@ -1274,70 +1338,11 @@ cleanup_critical:
 	.quad	.L__critical_start
 .Lcritical_length:
 	.quad	.L__critical_end - .L__critical_start
-
-
 #if IS_ENABLED(CONFIG_KVM)
-/*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
- */
-ENTRY(sie64a)
-	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
-	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
-	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
-	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU	# load guest fp/vx registers ?
-	jno	.Lsie_load_guest_gprs
-	lg	%r12,__LC_THREAD_INFO		# load fp/vx regs save area
-	brasl	%r14,load_fpu_regs		# load guest fp/vx regs
-.Lsie_load_guest_gprs:
-	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
-	lg	%r14,__LC_GMAP			# get gmap pointer
-	ltgr	%r14,%r14
-	jz	.Lsie_gmap
-	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
-.Lsie_gmap:
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
-	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
-	tm	__SIE_PROG20+3(%r14),3		# last exit...
-	jnz	.Lsie_skip
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
-	jo	.Lsie_skip			# exit if fp/vx regs changed
-	LPP	__SF_EMPTY(%r15)		# set guest id
-	sie	0(%r14)
-	LPP	__SF_EMPTY+16(%r15)		# set host id
-.Lsie_skip:
-	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
-	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
-.Lsie_done:
-# some program checks are suppressing. C code (e.g. do_protection_exception)
-# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
-# instructions between sie64a and .Lsie_done should not cause program
-# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
-# See also HANDLE_SIE_INTERCEPT
-.Lrewind_pad:
-	nop	0
-	.globl sie_exit
-sie_exit:
-	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
-	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
-	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
-	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
-	br	%r14
-.Lsie_fault:
-	lghi	%r14,-EFAULT
-	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
-	j	sie_exit
-
-	.align	8
-.Lsie_critical:
+.Lsie_critical_start:
 	.quad	.Lsie_gmap
 .Lsie_critical_length:
 	.quad	.Lsie_done - .Lsie_gmap
-
-	EX_TABLE(.Lrewind_pad,.Lsie_fault)
-	EX_TABLE(sie_exit,.Lsie_fault)
 #endif
 
 	.section .rodata, "a"
-- 
cgit v1.2.3


From a359bb1190f213d282f4934fd461cf440d87dae0 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 22 Jun 2015 17:27:48 +0200
Subject: s390/kernel: squeeze a few more cycles out of the system call handler

Reorder the instructions of UPDATE_VTIME to improve superscalar execution,
remove duplicate checks for problem-state from the asynchronous interrupt
handlers, and move the check for problem-state from the synchronous
exit path to the program check path as it is only needed for program
checks inside the kernel.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry.S | 75 ++++++++++++++++++++++--------------------------
 1 file changed, 34 insertions(+), 41 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 9406e7a62d2f..a721c39d014d 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -83,7 +83,7 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #endif
 	.endm
 
-	.macro	SWITCH_ASYNC savearea,stack,shift
+	.macro	SWITCH_ASYNC savearea,stack,shift,timer
 	tmhh	%r8,0x0001		# interrupting from user ?
 	jnz	1f
 	lgr	%r14,%r9
@@ -97,23 +97,25 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 0:	lg	%r14,\stack		# are we already on the target stack?
 	slgr	%r14,%r15
 	srag	%r14,%r14,\shift
-	jnz	1f
+	jnz	2f
 	CHECK_STACK 1<<\shift,\savearea
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	2f
-1:	lg	%r15,\stack		# load target stack
-2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+	j	3f
+1:	LAST_BREAK %r14
+	UPDATE_VTIME %r14,%r15,\timer
+2:	lg	%r15,\stack		# load target stack
+3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	.endm
 
-	.macro UPDATE_VTIME scratch,enter_timer
-	lg	\scratch,__LC_EXIT_TIMER
-	slg	\scratch,\enter_timer
-	alg	\scratch,__LC_USER_TIMER
-	stg	\scratch,__LC_USER_TIMER
-	lg	\scratch,__LC_LAST_UPDATE_TIMER
-	slg	\scratch,__LC_EXIT_TIMER
-	alg	\scratch,__LC_SYSTEM_TIMER
-	stg	\scratch,__LC_SYSTEM_TIMER
+	.macro UPDATE_VTIME w1,w2,enter_timer
+	lg	\w1,__LC_EXIT_TIMER
+	lg	\w2,__LC_LAST_UPDATE_TIMER
+	slg	\w1,\enter_timer
+	slg	\w2,__LC_EXIT_TIMER
+	alg	\w1,__LC_USER_TIMER
+	alg	\w2,__LC_SYSTEM_TIMER
+	stg	\w1,__LC_USER_TIMER
+	stg	\w2,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),\enter_timer
 	.endm
 
@@ -246,9 +248,9 @@ ENTRY(system_call)
 .Lsysc_per:
 	lg	%r15,__LC_KERNEL_STACK
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
-.Lsysc_vtime:
-	UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
 	LAST_BREAK %r13
+.Lsysc_vtime:
+	UPDATE_VTIME %r10,%r13,__LC_SYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW
@@ -278,8 +280,6 @@ ENTRY(system_call)
 .Lsysc_return:
 	LOCKDEP_SYS_EXIT
 .Lsysc_tif:
-	tm	__PT_PSW+1(%r11),0x01		# returning to user ?
-	jno	.Lsysc_restore
 	tm	__PT_FLAGS+7(%r11),_PIF_WORK
 	jnz	.Lsysc_work
 	tm	__TI_flags+7(%r12),_TIF_WORK
@@ -467,8 +467,8 @@ ENTRY(pgm_check_handler)
 1:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	3f
-2:	UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
-	LAST_BREAK %r14
+2:	LAST_BREAK %r14
+	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
 	lg	%r15,__LC_KERNEL_STACK
 	lg	%r14,__TI_task(%r12)
 	aghi	%r14,__TASK_thread	# pointer to thread_struct
@@ -498,11 +498,15 @@ ENTRY(pgm_check_handler)
 	llgh	%r10,__PT_INT_CODE+2(%r11)
 	nill	%r10,0x007f
 	sll	%r10,2
-	je	.Lsysc_return
+	je	.Lpgm_return
 	lgf	%r1,0(%r10,%r1)		# load address of handler routine
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	basr	%r14,%r1		# branch to interrupt-handler
-	j	.Lsysc_return
+.Lpgm_return:
+	LOCKDEP_SYS_EXIT
+	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
+	jno	.Lsysc_restore
+	j	.Lsysc_tif
 
 #
 # PER event in supervisor state, must be kprobes
@@ -512,7 +516,7 @@ ENTRY(pgm_check_handler)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_per_trap
-	j	.Lsysc_return
+	j	.Lpgm_return
 
 #
 # single stepped system call
@@ -535,12 +539,8 @@ ENTRY(io_int_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_IO_OLD_PSW
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	tmhh	%r8,0x0001		# interrupting from user?
-	jz	.Lio_skip
-	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
-	LAST_BREAK %r14
-.Lio_skip:
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT,\
+		     __LC_ASYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -712,12 +712,8 @@ ENTRY(ext_int_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_EXT_OLD_PSW
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
-	tmhh	%r8,0x0001		# interrupting from user ?
-	jz	.Lext_skip
-	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
-	LAST_BREAK %r14
-.Lext_skip:
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT,\
+		     __LC_ASYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -896,11 +892,8 @@ ENTRY(mcck_int_handler)
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
 	jno	.Lmcck_panic		# no -> skip cleanup critical
-	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
-	tm	%r8,0x0001		# interrupting from user ?
-	jz	.Lmcck_skip
-	UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
-	LAST_BREAK %r14
+	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT,\
+		     __LC_MCCK_ENTER_TIMER
 .Lmcck_skip:
 	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
 	stmg	%r0,%r7,__PT_R0(%r11)
@@ -1125,7 +1118,7 @@ cleanup_critical:
 	.quad	system_call
 	.quad	.Lsysc_stmg
 	.quad	.Lsysc_per
-	.quad	.Lsysc_vtime+18
+	.quad	.Lsysc_vtime+36
 	.quad	.Lsysc_vtime+42
 
 .Lcleanup_sysc_tif:
-- 
cgit v1.2.3


From 2acb94f43128b5cd375873f9ba82fac968d3ce5d Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 22 Jun 2015 17:28:14 +0200
Subject: s390/nmi: use the normal asynchronous stack for machine checks

If a machine check is received while the CPU is in the kernel, only
the s390_do_machine_check function will be called. The call to
s390_handle_mcck is postponed until the CPU returns to user space.
Because of this it is safe to use the asynchronous stack for machine
checks even if the CPU is already handling an interrupt.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry.S | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index a721c39d014d..21c1219122af 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -83,7 +83,7 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #endif
 	.endm
 
-	.macro	SWITCH_ASYNC savearea,stack,shift,timer
+	.macro	SWITCH_ASYNC savearea,timer
 	tmhh	%r8,0x0001		# interrupting from user ?
 	jnz	1f
 	lgr	%r14,%r9
@@ -94,16 +94,16 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	brasl	%r14,cleanup_critical
 	tmhh	%r8,0x0001		# retest problem state after cleanup
 	jnz	1f
-0:	lg	%r14,\stack		# are we already on the target stack?
+0:	lg	%r14,__LC_ASYNC_STACK	# are we already on the async stack?
 	slgr	%r14,%r15
-	srag	%r14,%r14,\shift
+	srag	%r14,%r14,STACK_SHIFT
 	jnz	2f
-	CHECK_STACK 1<<\shift,\savearea
+	CHECK_STACK 1<<STACK_SHIFT,\savearea
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	3f
 1:	LAST_BREAK %r14
 	UPDATE_VTIME %r14,%r15,\timer
-2:	lg	%r15,\stack		# load target stack
+2:	lg	%r15,__LC_ASYNC_STACK	# load async stack
 3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	.endm
 
@@ -539,8 +539,7 @@ ENTRY(io_int_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_IO_OLD_PSW
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT,\
-		     __LC_ASYNC_ENTER_TIMER
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -712,8 +711,7 @@ ENTRY(ext_int_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_EXT_OLD_PSW
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT,\
-		     __LC_ASYNC_ENTER_TIMER
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
@@ -892,8 +890,7 @@ ENTRY(mcck_int_handler)
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
 	jno	.Lmcck_panic		# no -> skip cleanup critical
-	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT,\
-		     __LC_MCCK_ENTER_TIMER
+	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
 .Lmcck_skip:
 	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
 	stmg	%r0,%r7,__PT_R0(%r11)
@@ -928,12 +925,8 @@ ENTRY(mcck_int_handler)
 	lpswe	__LC_RETURN_MCCK_PSW
 
 .Lmcck_panic:
-	lg	%r14,__LC_PANIC_STACK
-	slgr	%r14,%r15
-	srag	%r14,%r14,PAGE_SHIFT
-	jz	0f
 	lg	%r15,__LC_PANIC_STACK
-0:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	.Lmcck_skip
 
 #
-- 
cgit v1.2.3


From 198a52789435a00087040ad0ec25da84c555621f Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 23 Jun 2015 14:06:35 +0200
Subject: s390/pci: inline get_zdev

Inline get_zdev to save ~200 bytes of kernel text for CONFIG_PCI=y.
Also rename the function to to_zpci to make clear that we don't do
reference counting here.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pci.h |  6 +++++-
 arch/s390/pci/pci.c         | 21 ++++++++-------------
 arch/s390/pci/pci_dma.c     |  8 ++++----
 arch/s390/pci/pci_sysfs.c   |  6 +++---
 4 files changed, 20 insertions(+), 21 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index a648338c434a..9b6545e8f685 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -170,7 +170,11 @@ static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
 #endif /* CONFIG_HOTPLUG_PCI_S390 */
 
 /* Helpers */
-struct zpci_dev *get_zdev(struct pci_dev *);
+static inline struct zpci_dev *to_zpci(struct pci_dev *pdev)
+{
+	return pdev->sysdata;
+}
+
 struct zpci_dev *get_zdev_by_fid(u32);
 
 /* DMA */
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 598f023cf8a6..6316f4935f78 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -76,11 +76,6 @@ EXPORT_SYMBOL_GPL(zpci_iomap_start);
 
 static struct kmem_cache *zdev_fmb_cache;
 
-struct zpci_dev *get_zdev(struct pci_dev *pdev)
-{
-	return (struct zpci_dev *) pdev->sysdata;
-}
-
 struct zpci_dev *get_zdev_by_fid(u32 fid)
 {
 	struct zpci_dev *tmp, *zdev = NULL;
@@ -269,7 +264,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
 			      unsigned long offset,
 			      unsigned long max)
 {
-	struct zpci_dev *zdev =	get_zdev(pdev);
+	struct zpci_dev *zdev =	to_zpci(pdev);
 	u64 addr;
 	int idx;
 
@@ -385,7 +380,7 @@ static void zpci_irq_handler(struct airq_struct *airq)
 
 int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	unsigned int hwirq, msi_vecs;
 	unsigned long aisb;
 	struct msi_desc *msi;
@@ -460,7 +455,7 @@ out:
 
 void arch_teardown_msi_irqs(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	struct msi_desc *msi;
 	int rc;
 
@@ -648,7 +643,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 
 int pcibios_add_device(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	struct resource *res;
 	int i;
 
@@ -673,7 +668,7 @@ void pcibios_release_device(struct pci_dev *pdev)
 
 int pcibios_enable_device(struct pci_dev *pdev, int mask)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	zdev->pdev = pdev;
 	zpci_debug_init_device(zdev);
@@ -684,7 +679,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
 
 void pcibios_disable_device(struct pci_dev *pdev)
 {
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	zpci_fmb_disable_device(zdev);
 	zpci_debug_exit_device(zdev);
@@ -695,7 +690,7 @@ void pcibios_disable_device(struct pci_dev *pdev)
 static int zpci_restore(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	int ret = 0;
 
 	if (zdev->state != ZPCI_FN_STATE_ONLINE)
@@ -717,7 +712,7 @@ out:
 static int zpci_freeze(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	if (zdev->state != ZPCI_FN_STATE_ONLINE)
 		return 0;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 6fd8d5836138..42b76580c8b8 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -277,7 +277,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 				     enum dma_data_direction direction,
 				     struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long nr_pages, iommu_page_index;
 	unsigned long pa = page_to_phys(page) + offset;
 	int flags = ZPCI_PTE_VALID;
@@ -316,7 +316,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 				 size_t size, enum dma_data_direction direction,
 				 struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long iommu_page_index;
 	int npages;
 
@@ -337,7 +337,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
 			    dma_addr_t *dma_handle, gfp_t flag,
 			    struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	struct page *page;
 	unsigned long pa;
 	dma_addr_t map;
@@ -367,7 +367,7 @@ static void s390_dma_free(struct device *dev, size_t size,
 			  void *pa, dma_addr_t dma_handle,
 			  struct dma_attrs *attrs)
 {
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 
 	size = PAGE_ALIGN(size);
 	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index fa3ce891e597..dad920fd3606 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -16,7 +16,7 @@
 static ssize_t name##_show(struct device *dev,				\
 			   struct device_attribute *attr, char *buf)	\
 {									\
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));		\
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));		\
 									\
 	return sprintf(buf, fmt, zdev->member);				\
 }									\
@@ -38,7 +38,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 	int ret;
 
 	if (!device_remove_file_self(dev, attr))
@@ -64,7 +64,7 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
 {
 	struct device *dev = kobj_to_dev(kobj);
 	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = get_zdev(pdev);
+	struct zpci_dev *zdev = to_zpci(pdev);
 
 	return memory_read_from_buffer(buf, count, &off, zdev->util_str,
 				       sizeof(zdev->util_str));
-- 
cgit v1.2.3


From 22362a0e23182d230527a5add690b4027860d7d3 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 8 Jul 2015 10:20:04 +0200
Subject: s390/sclp: convert early sclp console code to C

The 31-bit assembler code for the early sclp console is error
prone as git commit fde24b54d976cc123506695c17db01438a11b673
"s390/sclp: clear upper register halves in _sclp_print_early"
has shown.

Convert the assembler code to C.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/Makefile                |   2 +
 arch/s390/include/asm/processor.h |  11 ++
 arch/s390/include/asm/sclp.h      |   2 +-
 arch/s390/kernel/Makefile         |  11 ++
 arch/s390/kernel/head.S           |   1 +
 arch/s390/kernel/sclp.S           | 355 --------------------------------------
 arch/s390/kernel/sclp.c           | 160 +++++++++++++++++
 7 files changed, 186 insertions(+), 356 deletions(-)
 delete mode 100644 arch/s390/kernel/sclp.S
 create mode 100644 arch/s390/kernel/sclp.c

(limited to 'arch/s390')

diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 667b1bca5681..e8d4423e4f85 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -33,6 +33,8 @@ mflags-$(CONFIG_MARCH_Z196)   := -march=z196
 mflags-$(CONFIG_MARCH_ZEC12)  := -march=zEC12
 mflags-$(CONFIG_MARCH_Z13)   := -march=z13
 
+export CC_FLAGS_MARCH := $(mflags-y)
+
 aflags-y += $(mflags-y)
 cflags-y += $(mflags-y)
 
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index c417015c5304..085fb0d3c54e 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -232,6 +232,17 @@ static inline void __load_psw_mask (unsigned long mask)
 		: "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
 }
 
+/*
+ * Extract current PSW mask
+ */
+static inline unsigned long __extract_psw(void)
+{
+	unsigned int reg1, reg2;
+
+	asm volatile("epsw %0,%1" : "=d" (reg1), "=a" (reg2));
+	return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
+}
+
 /*
  * Rewind PSW instruction address by specified number of bytes.
  */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index f6ff06077631..821dde5f425d 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -79,6 +79,6 @@ int sclp_pci_configure(u32 fid);
 int sclp_pci_deconfigure(u32 fid);
 int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
 void sclp_early_detect(void);
-long _sclp_print_early(const char *);
+int _sclp_print_early(const char *);
 
 #endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index ffb87617a36c..b756c6348ac6 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -28,6 +28,17 @@ CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
 CFLAGS_sysinfo.o += -w
 
+#
+# Use -march=z900 for sclp.c to be able to print an error message if
+# the kernel is started on a machine which is too old
+#
+CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE)
+ifneq ($(CC_FLAGS_MARCH),-march=z900)
+CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH)
+CFLAGS_sclp.o	+= -march=z900
+endif
+GCOV_PROFILE_sclp.o := n
+
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 59b7c6470567..63c77fdb619e 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -370,6 +370,7 @@ ENTRY(startup_kdump)
 	xc	0x200(256),0x200	# partially clear lowcore
 	xc	0x300(256),0x300
 	xc	0xe00(256),0xe00
+	lctlg	%c0,%c15,0x200(%r0)	# initialize control registers
 	stck	__LC_LAST_UPDATE_CLOCK
 	spt	6f-.LPG0(%r13)
 	mvc	__LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
deleted file mode 100644
index ada0c07fe1a8..000000000000
--- a/arch/s390/kernel/sclp.S
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Mini SCLP driver.
- *
- * Copyright IBM Corp. 2004, 2009
- *
- *   Author(s):	Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
- *		Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/irq.h>
-
-LC_EXT_NEW_PSW		= 0x58			# addr of ext int handler
-LC_EXT_NEW_PSW_64	= 0x1b0			# addr of ext int handler 64 bit
-LC_EXT_INT_PARAM	= 0x80			# addr of ext int parameter
-LC_EXT_INT_CODE		= 0x86			# addr of ext int code
-LC_AR_MODE_ID		= 0xa3
-
-#
-# Subroutine which waits synchronously until either an external interruption
-# or a timeout occurs.
-#
-# Parameters:
-#   R2	= 0 for no timeout, non-zero for timeout in (approximated) seconds
-#
-# Returns:
-#   R2	= 0 on interrupt, 2 on timeout
-#   R3	= external interruption parameter if R2=0
-#
-
-_sclp_wait_int:
-	stm	%r6,%r15,24(%r15)		# save registers
-	basr	%r13,0				# get base register
-.LbaseS1:
-	ahi	%r15,-96			# create stack frame
-	la	%r8,LC_EXT_NEW_PSW		# register int handler
-	la	%r9,.LextpswS1-.LbaseS1(%r13)
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa1
-	la	%r8,LC_EXT_NEW_PSW_64		# register int handler 64 bit
-	la	%r9,.LextpswS1_64-.LbaseS1(%r13)
-.Lesa1:
-	mvc	.LoldpswS1-.LbaseS1(16,%r13),0(%r8)
-	mvc	0(16,%r8),0(%r9)
-	epsw	%r6,%r7				# set current addressing mode
-	nill	%r6,0x1				# in new psw (31 or 64 bit mode)
-	nilh	%r7,0x8000
-	stm	%r6,%r7,0(%r8)
-	lhi	%r6,0x0200			# cr mask for ext int (cr0.54)
-	ltr	%r2,%r2
-	jz	.LsetctS1
-	ahi	%r6,0x0800			# cr mask for clock int (cr0.52)
-	stck	.LtimeS1-.LbaseS1(%r13)		# initiate timeout
-	al	%r2,.LtimeS1-.LbaseS1(%r13)
-	st	%r2,.LtimeS1-.LbaseS1(%r13)
-	sckc	.LtimeS1-.LbaseS1(%r13)
-
-.LsetctS1:
-	stctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# enable required interrupts
-	l	%r0,.LctlS1-.LbaseS1(%r13)
-	lhi	%r1,~(0x200 | 0x800)		# clear old values
-	nr	%r1,%r0
-	or	%r1,%r6				# set new value
-	st	%r1,.LctlS1-.LbaseS1(%r13)
-	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)
-	st	%r0,.LctlS1-.LbaseS1(%r13)
-	lhi	%r2,2				# return code for timeout
-.LloopS1:
-	lpsw	.LwaitpswS1-.LbaseS1(%r13)	# wait until interrupt
-.LwaitS1:
-	lh	%r7,LC_EXT_INT_CODE
-	chi	%r7,EXT_IRQ_CLK_COMP		# timeout?
-	je	.LtimeoutS1
-	chi	%r7,EXT_IRQ_SERVICE_SIG		# service int?
-	jne	.LloopS1
-	sr	%r2,%r2
-	l	%r3,LC_EXT_INT_PARAM
-.LtimeoutS1:
-	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# restore interrupt setting
-	# restore old handler
-	mvc	0(16,%r8),.LoldpswS1-.LbaseS1(%r13)
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14				# return to caller
-
-	.align	8
-.LoldpswS1:
-	.long	0, 0, 0, 0			# old ext int PSW
-.LextpswS1:
-	.long	0x00080000, 0x80000000+.LwaitS1	# PSW to handle ext int
-.LextpswS1_64:
-	.quad	0, .LwaitS1			# PSW to handle ext int, 64 bit
-.LwaitpswS1:
-	.long	0x010a0000, 0x00000000+.LloopS1	# PSW to wait for ext int
-.LtimeS1:
-	.quad	0				# current time
-.LctlS1:
-	.long	0				# CT0 contents
-
-#
-# Subroutine to synchronously issue a service call.
-#
-# Parameters:
-#   R2	= command word
-#   R3	= sccb address
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#   R3	= sccb response code if R2 = 0
-#
-
-_sclp_servc:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	lr	%r6,%r2				# save command word
-	lr	%r7,%r3				# save sccb address
-.LretryS2:
-	lhi	%r2,1				# error return code
-	.insn	rre,0xb2200000,%r6,%r7		# servc
-	brc	1,.LendS2			# exit if not operational
-	brc	8,.LnotbusyS2			# go on if not busy
-	sr	%r2,%r2				# wait until no longer busy
-	bras	%r14,_sclp_wait_int
-	j	.LretryS2			# retry
-.LnotbusyS2:
-	sr	%r2,%r2				# wait until result
-	bras	%r14,_sclp_wait_int
-	sr	%r2,%r2
-	lh	%r3,6(%r7)
-.LendS2:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-
-#
-# Subroutine to set up the SCLP interface.
-#
-# Parameters:
-#   R2	= 0 to activate, non-zero to deactivate
-#
-# Returns:
-#   R2	= 0 on success, non-zero on failure
-#
-
-_sclp_setup:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	basr	%r13,0				# get base register
-.LbaseS3:
-	l	%r6,.LsccbS0-.LbaseS3(%r13)	# prepare init mask sccb
-	mvc	0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
-	ltr	%r2,%r2				# initialization?
-	jz	.LdoinitS3			# go ahead
-	# clear masks
-	xc	.LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
-.LdoinitS3:
-	l	%r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
-	lr	%r3,%r6				# get sccb address
-	bras	%r14,_sclp_servc		# issue service call
-	ltr	%r2,%r2				# servc successful?
-	jnz	.LerrorS3
-	chi	%r3,0x20			# write mask successful?
-	jne	.LerrorS3
-	# check masks
-	la	%r2,.LinitmaskS3-.LinitsccbS3(%r6)
-	l	%r1,0(%r2)			# receive mask ok?
-	n	%r1,12(%r2)
-	cl	%r1,0(%r2)
-	jne	.LerrorS3
-	l	%r1,4(%r2)			# send mask ok?
-	n	%r1,8(%r2)
-	cl	%r1,4(%r2)
-	sr	%r2,%r2
-	je	.LendS3
-.LerrorS3:
-	lhi	%r2,1				# error return code
-.LendS3:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-.LwritemaskS3:
-	.long	0x00780005			# SCLP command for write mask
-.LinitsccbS3:
-	.word	.LinitendS3-.LinitsccbS3
-	.byte	0,0,0,0
-	.word	0
-	.word	0
-	.word	4
-.LinitmaskS3:
-	.long	0x80000000
-	.long	0x40000000
-	.long	0
-	.long	0
-.LinitendS3:
-
-#
-# Subroutine which prints a given text to the SCLP console.
-#
-# Parameters:
-#   R2	= address of nil-terminated ASCII text
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#
-
-_sclp_print:
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	basr	%r13,0				# get base register
-.LbaseS4:
-	l	%r8,.LsccbS0-.LbaseS4(%r13)	# prepare write data sccb
-	mvc	0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
-	la	%r7,.LmtoS4-.LwritesccbS4(%r8)	# current mto addr
-	sr	%r0,%r0
-	l	%r10,.Lascebc-.LbaseS4(%r13)	# address of translation table
-.LinitmtoS4:
-	# initialize mto
-	mvc	0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
-	lhi	%r6,.LmtoendS4-.LmtoS4		# current mto length
-.LloopS4:
-	ic	%r0,0(%r2)			# get character
-	ahi	%r2,1
-	ltr	%r0,%r0				# end of string?
-	jz	.LfinalizemtoS4
-	chi	%r0,0x0a			# end of line (NL)?
-	jz	.LfinalizemtoS4
-	stc	%r0,0(%r6,%r7)			# copy to mto
-	la	%r11,0(%r6,%r7)
-	tr	0(1,%r11),0(%r10)		# translate to EBCDIC
-	ahi	%r6,1
-	j	.LloopS4
-.LfinalizemtoS4:
-	sth	%r6,0(%r7)			# update mto length
-	lh	%r9,.LmdbS4-.LwritesccbS4(%r8)	# update mdb length
-	ar	%r9,%r6
-	sth	%r9,.LmdbS4-.LwritesccbS4(%r8)
-	lh	%r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
-	ar	%r9,%r6
-	sth	%r9,.LevbufS4-.LwritesccbS4(%r8)
-	lh	%r9,0(%r8)			# update sccb length
-	ar	%r9,%r6
-	sth	%r9,0(%r8)
-	ar	%r7,%r6				# update current mto address
-	ltr	%r0,%r0				# more characters?
-	jnz	.LinitmtoS4
-	l	%r2,.LwritedataS4-.LbaseS4(%r13)# write data
-	lr	%r3,%r8
-	bras	%r14,_sclp_servc
-	ltr	%r2,%r2				# servc successful?
-	jnz	.LendS4
-	chi	%r3,0x20			# write data successful?
-	je	.LendS4
-	lhi	%r2,1				# error return code
-.LendS4:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-
-#
-# Function which prints a given text to the SCLP console.
-#
-# Parameters:
-#   R2	= address of nil-terminated ASCII text
-#
-# Returns:
-#   R2	= 0 on success, 1 on failure
-#
-
-ENTRY(_sclp_print_early)
-	stm	%r6,%r15,24(%r15)		# save registers
-	ahi	%r15,-96			# create stack frame
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa2
-	ahi	%r15,-80
-	stmh	%r6,%r15,96(%r15)		# store upper register halves
-	basr	%r13,0
-	lmh	%r0,%r15,.Lzeroes-.(%r13)	# clear upper register halves
-.Lesa2:
-	lr	%r10,%r2			# save string pointer
-	lhi	%r2,0
-	bras	%r14,_sclp_setup		# enable console
-	ltr	%r2,%r2
-	jnz	.LendS5
-	lr	%r2,%r10
-	bras	%r14,_sclp_print		# print string
-	ltr	%r2,%r2
-	jnz	.LendS5
-	lhi	%r2,1
-	bras	%r14,_sclp_setup		# disable console
-.LendS5:
-	tm	LC_AR_MODE_ID,1
-	jno	.Lesa3
-	lgfr	%r2,%r2				# sign extend return value
-	lmh	%r6,%r15,96(%r15)		# restore upper register halves
-	ahi	%r15,80
-.Lesa3:
-	lm	%r6,%r15,120(%r15)		# restore registers
-	br	%r14
-.Lzeroes:
-	.fill	64,4,0
-
-.LwritedataS4:
-	.long	0x00760005			# SCLP command for write data
-.LwritesccbS4:
-	# sccb
-	.word	.LmtoS4-.LwritesccbS4
-	.byte	0
-	.byte	0,0,0
-	.word	0
-
-	# evbuf
-.LevbufS4:
-	.word	.LmtoS4-.LevbufS4
-	.byte	0x02
-	.byte	0
-	.word	0
-
-.LmdbS4:
-	# mdb
-	.word	.LmtoS4-.LmdbS4
-	.word	1
-	.long	0xd4c4c240
-	.long	1
-
-	# go
-.LgoS4:
-	.word	.LmtoS4-.LgoS4
-	.word	1
-	.long	0
-	.byte	0,0,0,0,0,0,0,0
-	.byte	0,0,0
-	.byte	0
-	.byte	0,0,0,0,0,0,0
-	.byte	0
-	.word	0
-	.byte	0,0,0,0,0,0,0,0,0,0
-	.byte	0,0,0,0,0,0,0,0
-	.byte	0,0,0,0,0,0,0,0
-
-.LmtoS4:
-	.word	.LmtoendS4-.LmtoS4
-	.word	4
-	.word	0x1000
-	.byte	0
-	.byte	0,0,0
-.LmtoendS4:
-
-	# Global constants
-.LsccbS0:
-	.long	_sclp_work_area
-.Lascebc:
-	.long	_ascebc
-
-.section .data,"aw",@progbits
-	.balign 4096
-_sclp_work_area:
-	.fill	4096
-.previous
diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c
new file mode 100644
index 000000000000..fa0bdff1d413
--- /dev/null
+++ b/arch/s390/kernel/sclp.c
@@ -0,0 +1,160 @@
+/*
+ *    Copyright IBM Corp. 2015
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <asm/ebcdic.h>
+#include <asm/irq.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sclp.h>
+
+static char _sclp_work_area[4096] __aligned(PAGE_SIZE);
+
+static void _sclp_wait_int(void)
+{
+	unsigned long cr0, cr0_new, psw_mask, addr;
+	psw_t psw_ext_save, psw_wait;
+
+	__ctl_store(cr0, 0, 0);
+	cr0_new = cr0 | 0x200;
+	__ctl_load(cr0_new, 0, 0);
+
+	psw_ext_save = S390_lowcore.external_new_psw;
+	psw_mask = __extract_psw() & (PSW_MASK_EA | PSW_MASK_BA);
+	S390_lowcore.external_new_psw.mask = psw_mask;
+	psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT;
+	S390_lowcore.ext_int_code = 0;
+
+	do {
+		asm volatile(
+			"	larl	%[addr],0f\n"
+			"	stg	%[addr],%[psw_wait_addr]\n"
+			"	stg	%[addr],%[psw_ext_addr]\n"
+			"	lpswe	%[psw_wait]\n"
+			"0:\n"
+			: [addr] "=&d" (addr),
+			  [psw_wait_addr] "=Q" (psw_wait.addr),
+			  [psw_ext_addr] "=Q" (S390_lowcore.external_new_psw.addr)
+			: [psw_wait] "Q" (psw_wait)
+			: "cc", "memory");
+	} while (S390_lowcore.ext_int_code != EXT_IRQ_SERVICE_SIG);
+
+	__ctl_load(cr0, 0, 0);
+	S390_lowcore.external_new_psw = psw_ext_save;
+}
+
+static int _sclp_servc(unsigned int cmd, char *sccb)
+{
+	unsigned int cc;
+
+	do {
+		asm volatile(
+			"	.insn	rre,0xb2200000,%1,%2\n"
+			"	ipm	%0\n"
+			: "=d" (cc) : "d" (cmd), "a" (sccb)
+			: "cc", "memory");
+		cc >>= 28;
+		if (cc == 3)
+			return -EINVAL;
+		_sclp_wait_int();
+	} while (cc != 0);
+	return (*(unsigned short *)(sccb + 6) == 0x20) ? 0 : -EIO;
+}
+
+static int _sclp_setup(int disable)
+{
+	static unsigned char init_sccb[] = {
+		0x00, 0x1c,
+		0x00, 0x00, 0x00, 0x00,	0x00, 0x00, 0x00, 0x00,
+		0x00, 0x04,
+		0x80, 0x00, 0x00, 0x00,	0x40, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00,	0x00, 0x00, 0x00, 0x00
+	};
+	unsigned int *masks;
+	int rc;
+
+	memcpy(_sclp_work_area, init_sccb, 28);
+	masks = (unsigned int *)(_sclp_work_area + 12);
+	if (disable)
+		memset(masks, 0, 16);
+	/* SCLP write mask */
+	rc = _sclp_servc(0x00780005, _sclp_work_area);
+	if (rc)
+		return rc;
+	if ((masks[0] & masks[3]) != masks[0] ||
+	    (masks[1] & masks[2]) != masks[1])
+		return -EIO;
+	return 0;
+}
+
+static int _sclp_print(const char *str)
+{
+	static unsigned char write_head[] = {
+		/* sccb header */
+		0x00, 0x52,					/* 0 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00,		/* 2 */
+		/* evbuf */
+		0x00, 0x4a,					/* 8 */
+		0x02, 0x00, 0x00, 0x00,				/* 10 */
+		/* mdb */
+		0x00, 0x44,					/* 14 */
+		0x00, 0x01,					/* 16 */
+		0xd4, 0xc4, 0xc2, 0x40,				/* 18 */
+		0x00, 0x00, 0x00, 0x01,				/* 22 */
+		/* go */
+		0x00, 0x38,					/* 26 */
+		0x00, 0x01,					/* 28 */
+		0x00, 0x00, 0x00, 0x00,				/* 30 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 34 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 42 */
+		0x00, 0x00, 0x00, 0x00,				/* 50 */
+		0x00, 0x00,					/* 54 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 56 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 64 */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 72 */
+		0x00, 0x00,					/* 80 */
+	};
+	static unsigned char write_mto[] = {
+		/* mto	*/
+		0x00, 0x0a,					/* 0 */
+		0x00, 0x04,					/* 2 */
+		0x10, 0x00,					/* 4 */
+		0x00, 0x00, 0x00, 0x00				/* 6 */
+	};
+	unsigned char *ptr, ch;
+	unsigned int count;
+
+	memcpy(_sclp_work_area, write_head, sizeof(write_head));
+	ptr = _sclp_work_area + sizeof(write_head);
+	do {
+		memcpy(ptr, write_mto, sizeof(write_mto));
+		for (count = sizeof(write_mto); (ch = *str++) != 0; count++) {
+			if (ch == 0x0a)
+				break;
+			ptr[count] = _ascebc[ch];
+		}
+		/* Update length fields in mto, mdb, evbuf and sccb */
+		*(unsigned short *) ptr = count;
+		*(unsigned short *)(_sclp_work_area + 14) += count;
+		*(unsigned short *)(_sclp_work_area + 8) += count;
+		*(unsigned short *)(_sclp_work_area + 0) += count;
+		ptr += count;
+	} while (ch != 0);
+
+	/* SCLP write data */
+	return _sclp_servc(0x00760005, _sclp_work_area);
+}
+
+int _sclp_print_early(const char *str)
+{
+	int rc;
+
+	rc = _sclp_setup(0);
+	if (rc)
+		return rc;
+	rc = _sclp_print(str);
+	if (rc)
+		return rc;
+	return _sclp_setup(1);
+}
-- 
cgit v1.2.3


From e7f596de1973e9dd9b5dcd7cb00aeb040ab8c13c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 17 Jul 2015 08:05:23 +0200
Subject: s390/smp: add missing __init annotation to __smp_store_cpu_state()

 Section mismatch in reference from the function __smp_store_cpu_state()
  to the function .init.text:memblock_alloc()
The function __smp_store_cpu_state() references
the function __init memblock_alloc().

Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/smp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6f54c175f5c9..c6355e6f3fcc 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -532,8 +532,8 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
 
 #ifdef CONFIG_CRASH_DUMP
 
-static void __smp_store_cpu_state(struct save_area_ext *sa_ext, u16 address,
-				  int is_boot_cpu)
+static void __init __smp_store_cpu_state(struct save_area_ext *sa_ext,
+					 u16 address, int is_boot_cpu)
 {
 	void *lc = (void *)(unsigned long) store_prefix();
 	unsigned long vx_sa;
-- 
cgit v1.2.3


From 2b1df72416a858f928c4edcbbb7f13b8dfc10a35 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 28 Jul 2015 19:10:45 +0200
Subject: s390/pci: free resources after failed bus allocation

Free bus resources when the allocation/registration of the bus failed.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 6316f4935f78..17c04c7269e7 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -632,7 +632,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 	int i;
 
 	for (i = 0; i < PCI_BAR_COUNT; i++) {
-		if (!zdev->bars[i].size)
+		if (!zdev->bars[i].size || !zdev->bars[i].res)
 			continue;
 
 		zpci_free_iomap(zdev, zdev->bars[i].map_idx);
@@ -772,17 +772,22 @@ static int zpci_scan_bus(struct zpci_dev *zdev)
 
 	ret = zpci_setup_bus_resources(zdev, &resources);
 	if (ret)
-		return ret;
+		goto error;
 
 	zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
 				      zdev, &resources);
 	if (!zdev->bus) {
-		zpci_cleanup_bus_resources(zdev);
-		return -EIO;
+		ret = -EIO;
+		goto error;
 	}
 	zdev->bus->max_bus_speed = zdev->max_bus_speed;
 	pci_bus_add_devices(zdev->bus);
 	return 0;
+
+error:
+	zpci_cleanup_bus_resources(zdev);
+	pci_free_resource_list(&resources);
+	return ret;
 }
 
 int zpci_enable_device(struct zpci_dev *zdev)
-- 
cgit v1.2.3


From 515f022e8b59ce928fe5d82affb3e93ab53d12ed Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 28 Jul 2015 19:11:40 +0200
Subject: s390/pci: handle events for unused functions

Receiving error events for a pci function that's currently not in use
will crash the kernel. For example the procedure for FW upgrades might
include:
* remove the function from Linux
* apply FW upgrade
* rescan for new functions

Receiving an event during the FW upgrade will result in a use-after-free
when printing the function's name. Just print "n/a" in such cases.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_event.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index ed2394dd14e9..eef76f0e6873 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -46,15 +46,13 @@ struct zpci_ccdf_avail {
 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 {
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
 
 	zpci_err("error CCDF:\n");
 	zpci_err_hex(ccdf, sizeof(*ccdf));
 
-	if (!zdev)
-		return;
-
 	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
-	       pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
+	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
 }
 
 void zpci_event_error(void *data)
-- 
cgit v1.2.3


From 2a01bd1bd3d28d1eef26d5509c95d0923f7dc75c Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 28 Jul 2015 19:14:51 +0200
Subject: s390/pci: use pci_rescan_remove_lock

Make sure that we use the pci_rescan_remove_lock when we remove
or add functions from/to the bus.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_event.c |  6 ++++--
 arch/s390/pci/pci_sysfs.c | 11 +++++++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index eef76f0e6873..369a3e05d468 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -87,7 +87,9 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		ret = zpci_enable_device(zdev);
 		if (ret)
 			break;
+		pci_lock_rescan_remove();
 		pci_rescan_bus(zdev->bus);
+		pci_unlock_rescan_remove();
 		break;
 	case 0x0302: /* Reserved -> Standby */
 		if (!zdev)
@@ -95,7 +97,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		break;
 	case 0x0303: /* Deconfiguration requested */
 		if (pdev)
-			pci_stop_and_remove_bus_device(pdev);
+			pci_stop_and_remove_bus_device_locked(pdev);
 
 		ret = zpci_disable_device(zdev);
 		if (ret)
@@ -112,7 +114,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 			/* Give the driver a hint that the function is
 			 * already unusable. */
 			pdev->error_state = pci_channel_io_perm_failure;
-			pci_stop_and_remove_bus_device(pdev);
+			pci_stop_and_remove_bus_device_locked(pdev);
 		}
 
 		zdev->fh = ccdf->fh;
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index dad920fd3606..f37a5808883d 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -44,17 +44,24 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 	if (!device_remove_file_self(dev, attr))
 		return count;
 
+	pci_lock_rescan_remove();
 	pci_stop_and_remove_bus_device(pdev);
 	ret = zpci_disable_device(zdev);
 	if (ret)
-		return ret;
+		goto error;
 
 	ret = zpci_enable_device(zdev);
 	if (ret)
-		return ret;
+		goto error;
 
 	pci_rescan_bus(zdev->bus);
+	pci_unlock_rescan_remove();
+
 	return count;
+
+error:
+	pci_unlock_rescan_remove();
+	return ret;
 }
 static DEVICE_ATTR_WO(recover);
 
-- 
cgit v1.2.3


From d0164ee20d98847d3c777a0ae90e678e7ac1e416 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Mon, 29 Jun 2015 16:43:06 +0200
Subject: s390/kernel: remove save_fpu_regs() parameter and use __LC_CURRENT
 instead

All calls to save_fpu_regs() pass the FPU structure of the current task
as the parameter.  The task pointer of the current task can also be
retrieved from the CPU lowcore directly.  Remove the parameter definition,
load the __LC_CURRENT task pointer from the CPU lowcore, and rebase the FPU
structure onto the task structure.  Apply the same approach for the
load_fpu_regs() function.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/fpu-internal.h |  2 +-
 arch/s390/include/asm/switch_to.h    |  2 +-
 arch/s390/kernel/asm-offsets.c       |  8 +++-----
 arch/s390/kernel/compat_signal.c     |  6 +++---
 arch/s390/kernel/entry.S             | 36 ++++++++++++++++++------------------
 arch/s390/kernel/process.c           |  4 ++--
 arch/s390/kernel/ptrace.c            | 12 ++++++------
 arch/s390/kernel/signal.c            |  6 +++---
 arch/s390/kernel/traps.c             |  6 +++---
 arch/s390/kvm/kvm-s390.c             | 10 +++++-----
 10 files changed, 45 insertions(+), 47 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu-internal.h
index 237f8fcbe46b..55dc2c0fb40a 100644
--- a/arch/s390/include/asm/fpu-internal.h
+++ b/arch/s390/include/asm/fpu-internal.h
@@ -28,7 +28,7 @@ struct fpu {
 	};
 };
 
-void save_fpu_regs(struct fpu *fpu);
+void save_fpu_regs(void);
 
 #define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
 #define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 0a4a3150b7d7..dcadfde32265 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -30,7 +30,7 @@ static inline void restore_access_regs(unsigned int *acrs)
 
 #define switch_to(prev,next,last) do {					\
 	if (prev->mm) {							\
-		save_fpu_regs(&prev->thread.fpu);			\
+		save_fpu_regs();					\
 		save_access_regs(&prev->thread.acrs[0]);		\
 		save_ri_cb(prev->thread.ri_cb);				\
 	}								\
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 6bc42c08be09..48c9af7a7683 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -28,16 +28,14 @@ int main(void)
 	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
 	BLANK();
 	DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
-	DEFINE(__THREAD_fpu, offsetof(struct task_struct, thread.fpu));
+	DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc));
+	DEFINE(__THREAD_FPU_flags, offsetof(struct thread_struct, fpu.flags));
+	DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs));
 	DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
 	DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
 	DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));
 	DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb));
 	BLANK();
-	DEFINE(__FPU_fpc, offsetof(struct fpu, fpc));
-	DEFINE(__FPU_flags, offsetof(struct fpu, flags));
-	DEFINE(__FPU_regs, offsetof(struct fpu, regs));
-	BLANK();
 	DEFINE(__TI_task, offsetof(struct thread_info, task));
 	DEFINE(__TI_flags, offsetof(struct thread_info, flags));
 	DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 0b46fd4aa31e..eb4664238613 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -154,7 +154,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 static void store_sigregs(void)
 {
 	save_access_regs(current->thread.acrs);
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 }
 
 /* Load registers after signal return */
@@ -286,7 +286,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
 		goto badframe;
 	set_current_blocked(&set);
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 	if (restore_sigregs32(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->sregs_ext))
@@ -309,7 +309,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 	if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 21c1219122af..5a966dea937f 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -183,7 +183,6 @@ ENTRY(sie64a)
 	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
 	tm	__LC_CPU_FLAGS+7,_CIF_FPU	# load guest fp/vx registers ?
 	jno	.Lsie_load_guest_gprs
-	lg	%r12,__LC_THREAD_INFO		# load fp/vx regs save area
 	brasl	%r14,load_fpu_regs		# load guest fp/vx regs
 .Lsie_load_guest_gprs:
 	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
@@ -752,14 +751,16 @@ ENTRY(psw_idle)
  * of the register contents at system call or io return.
  */
 ENTRY(save_fpu_regs)
+	lg	%r2,__LC_CURRENT
+	aghi	%r2,__TASK_thread
 	tm	__LC_CPU_FLAGS+7,_CIF_FPU
 	bor	%r14
-	stfpc	__FPU_fpc(%r2)
+	stfpc	__THREAD_FPU_fpc(%r2)
 .Lsave_fpu_regs_fpc_end:
-	lg	%r3,__FPU_regs(%r2)
+	lg	%r3,__THREAD_FPU_regs(%r2)
 	ltgr	%r3,%r3
 	jz	.Lsave_fpu_regs_done	  # no save area -> set CIF_FPU
-	tm	__FPU_flags+3(%r2),FPU_USE_VX
+	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
 	jz	.Lsave_fpu_regs_fp	  # no -> store FP regs
 .Lsave_fpu_regs_vx_low:
 	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
@@ -794,20 +795,19 @@ ENTRY(save_fpu_regs)
  * FP/VX state, the vector-enablement control, CR0.46, is either set or cleared.
  *
  * There are special calling conventions to fit into sysc and io return work:
- *	%r12:	__LC_THREAD_INFO
  *	%r15:	<kernel stack>
  * The function requires:
  *	%r4 and __SF_EMPTY+32(%r15)
  */
 load_fpu_regs:
+	lg	%r4,__LC_CURRENT
+	aghi	%r4,__TASK_thread
 	tm	__LC_CPU_FLAGS+7,_CIF_FPU
 	bnor	%r14
-	lg	%r4,__TI_task(%r12)
-	la	%r4,__THREAD_fpu(%r4)
-	lfpc	__FPU_fpc(%r4)
+	lfpc	__THREAD_FPU_fpc(%r4)
 	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
-	tm	__FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
-	lg	%r4,__FPU_regs(%r4)		# %r4 <- reg save area
+	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
 	jz	.Lload_fpu_regs_fp_ctl		# -> no VX, load FP regs
 .Lload_fpu_regs_vx_ctl:
 	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
@@ -1190,13 +1190,14 @@ cleanup_critical:
 	jhe	2f
 	clg	%r9,BASED(.Lcleanup_save_fpu_fpc_end)
 	jhe	1f
+	lg	%r2,__LC_CURRENT
 0:	# Store floating-point controls
-	stfpc	__FPU_fpc(%r2)
+	stfpc	__THREAD_FPU_fpc(%r2)
 1:	# Load register save area and check if VX is active
-	lg	%r3,__FPU_regs(%r2)
+	lg	%r3,__THREAD_FPU_regs(%r2)
 	ltgr	%r3,%r3
 	jz	5f			  # no save area -> set CIF_FPU
-	tm	__FPU_flags+3(%r2),FPU_USE_VX
+	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
 	jz	4f			  # no VX -> store FP regs
 2:	# Store vector registers (V0-V15)
 	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
@@ -1250,11 +1251,10 @@ cleanup_critical:
 	jhe	5f
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
 	jhe	6f
-	lg	%r4,__TI_task(%r12)
-	la	%r4,__THREAD_fpu(%r4)
-	lfpc	__FPU_fpc(%r4)
-	tm	__FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
-	lg	%r4,__FPU_regs(%r4)		# %r4 <- reg save area
+	lg	%r4,__LC_CURRENT
+	lfpc	__THREAD_FPU_fpc(%r4)
+	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
 	jz	3f				# -> no VX, load FP regs
 6:	# Set VX-enablement control
 	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 9cf0063f920e..f2dac9f0799d 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -104,7 +104,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	 * The CIF_FPU flag is set in any case to lazy clear or restore a saved
 	 * state when switching to a different task or returning to user space.
 	 */
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 	dst->thread.fpu.fpc = current->thread.fpu.fpc;
 	if (is_vx_task(current))
 		convert_vx_to_fp(dst->thread.fpu.fprs,
@@ -196,7 +196,7 @@ asmlinkage void execve_tail(void)
  */
 int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
 {
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 	fpregs->fpc = current->thread.fpu.fpc;
 	fpregs->pad = 0;
 	if (is_vx_task(current))
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 8c525880a3ff..8b1c8e33f184 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -943,7 +943,7 @@ static int s390_fpregs_get(struct task_struct *target,
 	_s390_fp_regs fp_regs;
 
 	if (target == current)
-		save_fpu_regs(&target->thread.fpu);
+		save_fpu_regs();
 
 	fp_regs.fpc = target->thread.fpu.fpc;
 	fpregs_store(&fp_regs, &target->thread.fpu);
@@ -961,7 +961,7 @@ static int s390_fpregs_set(struct task_struct *target,
 	freg_t fprs[__NUM_FPRS];
 
 	if (target == current)
-		save_fpu_regs(&target->thread.fpu);
+		save_fpu_regs();
 
 	/* If setting FPC, must validate it first. */
 	if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
@@ -1049,7 +1049,7 @@ static int s390_vxrs_low_get(struct task_struct *target,
 		return -ENODEV;
 	if (is_vx_task(target)) {
 		if (target == current)
-			save_fpu_regs(&target->thread.fpu);
+			save_fpu_regs();
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 			vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
 	} else
@@ -1072,7 +1072,7 @@ static int s390_vxrs_low_set(struct task_struct *target,
 		if (rc)
 			return rc;
 	} else if (target == current)
-		save_fpu_regs(&target->thread.fpu);
+		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
 	if (rc == 0)
@@ -1093,7 +1093,7 @@ static int s390_vxrs_high_get(struct task_struct *target,
 		return -ENODEV;
 	if (is_vx_task(target)) {
 		if (target == current)
-			save_fpu_regs(&target->thread.fpu);
+			save_fpu_regs();
 		memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
 		       sizeof(vxrs));
 	} else
@@ -1115,7 +1115,7 @@ static int s390_vxrs_high_set(struct task_struct *target,
 		if (rc)
 			return rc;
 	} else if (target == current)
-		save_fpu_regs(&target->thread.fpu);
+		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 2f4c7e2638c9..9549af102d75 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -105,7 +105,7 @@ struct rt_sigframe
 static void store_sigregs(void)
 {
 	save_access_regs(current->thread.acrs);
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 }
 
 /* Load registers after signal return */
@@ -222,7 +222,7 @@ SYSCALL_DEFINE0(sigreturn)
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
 		goto badframe;
 	set_current_blocked(&set);
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 	if (restore_sigregs(regs, &frame->sregs))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->sregs_ext))
@@ -246,7 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	set_current_blocked(&set);
 	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 	if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 76f76932ccb9..9861613fb35a 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -236,7 +236,7 @@ int alloc_vector_registers(struct task_struct *tsk)
 		return -ENOMEM;
 	preempt_disable();
 	if (tsk == current)
-		save_fpu_regs(&tsk->thread.fpu);
+		save_fpu_regs();
 	/* Copy the 16 floating point registers */
 	convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
 	fprs = tsk->thread.fpu.fprs;
@@ -257,7 +257,7 @@ void vector_exception(struct pt_regs *regs)
 	}
 
 	/* get vector interrupt code from fpc */
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 	vic = (current->thread.fpu.fpc & 0xf00) >> 8;
 	switch (vic) {
 	case 1: /* invalid vector operation */
@@ -295,7 +295,7 @@ void data_exception(struct pt_regs *regs)
 
 	location = get_trap_ip(regs);
 
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 	/* Check for vector register enablement */
 	if (MACHINE_HAS_VX && !is_vx_task(current) &&
 	    (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c0cceaf4a92e..1903f0212bd0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1224,7 +1224,7 @@ static inline void load_fpu_from(struct fpu *from)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	/* Save host register state */
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 	save_fpu_to(&vcpu->arch.host_fpregs);
 
 	if (test_kvm_facility(vcpu->kvm, 129)) {
@@ -1256,7 +1256,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
 
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 
 	if (test_kvm_facility(vcpu->kvm, 129))
 		/*
@@ -1671,7 +1671,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 		return -EINVAL;
 	memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 	load_fpu_from(&vcpu->arch.guest_fpregs);
 	return 0;
 }
@@ -2241,7 +2241,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	 * copying in vcpu load/put. Lets update our copies before we save
 	 * it into the save area
 	 */
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 	if (test_kvm_facility(vcpu->kvm, 129)) {
 		/*
 		 * If the vector extension is available, the vector registers
@@ -2288,7 +2288,7 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	 *
 	 * Let's update our copies before we save it into the save area.
 	 */
-	save_fpu_regs(&current->thread.fpu);
+	save_fpu_regs();
 
 	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
 }
-- 
cgit v1.2.3


From 888d5e9804bb401f3531b700cc93da0bdf8496bf Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 9 Jul 2015 17:56:13 +0200
Subject: KVM: s390: use pid of cpu thread for sampling tagging

Right now we use the address of the sie control block as tag for
the sampling data. This is hard to get for users. Let's just use
the PID of the cpu thread to mark the hardware samples.

Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry.S        | 2 +-
 arch/s390/kernel/perf_cpum_sf.c | 9 +++------
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 5a966dea937f..247b7aae4c6d 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -199,7 +199,7 @@ ENTRY(sie64a)
 	jo	.Lsie_skip			# exit if fp/vx regs changed
 	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
 	jz	.Lsie_enter
-	.insn	s,0xb2800000,__SF_EMPTY(%r15)	# set guest id
+	.insn	s,0xb2800000,__LC_CURRENT_PID	# set guest id to pid
 .Lsie_enter:
 	sie	0(%r14)
 	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index afe05bfb7e00..b973972f6ba5 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1019,12 +1019,9 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 		break;
 	}
 
-	/* The host-program-parameter (hpp) contains the sie control
-	 * block that is set by sie64a() in entry64.S.	Check if hpp
-	 * refers to a valid control block and set sde_regs flags
-	 * accordingly.  This would allow to use hpp values for other
-	 * purposes too.
-	 * For now, simply use a non-zero value as guest indicator.
+	/* The host-program-parameter (hpp) contains the pid of
+	 * the CPU thread as set by sie64a() in entry.S.
+	 * If non-zero assume a guest sample.
 	 */
 	if (sfr->basic.hpp)
 		sde_regs->in_guest = 1;
-- 
cgit v1.2.3


From b54565b86824ecc9f0ad5d0ee69696f38edc50fd Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 23 Sep 2014 14:01:34 +0200
Subject: s390/mm: add NUMA balancing primitives

Define pte_protnone and pmd_protnone for NUMA memory migration.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/pgtable.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f66d82798a6a..bdb2f51124ed 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -576,6 +576,19 @@ static inline int pte_same(pte_t a, pte_t b)
 	return pte_val(a) == pte_val(b);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline int pte_protnone(pte_t pte)
+{
+	return pte_present(pte) && !(pte_val(pte) & _PAGE_READ);
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+	/* pmd_large(pmd) implies pmd_present(pmd) */
+	return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+}
+#endif
+
 static inline pgste_t pgste_get_lock(pte_t *ptep)
 {
 	unsigned long new = 0;
-- 
cgit v1.2.3


From ecf46abdd32a35fcd7dfeef72600a065425532b3 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <geraldsc@linux.vnet.ibm.com>
Date: Fri, 29 May 2015 15:34:51 +0200
Subject: s390/mm: enable gup code for NUMA

Force get_user_page() to take the slow path for NUMA migration pages.

Signed-off-by: Gerald Schaefer <geraldsc@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/gup.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch/s390')

diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 1eb41bb3010c..12bbf0e8478f 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -30,6 +30,9 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 	do {
 		pte = *ptep;
 		barrier();
+		/* Similar to the PMD case, NUMA hinting must take slow path */
+		if (pte_protnone(pte))
+			return 0;
 		if ((pte_val(pte) & mask) != 0)
 			return 0;
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -125,6 +128,13 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
 		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
 		if (unlikely(pmd_large(pmd))) {
+			/*
+			 * NUMA hinting faults need to be handled in the GUP
+			 * slowpath for accounting purposes and so that they
+			 * can be serialised against THP migration.
+			 */
+			if (pmd_protnone(pmd))
+				return 0;
 			if (!gup_huge_pmd(pmdp, pmd, addr, next,
 					  write, pages, nr))
 				return 0;
-- 
cgit v1.2.3


From 199071f108f5641badc2a6970e1fa7ec469d5d12 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Fri, 8 May 2015 17:40:43 +0200
Subject: s390/mm: make arch_add_memory() NUMA aware

With NUMA support for s390, arch_add_memory() needs to respect the nid
parameter.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/init.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 76e873748b56..dc4db08286e9 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -27,6 +27,7 @@
 #include <linux/initrd.h>
 #include <linux/export.h>
 #include <linux/gfp.h>
+#include <linux/memblock.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -170,37 +171,36 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size)
 {
-	unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
+	unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
+	unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long size_pages = PFN_DOWN(size);
-	struct zone *zone;
-	int rc;
+	unsigned long nr_pages;
+	int rc, zone_enum;
 
 	rc = vmem_add_mapping(start, size);
 	if (rc)
 		return rc;
-	for_each_zone(zone) {
-		if (zone_idx(zone) != ZONE_MOVABLE) {
-			/* Add range within existing zone limits */
-			zone_start_pfn = zone->zone_start_pfn;
-			zone_end_pfn = zone->zone_start_pfn +
-				       zone->spanned_pages;
+
+	while (size_pages > 0) {
+		if (start_pfn < dma_end_pfn) {
+			nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
+				   dma_end_pfn - start_pfn : size_pages;
+			zone_enum = ZONE_DMA;
+		} else if (start_pfn < normal_end_pfn) {
+			nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
+				   normal_end_pfn - start_pfn : size_pages;
+			zone_enum = ZONE_NORMAL;
 		} else {
-			/* Add remaining range to ZONE_MOVABLE */
-			zone_start_pfn = start_pfn;
-			zone_end_pfn = start_pfn + size_pages;
+			nr_pages = size_pages;
+			zone_enum = ZONE_MOVABLE;
 		}
-		if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
-			continue;
-		nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
-			   zone_end_pfn - start_pfn : size_pages;
-		rc = __add_pages(nid, zone, start_pfn, nr_pages);
+		rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum,
+				 start_pfn, nr_pages);
 		if (rc)
 			break;
 		start_pfn += nr_pages;
 		size_pages -= nr_pages;
-		if (!size_pages)
-			break;
 	}
 	if (rc)
 		vmem_remove_mapping(start, size);
-- 
cgit v1.2.3


From 3a368f742da13955bed4a2efed85ed7c1d826bcc Mon Sep 17 00:00:00 2001
From: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Date: Thu, 6 Mar 2014 18:25:13 +0100
Subject: s390/numa: add core infrastructure

Enable core NUMA support for s390 and add one simple default mode "plain"
that creates one single NUMA node.

This patch contains several changes from Michael Holzheu.

Signed-off-by: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/Kbuild                    |   1 +
 arch/s390/Kconfig                   |  37 ++++++++
 arch/s390/include/asm/mmzone.h      |  16 ++++
 arch/s390/include/asm/numa.h        |  31 +++++++
 arch/s390/include/asm/pci.h         |  16 ++++
 arch/s390/include/asm/topology.h    |  39 ++++++++
 arch/s390/include/asm/unistd.h      |  12 +--
 arch/s390/include/uapi/asm/unistd.h |  10 +-
 arch/s390/kernel/setup.c            |   2 +
 arch/s390/kernel/syscalls.S         |  10 +-
 arch/s390/kernel/topology.c         |  21 +++--
 arch/s390/mm/init.c                 |   2 +-
 arch/s390/numa/Makefile             |   1 +
 arch/s390/numa/numa.c               | 180 ++++++++++++++++++++++++++++++++++++
 arch/s390/numa/numa_mode.h          |  23 +++++
 15 files changed, 375 insertions(+), 26 deletions(-)
 create mode 100644 arch/s390/include/asm/mmzone.h
 create mode 100644 arch/s390/include/asm/numa.h
 create mode 100644 arch/s390/numa/Makefile
 create mode 100644 arch/s390/numa/numa.c
 create mode 100644 arch/s390/numa/numa_mode.h

(limited to 'arch/s390')

diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index 2938934c6518..e256592eb66e 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -6,3 +6,4 @@ obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs/
 obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
 obj-y				+= net/
 obj-$(CONFIG_PCI)		+= pci/
+obj-$(CONFIG_NUMA)		+= numa/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 91e8954f1237..25510adb07d3 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -153,6 +153,10 @@ config S390
 	select TTY
 	select VIRT_CPU_ACCOUNTING
 	select VIRT_TO_BUS
+	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_WANTS_PROT_NUMA_PROT_NONE
+	select HAVE_ARCH_EARLY_PFN_TO_NID
+
 
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
@@ -386,6 +390,39 @@ config HOTPLUG_CPU
 config SCHED_SMT
 	def_bool n
 
+# Some NUMA nodes have memory ranges that span
+# other nodes.	Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node.	See memmap_init_zone()
+# for details. <- They meant memory holes!
+config NODES_SPAN_OTHER_NODES
+	def_bool NUMA
+
+config NUMA
+	bool "NUMA support"
+	depends on SMP && 64BIT && SCHED_TOPOLOGY
+	default n
+	help
+	  Enable NUMA support
+
+	  This option adds NUMA support to the kernel.
+
+	  An operation mode can be selected by appending
+	  numa=<method> to the kernel command line.
+
+	  The default behaviour is identical to appending numa=plain to
+	  the command line. This will create just one node with all
+	  available memory and all CPUs in it.
+
+config NODES_SHIFT
+	int "Maximum NUMA nodes (as a power of 2)"
+	range 1 10
+	depends on NUMA
+	default "4"
+	help
+	  Specify the maximum number of NUMA nodes available on the target
+	  system. Increases memory reserved to accommodate various tables.
+
 config SCHED_MC
 	def_bool n
 
diff --git a/arch/s390/include/asm/mmzone.h b/arch/s390/include/asm/mmzone.h
new file mode 100644
index 000000000000..a9e834e60b84
--- /dev/null
+++ b/arch/s390/include/asm/mmzone.h
@@ -0,0 +1,16 @@
+/*
+ * NUMA support for s390
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_MMZONE_H
+#define _ASM_S390_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[nid])
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_MMZONE_H */
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
new file mode 100644
index 000000000000..ea4edbfba9f6
--- /dev/null
+++ b/arch/s390/include/asm/numa.h
@@ -0,0 +1,31 @@
+/*
+ * NUMA support for s390
+ *
+ * Declare the NUMA core code structures and functions.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#ifndef _ASM_S390_NUMA_H
+#define _ASM_S390_NUMA_H
+
+#ifdef CONFIG_NUMA
+
+#include <linux/numa.h>
+#include <linux/cpumask.h>
+
+void numa_setup(void);
+int numa_pfn_to_nid(unsigned long pfn);
+int __node_distance(int a, int b);
+void numa_update_cpu_topology(void);
+
+extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+extern int numa_debug_enabled;
+
+#else
+
+static inline void numa_setup(void) { }
+static inline void numa_update_cpu_topology(void) { }
+
+#endif /* CONFIG_NUMA */
+#endif /* _ASM_S390_NUMA_H */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 9b6545e8f685..34d960353a08 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -192,4 +192,20 @@ void zpci_debug_init_device(struct zpci_dev *);
 void zpci_debug_exit_device(struct zpci_dev *);
 void zpci_debug_info(struct zpci_dev *, struct seq_file *);
 
+#ifdef CONFIG_NUMA
+
+/* Returns the node based on PCI bus */
+static inline int __pcibus_to_node(const struct pci_bus *bus)
+{
+	return NUMA_NO_NODE;
+}
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+	return cpu_online_mask;
+}
+
+#endif /* CONFIG_NUMA */
+
 #endif
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 4990f6c66288..27ebde643933 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -2,6 +2,7 @@
 #define _ASM_S390_TOPOLOGY_H
 
 #include <linux/cpumask.h>
+#include <asm/numa.h>
 
 struct sysinfo_15_1_x;
 struct cpu;
@@ -13,6 +14,7 @@ struct cpu_topology_s390 {
 	unsigned short core_id;
 	unsigned short socket_id;
 	unsigned short book_id;
+	unsigned short node_id;
 	cpumask_t thread_mask;
 	cpumask_t core_mask;
 	cpumask_t book_mask;
@@ -52,6 +54,43 @@ static inline void topology_expect_change(void) { }
 #define POLARIZATION_VM		(2)
 #define POLARIZATION_VH		(3)
 
+#define SD_BOOK_INIT	SD_CPU_INIT
+
+#ifdef CONFIG_NUMA
+
+#define cpu_to_node cpu_to_node
+static inline int cpu_to_node(int cpu)
+{
+	return per_cpu(cpu_topology, cpu).node_id;
+}
+
+/* Returns a pointer to the cpumask of CPUs on node 'node'. */
+#define cpumask_of_node cpumask_of_node
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+	return node_to_cpumask_map[node];
+}
+
+/*
+ * Returns the number of the node containing node 'node'. This
+ * architecture is flat, so it is a pretty simple function!
+ */
+#define parent_node(node) (node)
+
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
+
+#define node_distance(a, b) __node_distance(a, b)
+
+#else /* !CONFIG_NUMA */
+
+#define numa_node_id numa_node_id
+static inline int numa_node_id(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_NUMA */
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 91f56b1d8156..ec2bfc83a1e9 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -11,12 +11,12 @@
 
 #define __IGNORE_time
 
-/* Ignore NUMA system calls. Not wired up on s390. */
-#define __IGNORE_mbind
-#define __IGNORE_get_mempolicy
-#define __IGNORE_set_mempolicy
-#define __IGNORE_migrate_pages
-#define __IGNORE_move_pages
+/* NUMA system calls */
+#define __ARCH_WANT_mbind
+#define __ARCH_WANT_get_mempolicy
+#define __ARCH_WANT_set_mempolicy
+#define __ARCH_WANT_migrate_pages
+#define __ARCH_WANT_move_pages
 
 /* Ignore system calls that are also reachable via sys_socket */
 #define __IGNORE_recvmmsg
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 67878af257a0..59d2bb4e2d0c 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -204,9 +204,9 @@
 #define __NR_statfs64		265
 #define __NR_fstatfs64		266
 #define __NR_remap_file_pages	267
-/* Number 268 is reserved for new sys_mbind */
-/* Number 269 is reserved for new sys_get_mempolicy */
-/* Number 270 is reserved for new sys_set_mempolicy */
+#define __NR_mbind		268
+#define __NR_get_mempolicy	269
+#define __NR_set_mempolicy	270
 #define __NR_mq_open		271
 #define __NR_mq_unlink		272
 #define __NR_mq_timedsend	273
@@ -223,7 +223,7 @@
 #define __NR_inotify_init	284
 #define __NR_inotify_add_watch	285
 #define __NR_inotify_rm_watch	286
-/* Number 287 is reserved for new sys_migrate_pages */
+#define __NR_migrate_pages	287
 #define __NR_openat		288
 #define __NR_mkdirat		289
 #define __NR_mknodat		290
@@ -245,7 +245,7 @@
 #define __NR_sync_file_range	307
 #define __NR_tee		308
 #define __NR_vmsplice		309
-/* Number 310 is reserved for new sys_move_pages */
+#define __NR_move_pages		310
 #define __NR_getcpu		311
 #define __NR_epoll_pwait	312
 #define __NR_utimes		313
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 85a1d4770c9c..af6b0236ccf3 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -62,6 +62,7 @@
 #include <asm/os_info.h>
 #include <asm/sclp.h>
 #include <asm/sysinfo.h>
+#include <asm/numa.h>
 #include "entry.h"
 
 /*
@@ -879,6 +880,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_lowcore();
 	smp_fill_possible_mask();
         cpu_init();
+	numa_setup();
 
 	/*
 	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 1acad02681c4..f3f4a137aef6 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -276,9 +276,9 @@ SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
 SYSCALL(sys_statfs64,compat_sys_statfs64)
 SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
 SYSCALL(sys_remap_file_pages,compat_sys_remap_file_pages)
-NI_SYSCALL						/* 268 sys_mbind */
-NI_SYSCALL						/* 269 sys_get_mempolicy */
-NI_SYSCALL						/* 270 sys_set_mempolicy */
+SYSCALL(sys_mbind,compat_sys_mbind)
+SYSCALL(sys_get_mempolicy,compat_sys_get_mempolicy)
+SYSCALL(sys_set_mempolicy,compat_sys_set_mempolicy)
 SYSCALL(sys_mq_open,compat_sys_mq_open)
 SYSCALL(sys_mq_unlink,compat_sys_mq_unlink)
 SYSCALL(sys_mq_timedsend,compat_sys_mq_timedsend)
@@ -295,7 +295,7 @@ SYSCALL(sys_ioprio_get,compat_sys_ioprio_get)
 SYSCALL(sys_inotify_init,sys_inotify_init)
 SYSCALL(sys_inotify_add_watch,compat_sys_inotify_add_watch)	/* 285 */
 SYSCALL(sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
-NI_SYSCALL						/* 287 sys_migrate_pages */
+SYSCALL(sys_migrate_pages,compat_sys_migrate_pages)
 SYSCALL(sys_openat,compat_sys_openat)
 SYSCALL(sys_mkdirat,compat_sys_mkdirat)
 SYSCALL(sys_mknodat,compat_sys_mknodat)			/* 290 */
@@ -318,7 +318,7 @@ SYSCALL(sys_splice,compat_sys_splice)
 SYSCALL(sys_sync_file_range,compat_sys_s390_sync_file_range)
 SYSCALL(sys_tee,compat_sys_tee)
 SYSCALL(sys_vmsplice,compat_sys_vmsplice)
-NI_SYSCALL						/* 310 sys_move_pages */
+SYSCALL(sys_move_pages,compat_sys_move_pages)
 SYSCALL(sys_getcpu,compat_sys_getcpu)
 SYSCALL(sys_epoll_pwait,compat_sys_epoll_pwait)
 SYSCALL(sys_utimes,compat_sys_utimes)
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 5728c5bd44a8..0f5f8b09c903 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -18,7 +18,10 @@
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/nodemask.h>
+#include <linux/node.h>
 #include <asm/sysinfo.h>
+#include <asm/numa.h>
 
 #define PTF_HORIZONTAL	(0UL)
 #define PTF_VERTICAL	(1UL)
@@ -260,6 +263,7 @@ static void update_cpu_masks(void)
 		}
 	}
 	spin_unlock_irqrestore(&topology_lock, flags);
+	numa_update_cpu_topology();
 }
 
 void store_topology(struct sysinfo_15_1_x *info)
@@ -274,21 +278,21 @@ int arch_update_cpu_topology(void)
 {
 	struct sysinfo_15_1_x *info = tl_info;
 	struct device *dev;
-	int cpu;
+	int cpu, rc = 0;
 
-	if (!MACHINE_HAS_TOPOLOGY) {
-		update_cpu_masks();
-		topology_update_polarization_simple();
-		return 0;
+	if (MACHINE_HAS_TOPOLOGY) {
+		rc = 1;
+		store_topology(info);
+		tl_to_masks(info);
 	}
-	store_topology(info);
-	tl_to_masks(info);
 	update_cpu_masks();
+	if (!MACHINE_HAS_TOPOLOGY)
+		topology_update_polarization_simple();
 	for_each_online_cpu(cpu) {
 		dev = get_cpu_device(cpu);
 		kobject_uevent(&dev->kobj, KOBJ_CHANGE);
 	}
-	return 1;
+	return rc;
 }
 
 static void topology_work_fn(struct work_struct *work)
@@ -450,7 +454,6 @@ static struct sched_domain_topology_level s390_topology[] = {
 	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
 	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
-	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
 	{ NULL, },
 };
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index dc4db08286e9..2963b563621c 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -139,7 +139,7 @@ void __init mem_init(void)
 	cpumask_set_cpu(0, mm_cpumask(&init_mm));
 	atomic_set(&init_mm.context.attach_count, 1);
 
-        max_mapnr = max_low_pfn;
+	set_max_mapnr(max_low_pfn);
         high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 
 	/* Setup guest page hinting */
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
new file mode 100644
index 000000000000..7e94c8f491f7
--- /dev/null
+++ b/arch/s390/numa/Makefile
@@ -0,0 +1 @@
+obj-y			+= numa.o
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
new file mode 100644
index 000000000000..0416a3671e33
--- /dev/null
+++ b/arch/s390/numa/numa.c
@@ -0,0 +1,180 @@
+/*
+ * NUMA support for s390
+ *
+ * Implement NUMA core code.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/cpumask.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/node.h>
+
+#include <asm/numa.h>
+#include "numa_mode.h"
+
+pg_data_t *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
+
+cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+const struct numa_mode numa_mode_plain = {
+	.name = "plain",
+};
+
+static const struct numa_mode *mode = &numa_mode_plain;
+
+int numa_pfn_to_nid(unsigned long pfn)
+{
+	return mode->__pfn_to_nid == NULL ? 0 : mode->__pfn_to_nid(pfn);
+}
+
+void numa_update_cpu_topology(void)
+{
+	if (mode->update_cpu_topology)	/* hook is optional per mode */
+		mode->update_cpu_topology();
+}
+
+int __node_distance(int a, int b)
+{
+	return mode->distance == NULL ? 0 : mode->distance(a, b);
+}
+
+int numa_debug_enabled;
+
+/*
+ * alloc_node_data() - Allocate a zeroed, properly aligned pg_data_t
+ */
+static __init pg_data_t *alloc_node_data(void)
+{
+	pg_data_t *res;
+
+	res = (pg_data_t *) memblock_alloc(sizeof(*res), __alignof__(*res));
+	if (!res)
+		panic("Could not allocate memory for node data!\n");
+	memset(res, 0, sizeof(*res));
+	return res;
+}
+
+/*
+ * numa_setup_memory() - Assign bootmem to nodes
+ *
+ * The memory is first added to memblock without any respect to nodes.
+ * This is fixed before remaining memblock memory is handed over to the
+ * buddy allocator.
+ * An important side effect is that large bootmem allocations might easily
+ * cross node boundaries, which can be needed for large allocations with
+ * smaller memory stripes in each node (e.g. when using NUMA emulation).
+ *
+ * Memory defines nodes:
+ * Therefore this routine also sets the nodes online with memory.
+ */
+static void __init numa_setup_memory(void)
+{
+	unsigned long cur_base, align, end_of_dram;
+	int nid = 0;
+
+	end_of_dram = memblock_end_of_DRAM();
+	align = mode->align ? mode->align() : ULONG_MAX;
+
+	/*
+	 * Step through memory in "align" sized chunks and assign each chunk
+	 * to the node the mode reports for its first PFN; without an align
+	 * hook the step size is ULONG_MAX. Every node seen is set online.
+	 */
+	cur_base = 0;
+	do {
+		nid = numa_pfn_to_nid(PFN_DOWN(cur_base));
+		node_set_online(nid);
+		memblock_set_node(cur_base, align, &memblock.memory, nid);
+		cur_base += align;
+	} while (cur_base < end_of_dram);
+
+	/* Allocate node_data for all MAX_NUMNODES, fill it for online nodes */
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		NODE_DATA(nid) = alloc_node_data();
+
+	for_each_online_node(nid) {
+		unsigned long start_pfn, end_pfn;
+		unsigned long t_start, t_end;
+		int i;
+
+		start_pfn = ULONG_MAX;
+		end_pfn = 0;
+		for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) {
+			if (t_start < start_pfn)
+				start_pfn = t_start;
+			if (t_end > end_pfn)
+				end_pfn = t_end;
+		}
+		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+		NODE_DATA(nid)->node_id = nid;
+	}
+}
+
+/*
+ * numa_setup() - Earliest initialization
+ *
+ * Report the selected mode, run its optional setup hook and assign memory.
+ */
+void __init numa_setup(void)
+{
+	pr_info("NUMA mode: %s\n", mode->name);
+	if (mode->setup)
+		mode->setup();
+	numa_setup_memory();
+	memblock_dump_all();
+}
+
+
+/*
+ * numa_init_early() - Early initcall: seed the node to CPU map
+ *
+ * This runs when only one CPU is online and before the first
+ * topology update is called for by the scheduler. Always returns 0.
+ */
+static int __init numa_init_early(void)
+{
+	/* Attach all possible CPUs to node 0 for now. */
+	cpumask_copy(node_to_cpumask_map[0], cpu_possible_mask);
+	return 0;
+}
+early_initcall(numa_init_early);
+
+/*
+ * numa_init_late() - Initialization initcall
+ *
+ * Register all online NUMA nodes; register_one_node() results are ignored.
+ */
+static int __init numa_init_late(void)
+{
+	int nid;
+
+	for_each_online_node(nid)
+		register_one_node(nid);
+	return 0;
+}
+device_initcall(numa_init_late);
+
+static int __init parse_debug(char *parm)
+{
+	numa_debug_enabled = 1;		/* parm is not evaluated */
+	return 0;
+}
+early_param("numa_debug", parse_debug);
+
+static int __init parse_numa(char *parm)
+{
+	if (parm && strcmp(parm, numa_mode_plain.name) == 0)
+		mode = &numa_mode_plain;
+	return 0;	/* missing or unknown value: keep the current mode */
+}
+early_param("numa", parse_numa);
diff --git a/arch/s390/numa/numa_mode.h b/arch/s390/numa/numa_mode.h
new file mode 100644
index 000000000000..775659848011
--- /dev/null
+++ b/arch/s390/numa/numa_mode.h
@@ -0,0 +1,23 @@
+/*
+ * NUMA support for s390
+ *
+ * Define declarations used for communication between NUMA mode
+ * implementations and NUMA core functionality.
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef __S390_NUMA_MODE_H
+#define __S390_NUMA_MODE_H
+
+struct numa_mode {
+	const char *name;			/* Name of mode */
+	void (*setup)(void);			/* Initialize mode */
+	void (*update_cpu_topology)(void);	/* Called by topology code */
+	int (*__pfn_to_nid)(unsigned long pfn);	/* PFN to node ID */
+	unsigned long (*align)(void);		/* Minimum node alignment */
+	int (*distance)(int a, int b);		/* Distance between two nodes */
+};
+
+extern const struct numa_mode numa_mode_plain;
+
+#endif /* __S390_NUMA_MODE_H */
-- 
cgit v1.2.3


From e8054b654bf5d4f549f4f24b708acce6d2718b1b Mon Sep 17 00:00:00 2001
From: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Date: Thu, 6 Mar 2014 18:39:39 +0100
Subject: s390/numa: add topology tree infrastructure

NUMA emulation needs proper means to mangle the book/mc/core topology
of the machine. The topology tree (toptree) consistently maintains cpu
masks for the root, each node, and all leaves of the tree while the
user may use the toptree functions to rearrange the tree in various
ways.

This patch contains several changes from Michael Holzheu.

Signed-off-by: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/numa/Makefile  |   1 +
 arch/s390/numa/toptree.c | 342 +++++++++++++++++++++++++++++++++++++++++++++++
 arch/s390/numa/toptree.h |  60 +++++++++
 3 files changed, 403 insertions(+)
 create mode 100644 arch/s390/numa/toptree.c
 create mode 100644 arch/s390/numa/toptree.h

(limited to 'arch/s390')

diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
index 7e94c8f491f7..31372293b62e 100644
--- a/arch/s390/numa/Makefile
+++ b/arch/s390/numa/Makefile
@@ -1 +1,2 @@
 obj-y			+= numa.o
+obj-y			+= toptree.o
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
new file mode 100644
index 000000000000..902d350d859a
--- /dev/null
+++ b/arch/s390/numa/toptree.c
@@ -0,0 +1,342 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <asm/numa.h>
+
+#include "toptree.h"
+
+/**
+ * toptree_alloc - Create a detached tree node
+ * @level: The node's vertical level; level 0 contains the leaves.
+ * @id: ID number; only meaningful among the node's siblings
+ *
+ * The new node has an empty mask, no parent and no children.
+ *
+ * RETURNS:
+ * Pointer to the new tree node or NULL if the allocation failed
+ */
+struct toptree *toptree_alloc(int level, int id)
+{
+	struct toptree *node;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (node == NULL)
+		return NULL;
+	node->level = level;
+	node->id = id;
+	cpumask_clear(&node->mask);
+	INIT_LIST_HEAD(&node->sibling);
+	INIT_LIST_HEAD(&node->children);
+	return node;
+}
+
+/**
+ * toptree_remove - Detach a tree node from its parent
+ * @cand: Pointer to the node to detach; it must have a parent
+ *
+ * @cand leaves its parent's child list and becomes parentless. The masks
+ * of the former parent and its ancestors are rebuilt without @cand.
+ */
+static void toptree_remove(struct toptree *cand)
+{
+	struct toptree *former = cand->parent;
+
+	/* Unlink first so that the mask rebuild no longer sees @cand */
+	cand->parent = NULL;
+	list_del_init(&cand->sibling);
+	toptree_update_mask(former);
+}
+
+/**
+ * toptree_free - Release a tree node and its whole subtree
+ * @cand: Pointer to the tree node to release
+ *
+ * A node that still has a parent is first detached with
+ * toptree_remove. All children are then released recursively
+ * before @cand itself is freed.
+ */
+void toptree_free(struct toptree *cand)
+{
+	struct toptree *cur, *next;
+
+	if (cand->parent != NULL)
+		toptree_remove(cand);
+	list_for_each_entry_safe(cur, next, &cand->children, sibling)
+		toptree_free(cur);
+	kfree(cand);
+}
+
+/**
+ * toptree_update_mask - Recompute cpumasks from a node up to the root
+ * @cand: Pointer to the tree node to start at
+ *
+ * Rebuilds @cand's cpumask as the union of its children's masks and
+ * then does the same for each ancestor in turn, so that a change
+ * below @cand becomes visible in every mask above it.
+ *
+ * NOTE:
+ * Do not call this on a leaf: a leaf has no children, so its CPU
+ * mask would be cleared and its content lost.
+ */
+void toptree_update_mask(struct toptree *cand)
+{
+	struct toptree *child;
+
+	do {
+		cpumask_clear(&cand->mask);
+		toptree_for_each_child(child, cand)
+			cpumask_or(&cand->mask, &cand->mask, &child->mask);
+	} while ((cand = cand->parent) != NULL);
+}
+
+/**
+ * toptree_insert - Append a node to another node's children
+ * @cand: Pointer to the node to insert
+ * @target: Pointer to the node that becomes @cand's parent
+ *
+ * @cand is added as last child of @target and the masks are rebuilt.
+ *
+ * RETURNS:
+ * 0 on success, -1 if either argument is NULL or @target is not
+ * exactly one level above @cand.
+ */
+static int toptree_insert(struct toptree *cand, struct toptree *target)
+{
+	/* Only a node from the level directly below may become a child */
+	if (cand == NULL || target == NULL ||
+	    cand->level + 1 != target->level)
+		return -1;
+	cand->parent = target;
+	list_add_tail(&cand->sibling, &target->children);
+	toptree_update_mask(target);
+	return 0;
+}
+
+/**
+ * toptree_move_children - Hand all children of a node to another node
+ * @cand: Pointer to the node that gives up its children
+ * @target: Pointer to the node that receives them
+ *
+ * Every child of @cand is passed to toptree_move with @target as target.
+ */
+static void toptree_move_children(struct toptree *cand, struct toptree *target)
+{
+	struct toptree *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &cand->children, sibling)
+		toptree_move(cur, target);
+}
+
+/**
+ * toptree_unify - Merge children with same ID
+ * @cand: Pointer to node whose direct children should be made unique
+ *
+ * Children of @cand sharing an ID are merged into one child that takes over
+ * their children; empty-mask children are dropped; then recurse per child.
+ * NOTE(review): toptree_alloc/toptree_get_child results are not checked.
+ */
+void toptree_unify(struct toptree *cand)
+{
+	struct toptree *child, *tmp, *cand_copy;
+
+	/* Threads cannot be split, cores are not split */
+	if (cand->level < 2)
+		return;
+
+	cand_copy = toptree_alloc(cand->level, 0);
+	toptree_for_each_child_safe(child, tmp, cand) {
+		struct toptree *tmpchild;
+
+		if (!cpumask_empty(&child->mask)) {
+			tmpchild = toptree_get_child(cand_copy, child->id);
+			toptree_move_children(child, tmpchild);
+		}
+		toptree_free(child);
+	}
+	toptree_move_children(cand_copy, cand);
+	toptree_free(cand_copy);
+
+	toptree_for_each_child(child, cand)
+		toptree_unify(child);
+}
+
+/**
+ * toptree_move - Move a node to another context
+ * @cand: Pointer to the node to move
+ * @target: Pointer to the node where @cand should go
+ *
+ * In the easiest case @cand is exactly on the level below @target
+ * and will be immediately moved to the target.
+ *
+ * If @target's level is not the direct parent level of @cand,
+ * nodes for the missing levels are created and put between
+ * @cand and @target. The "stacking" nodes' IDs are taken from
+ * @cand's parents.
+ *
+ * This likely leaves redundant nodes behind; toptree_unify merges them.
+ * NOTE(review): a failing toptree_alloc is not handled in the loop below.
+ */
+void toptree_move(struct toptree *cand, struct toptree *target)
+{
+	struct toptree *stack_target, *real_insert_point, *ptr, *tmp;
+
+	if (cand->level + 1 == target->level) {
+		toptree_remove(cand);
+		toptree_insert(cand, target);
+		return;
+	}
+
+	real_insert_point = NULL;
+	ptr = cand;
+	stack_target = NULL;
+
+	do {
+		tmp = stack_target;
+		stack_target = toptree_alloc(ptr->level + 1,
+					     ptr->parent->id);
+		toptree_insert(tmp, stack_target);	/* tmp NULL: no-op */
+		if (!real_insert_point)
+			real_insert_point = stack_target;
+		ptr = ptr->parent;
+	} while (stack_target->level < (target->level - 1));
+
+	toptree_remove(cand);
+	toptree_insert(cand, real_insert_point);
+	toptree_insert(stack_target, target);
+}
+
+/**
+ * toptree_get_child - Look up a child by ID, creating it if missing
+ * @cand: Pointer to the tree node whose children are searched
+ * @id: The desired child's ID
+ *
+ * Returns the first child of @cand whose ID equals @id. Without a match
+ * a new child one level below @cand is allocated and appended; NULL is
+ * returned if that allocation fails.
+ */
+struct toptree *toptree_get_child(struct toptree *cand, int id)
+{
+	struct toptree *cur, *fresh;
+
+	list_for_each_entry(cur, &cand->children, sibling)
+		if (cur->id == id)
+			return cur;
+	fresh = toptree_alloc(cand->level - 1, id);
+	toptree_insert(fresh, cand);	/* no-op when allocation failed */
+	return fresh;
+}
+
+/**
+ * toptree_first - Find the first node of a level within a subtree
+ * @context: Pointer to the root of the subtree to search
+ * @level: The level of interest
+ *
+ * RETURNS:
+ * @context itself if it is on @level, else its first descendant on
+ * that level in child order, or NULL if there is none
+ */
+struct toptree *toptree_first(struct toptree *context, int level)
+{
+	struct toptree *sub, *hit;
+
+	if (level == context->level)
+		return context;
+
+	/* Depth-first; an empty child list just falls through to NULL */
+	toptree_for_each_child(sub, context) {
+		hit = toptree_first(sub, level);
+		if (hit != NULL)
+			return hit;
+	}
+
+	return NULL;
+}
+
+/**
+ * toptree_next_sibling - Successor of a node among its siblings
+ * @cur: Pointer to a tree node
+ *
+ * RETURNS:
+ * The node following @cur in its parent's child list, or NULL when
+ * @cur has no parent or is the last child.
+ */
+static struct toptree *toptree_next_sibling(struct toptree *cur)
+{
+	struct toptree *up = cur->parent, *tail;
+
+	if (!up)
+		return NULL;
+	tail = list_last_entry(&up->children, struct toptree, sibling);
+	/* The tail of the child list has no successor */
+	return cur == tail ? NULL : list_next_entry(cur, sibling);
+}
+
+/**
+ * toptree_next - Tree traversal function
+ * @cur: Pointer to current element; NULL is accepted and yields NULL
+ * @context: Pointer to the root node of the tree or subtree to
+ * be traversed.
+ * @level: The level of interest.
+ *
+ * RETURNS:
+ * Pointer to the node on level @level that follows @cur within @context,
+ * or NULL when there is no next node.
+ */
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+			     int level)
+{
+	struct toptree *cur_context, *tmp;
+
+	if (!cur)
+		return NULL;
+
+	if (context->level == level)	/* @context is the only match */
+		return NULL;
+
+	tmp = toptree_next_sibling(cur);
+	if (tmp != NULL)
+		return tmp;
+
+	cur_context = cur;
+	while (cur_context->level < context->level - 1) {
+		/* Step up */
+		cur_context = cur_context->parent;
+		/* Step aside */
+		tmp = toptree_next_sibling(cur_context);
+		if (tmp != NULL) {
+			/* Step down */
+			tmp = toptree_first(tmp, level);
+			if (tmp != NULL)
+				return tmp;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * toptree_count - Count the nodes of one level within a subtree
+ * @context: Pointer to the root of the subtree to inspect
+ * @level: The level whose nodes are counted
+ *
+ * RETURNS:
+ * Number of nodes found by walking toptree_first/toptree_next
+ */
+int toptree_count(struct toptree *context, int level)
+{
+	struct toptree *pos = toptree_first(context, level);
+	int total;
+
+	for (total = 0; pos != NULL; total++)
+		pos = toptree_next(pos, context, level);
+	return total;
+}
diff --git a/arch/s390/numa/toptree.h b/arch/s390/numa/toptree.h
new file mode 100644
index 000000000000..bdf502027af4
--- /dev/null
+++ b/arch/s390/numa/toptree.h
@@ -0,0 +1,60 @@
+/*
+ * NUMA support for s390
+ *
+ * A tree structure used for machine topology mangling
+ *
+ * Copyright IBM Corp. 2015
+ */
+#ifndef S390_TOPTREE_H
+#define S390_TOPTREE_H
+
+#include <linux/cpumask.h>
+#include <linux/list.h>
+
+struct toptree {
+	int level;			/* vertical level; 0 holds the leaves */
+	int id;				/* unique among siblings only */
+	cpumask_t mask;			/* non-leaves: union of child masks */
+	struct toptree *parent;		/* NULL while detached */
+	struct list_head sibling;	/* entry in parent->children */
+	struct list_head children;	/* list of child nodes */
+};
+
+struct toptree *toptree_alloc(int level, int id);
+void toptree_free(struct toptree *cand);
+void toptree_update_mask(struct toptree *cand);
+void toptree_unify(struct toptree *cand);
+struct toptree *toptree_get_child(struct toptree *cand, int id);
+void toptree_move(struct toptree *cand, struct toptree *target);
+int toptree_count(struct toptree *context, int level);
+
+struct toptree *toptree_first(struct toptree *context, int level);
+struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
+			     int level);
+
+#define toptree_for_each_child(child, ptree)				\
+	list_for_each_entry(child, &(ptree)->children, sibling)
+/* Like toptree_for_each_child, but @child may be unlinked while iterating */
+#define toptree_for_each_child_safe(child, ptmp, ptree)			\
+	list_for_each_entry_safe(child, ptmp, &(ptree)->children, sibling)
+/* True if @ptree has no parent or is the last child of its parent */
+#define toptree_is_last(ptree)					\
+	(((ptree)->parent == NULL) ||				\
+	 ((ptree)->parent->children.prev == &(ptree)->sibling))
+/* Visit every node on level @ttype within the subtree rooted at @cont */
+#define toptree_for_each(ptree, cont, ttype)		\
+	for (ptree = toptree_first(cont, ttype);	\
+	     ptree != NULL;				\
+	     ptree = toptree_next(ptree, cont, ttype))
+/* Like toptree_for_each, with the successor fetched ahead into @tmp */
+#define toptree_for_each_safe(ptree, tmp, cont, ttype)		\
+	for (ptree = toptree_first(cont, ttype),		\
+		     tmp = toptree_next(ptree, cont, ttype);	\
+	     ptree != NULL;					\
+	     ptree = tmp,					\
+		     tmp = toptree_next(ptree, cont, ttype))
+/* Visit the nodes on @start's level within the subtree of @start's parent */
+#define toptree_for_each_sibling(ptree, start)			\
+	toptree_for_each(ptree, (start)->parent, (start)->level)
+
+#endif /* S390_TOPTREE_H */
-- 
cgit v1.2.3


From c29a7baf091fc6b2c9e40561030f8c62e6145a19 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Thu, 6 Mar 2014 18:47:21 +0100
Subject: s390/numa: add emulation support

NUMA emulation (aka fake NUMA) distributes the available memory to nodes
without using real topology information about the physical memory of the
machine.

Splitting the system memory into nodes replicates the memory management
structures for each node. Particularly each node has its own "mm locks"
and its own "kswapd" task.

For large systems, under certain conditions, this results in improved
system performance and/or latency based on reduced pressure on the mm
locks and the kswapd tasks.

NUMA emulation distributes CPUs to nodes while respecting the original
machine topology information. This is done by trying to avoid separating
CPUs which reside on the same book or even on the same MC. Because the
current Linux scheduler code requires a stable cpu to node mapping, cores
are pinned to nodes when the first CPU thread is set online.

This patch is based on the initial implementation from Philipp Hachtmann.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/Kconfig            |  37 ++++
 arch/s390/include/asm/numa.h |   4 +
 arch/s390/numa/Makefile      |   1 +
 arch/s390/numa/mode_emu.c    | 511 +++++++++++++++++++++++++++++++++++++++++++
 arch/s390/numa/numa.c        |   4 +
 arch/s390/numa/numa_mode.h   |   1 +
 drivers/s390/char/sclp_cmd.c |  18 +-
 7 files changed, 569 insertions(+), 7 deletions(-)
 create mode 100644 arch/s390/numa/mode_emu.c

(limited to 'arch/s390')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 25510adb07d3..cb418dcc2d45 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -423,6 +423,43 @@ config NODES_SHIFT
 	  Specify the maximum number of NUMA nodes available on the target
 	  system. Increases memory reserved to accommodate various tables.
 
+menu "Select NUMA modes"
+	depends on NUMA
+
+config NUMA_EMU
+	bool "NUMA emulation"
+	default y
+	help
+	  NUMA emulation mode will split the available system memory into
+	  equal chunks which then are distributed over the configured number
+	  of nodes in a round-robin manner.
+
+	  The number of fake nodes is limited by the number of available memory
+	  chunks (i.e. memory size / fake size) and the number of supported
+	  nodes in the kernel.
+
+	  The CPUs are assigned to the nodes in a way that partially respects
+	  the original machine topology (if supported by the machine).
+	  Fair distribution of the CPUs is not guaranteed.
+
+config EMU_SIZE
+	hex "NUMA emulation memory chunk size"
+	default 0x10000000
+	range 0x400000 0x100000000
+	depends on NUMA_EMU
+	help
+	  Select the default size by which the memory is chopped and then
+	  assigned to emulated NUMA nodes.
+
+	  This can be overridden by specifying
+
+	  emu_size=<n>
+
+	  on the kernel command line where also suffixes K, M, G, and T are
+	  supported.
+
+endmenu
+
 config SCHED_MC
 	def_bool n
 
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
index ea4edbfba9f6..2a0efc63b9e5 100644
--- a/arch/s390/include/asm/numa.h
+++ b/arch/s390/include/asm/numa.h
@@ -26,6 +26,10 @@ extern int numa_debug_enabled;
 
 static inline void numa_setup(void) { }
 static inline void numa_update_cpu_topology(void) { }
+static inline int numa_pfn_to_nid(unsigned long pfn)
+{
+	return 0;
+}
 
 #endif /* CONFIG_NUMA */
 #endif /* _ASM_S390_NUMA_H */
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
index 31372293b62e..f94ecaffa71b 100644
--- a/arch/s390/numa/Makefile
+++ b/arch/s390/numa/Makefile
@@ -1,2 +1,3 @@
 obj-y			+= numa.o
 obj-y			+= toptree.o
+obj-$(CONFIG_NUMA_EMU)	+= mode_emu.o
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
new file mode 100644
index 000000000000..9d4e1e15a6f0
--- /dev/null
+++ b/arch/s390/numa/mode_emu.c
@@ -0,0 +1,511 @@
+/*
+ * NUMA support for s390
+ *
+ * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
+ * without using real topology information about the physical memory of the
+ * machine.
+ *
+ * It distributes the available CPUs to nodes while respecting the original
+ * machine topology information. This is done by trying to avoid separating
+ * CPUs which reside on the same book or even on the same MC.
+ *
+ * Because the current Linux scheduler code requires a stable cpu to node
+ * mapping, cores are pinned to nodes when the first CPU thread is set online.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#define KMSG_COMPONENT "numa_emu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/node.h>
+#include <linux/memory.h>
+#include <asm/smp.h>
+#include <asm/topology.h>
+#include "numa_mode.h"
+#include "toptree.h"
+
+/* Distances between the different system components */
+#define DIST_EMPTY	0
+#define DIST_CORE	1
+#define DIST_MC		2
+#define DIST_BOOK	3
+#define DIST_MAX	4
+
+/* Node distance reported to common code */
+#define EMU_NODE_DIST	10
+
+/* Node ID for free (not yet pinned) cores */
+#define NODE_ID_FREE	-1
+
+/* Different levels of toptree */
+enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY};
+
+/* The two toptree IDs */
+enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
+
+/* Number of NUMA nodes */
+static int emu_nodes = 1;
+/* NUMA stripe size */
+static unsigned long emu_size;
+/* Pinned core to node mapping */
+static int cores_to_node_id[CONFIG_NR_CPUS];
+/* Total number of pinned cores */
+static int cores_total;
+/* Number of cores per node without extra cores */
+static int cores_per_node_target;
+/* Number of cores pinned to node */
+static int cores_per_node[MAX_NUMNODES];
+
+/* Pin a core to a node; a pinned core only warns on a different node */
+static void pin_core_to_node(int core_id, int node_id)
+{
+	int *slot = &cores_to_node_id[core_id];
+
+	if (*slot != NODE_ID_FREE) {
+		WARN_ON(*slot != node_id);
+		return;
+	}
+	*slot = node_id;
+	cores_per_node[node_id]++;
+	cores_total++;
+}
+
+/*
+ * Number of cores pinned to a node, looked up by the node's ID
+ */
+static int cores_pinned(struct toptree *node)
+{
+	return cores_per_node[node->id];
+}
+
+/*
+ * ID of the node the core is pinned to, NODE_ID_FREE if it is not pinned
+ */
+static int core_pinned_to_node_id(struct toptree *core)
+{
+	return cores_to_node_id[core->id];
+}
+
+/*
+ * Number of cores in the tree that are not yet pinned, i.e. whose
+ * entry in the core to node mapping still reads NODE_ID_FREE.
+ * All CORE level nodes below @tree are considered.
+ */
+static int cores_free(struct toptree *tree)
+{
+	struct toptree *cur;
+	int unpinned = 0;
+
+	toptree_for_each(cur, tree, CORE)
+		unpinned += core_pinned_to_node_id(cur) == NODE_ID_FREE;
+	return unpinned;
+}
+
+/*
+ * Return node of core: three levels up (core -> MC -> book -> node)
+ */
+static struct toptree *core_node(struct toptree *core)
+{
+	return core->parent->parent->parent;
+}
+
+/*
+ * Return book of core: two levels up (core -> MC -> book)
+ */
+static struct toptree *core_book(struct toptree *core)
+{
+	return core->parent->parent;
+}
+
+/*
+ * Return mc of core: its direct parent
+ */
+static struct toptree *core_mc(struct toptree *core)
+{
+	return core->parent;
+}
+
+/*
+ * Distance between two cores, derived from their book and MC IDs
+ */
+static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
+{
+	struct toptree *mc1 = core1->parent, *mc2 = core2->parent;
+
+	if (mc1->parent->id != mc2->parent->id)
+		return DIST_BOOK;
+	/* Same book: either a different MC or (a sibling on) the same MC */
+	return mc1->id != mc2->id ? DIST_MC : DIST_CORE;
+}
+
+/*
+ * Smallest distance from any core of a node to a core (DIST_EMPTY if none)
+ */
+static int dist_node_to_core(struct toptree *node, struct toptree *core)
+{
+	struct toptree *member;
+	int nearest = DIST_MAX;
+
+	toptree_for_each(member, node, CORE)
+		nearest = min(nearest, dist_core_to_core(member, core));
+	return nearest != DIST_MAX ? nearest : DIST_EMPTY;
+}
+
+/*
+ * Unify the tree, then make sure all emu_nodes nodes exist (again).
+ */
+static void toptree_unify_tree(struct toptree *tree)
+{
+	int nid = 0;
+
+	toptree_unify(tree);
+	while (nid < emu_nodes)
+		toptree_get_child(tree, nid++);
+}
+
+/*
+ * Pick the node for @core: the node it is already pinned to, otherwise the
+ * nearest node (first one wins on ties) that holds fewer than
+ * "cores_per_node_target + extra" pinned cores.
+ * Returns NULL if no node qualifies.
+ */
+static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
+				     int extra)
+{
+	int limit = cores_per_node_target + extra;
+	int pinned_to = core_pinned_to_node_id(core);
+	struct toptree *node, *winner = NULL;
+	int dist, dist_winner = DIST_MAX;
+
+	toptree_for_each(node, numa, NODE) {
+		/* A core that is already pinned has no choice */
+		if (node->id == pinned_to)
+			return node;
+		/* Full nodes are not candidates */
+		if (cores_pinned(node) >= limit)
+			continue;
+		dist = dist_node_to_core(node, core);
+		if (dist >= dist_winner)
+			continue;
+		dist_winner = dist;
+		winner = node;
+	}
+	return winner;
+}
+
+/*
+ * Move and pin core by core, allowing "extra" cores beyond the target
+ */
+static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
+				   int extra)
+{
+	struct toptree *core, *next, *dest;
+
+	toptree_for_each_safe(core, next, phys, CORE) {
+		dest = node_for_core(numa, core, extra);
+		if (dest == NULL)
+			return;	/* no eligible node left */
+		toptree_move(core, dest);
+		pin_core_to_node(core->id, dest->id);
+	}
+}
+
+/*
+ * Move structures of given level to specified NUMA node.
+ * A structure is moved if its core count equals ("perfect") or does not
+ * exceed (otherwise) the number of cores @node still lacks to reach
+ * cores_per_node_target.
+ */
+static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
+				    enum toptree_level level, bool perfect)
+{
+	struct toptree *cur, *next;
+	int room, wanted;
+
+	toptree_for_each_safe(cur, next, phys, level) {
+		room = cores_per_node_target - toptree_count(node, CORE);
+		wanted = toptree_count(cur, CORE);
+		/* "perfect" demands an exact fit, otherwise it must just fit */
+		if (perfect ? room == wanted : room >= wanted)
+			toptree_move(cur, node);
+	}
+}
+
+/*
+ * Offer the structures of a level to every NUMA node. With "perfect" only
+ * exactly fitting structures are moved, otherwise also smaller ones.
+ */
+static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
+			       enum toptree_level level, bool perfect)
+{
+	struct toptree *dest;
+
+	for (dest = toptree_first(numa, NODE); dest != NULL;
+	     dest = toptree_next(dest, numa, NODE))
+		move_level_to_numa_node(dest, phys, level, perfect);
+}
+
+/*
+ * First run: move whole books, then whole MCs, and pin the moved cores
+ */
+static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
+{
+	struct toptree *core;
+
+	/* Always try to move perfectly fitting structures first */
+	move_level_to_numa(numa, phys, BOOK, true);
+	move_level_to_numa(numa, phys, BOOK, false);
+	move_level_to_numa(numa, phys, MC, true);
+	move_level_to_numa(numa, phys, MC, false);
+	/* Now pin all the moved cores */
+	toptree_for_each(core, numa, CORE)
+		pin_core_to_node(core->id, core_node(core)->id);
+}
+
+/*
+ * Allocate a topology root plus child nodes 0..nodes-1; panic on failure
+ */
+static struct toptree *toptree_new(int id, int nodes)
+{
+	struct toptree *tree;
+	int nid;
+
+	tree = toptree_alloc(TOPOLOGY, id);
+	if (!tree)
+		goto fail;
+	for (nid = 0; nid < nodes; nid++) {
+		if (!toptree_get_child(tree, nid))
+			goto fail;
+	}
+	return tree;
+fail:
+	panic("NUMA emulation could not allocate topology");
+}
+
+/*
+ * Build a new NUMA tree and move the cores of @phys into it. The book/MC
+ * pass runs on the first call only; later calls distribute single cores.
+ */
+static struct toptree *toptree_to_numa(struct toptree *phys)
+{
+	static int first = 1;
+	struct toptree *numa;
+
+	cores_per_node_target = (cores_total + cores_free(phys)) / emu_nodes;
+	numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
+	if (first) {
+		toptree_to_numa_first(numa, phys);
+		first = 0;
+	}
+	toptree_to_numa_single(numa, phys, 0);	/* fill up to the target */
+	toptree_to_numa_single(numa, phys, 1);	/* then allow one extra core */
+	toptree_unify_tree(numa);
+
+	WARN_ON(cpumask_weight(&phys->mask));
+	return numa;
+}
+
+/*
+ * Build a toptree (node 0 -> book -> MC -> core) for all online CPUs
+ */
+static struct toptree *toptree_from_topology(void)
+{
+	struct toptree *phys, *node, *book, *mc, *core;
+	struct cpu_topology_s390 *top;
+	int cpu;
+
+	phys = toptree_new(TOPTREE_ID_PHYS, 1);
+	for_each_online_cpu(cpu) {
+		top = &per_cpu(cpu_topology, cpu);
+		/* Stop descending at the first failed lookup/allocation */
+		node = toptree_get_child(phys, 0);
+		book = node ? toptree_get_child(node, top->book_id) : NULL;
+		mc = book ? toptree_get_child(book, top->socket_id) : NULL;
+		core = mc ? toptree_get_child(mc, top->core_id) : NULL;
+		if (!core)
+			panic("NUMA emulation could not allocate memory");
+		cpumask_set_cpu(cpu, &core->mask);
+		toptree_update_mask(mc);
+	}
+	return phys;
+}
+
+/*
+ * Add toptree core to topology and create correct CPU masks
+ */
+static void topology_add_core(struct toptree *core)
+{
+	struct cpu_topology_s390 *top;
+	int cpu;
+
+	for_each_cpu(cpu, &core->mask) {
+		top = &per_cpu(cpu_topology, cpu);
+		cpumask_copy(&top->thread_mask, &core->mask);
+		cpumask_copy(&top->core_mask, &core_mc(core)->mask);
+		cpumask_copy(&top->book_mask, &core_book(core)->mask);
+		cpumask_set_cpu(cpu, node_to_cpumask_map[core_node(core)->id]);
+		top->node_id = core_node(core)->id;
+	}
+}
+
+/*
+ * Apply toptree to topology and create CPU masks
+ */
+static void toptree_to_topology(struct toptree *numa)
+{
+	struct toptree *core;
+	int i;
+
+	/* Clear all node masks */
+	for (i = 0; i < MAX_NUMNODES; i++)
+		cpumask_clear(node_to_cpumask_map[i]);
+
+	/* Rebuild all masks */
+	toptree_for_each(core, numa, CORE)
+		topology_add_core(core);
+}
+
+/*
+ * Show the node to core mapping
+ */
+static void print_node_to_core_map(void)
+{
+	int nid, cid;
+
+	if (!numa_debug_enabled)
+		return;
+	printk(KERN_DEBUG "NUMA node to core mapping\n");
+	for (nid = 0; nid < emu_nodes; nid++) {
+		printk(KERN_DEBUG "  node %3d: ", nid);
+		for (cid = 0; cid < ARRAY_SIZE(cores_to_node_id); cid++) {
+			if (cores_to_node_id[cid] == nid)
+				printk(KERN_CONT "%d ", cid);
+		}
+		printk(KERN_CONT "\n");
+	}
+}
+
+/*
+ * Transfer physical topology into a NUMA topology and modify CPU masks
+ * according to the NUMA topology.
+ *
+ * This function is called under the CPU hotplug lock.
+ */
+static void emu_update_cpu_topology(void)
+{
+	struct toptree *phys, *numa;
+
+	phys = toptree_from_topology();
+	numa = toptree_to_numa(phys);
+	toptree_free(phys);
+	toptree_to_topology(numa);
+	toptree_free(numa);
+	print_node_to_core_map();
+}
+
+/*
+ * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum
+ * alignment (needed for memory hotplug).
+ */
+static unsigned long emu_setup_size_adjust(unsigned long size)
+{
+	size = size ? : CONFIG_EMU_SIZE;
+	size = roundup(size, memory_block_size_bytes());
+	return size;
+}
+
+/*
+ * If we have not enough memory for the specified nodes, reduce the node count.
+ */
+static int emu_setup_nodes_adjust(int nodes)
+{
+	int nodes_max;
+
+	nodes_max = memblock.memory.total_size / emu_size;
+	nodes_max = max(nodes_max, 1);
+	if (nodes_max >= nodes)
+		return nodes;
+	pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
+	return nodes_max;
+}
+
+/*
+ * Early emu setup
+ */
+static void emu_setup(void)
+{
+	int i;
+
+	emu_size = emu_setup_size_adjust(emu_size);
+	emu_nodes = emu_setup_nodes_adjust(emu_nodes);
+	for (i = 0; i < ARRAY_SIZE(cores_to_node_id); i++)
+		cores_to_node_id[i] = NODE_ID_FREE;
+	pr_info("Creating %d nodes with memory stripe size %ld MB\n",
+		emu_nodes, emu_size >> 20);
+}
+
+/*
+ * Return node id for given page number
+ */
+static int emu_pfn_to_nid(unsigned long pfn)
+{
+	return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
+}
+
+/*
+ * Return stripe size
+ */
+static unsigned long emu_align(void)
+{
+	return emu_size;
+}
+
+/*
+ * Return distance between two nodes
+ */
+static int emu_distance(int node1, int node2)
+{
+	return (node1 != node2) * EMU_NODE_DIST;
+}
+
+/*
+ * Define callbacks for generic s390 NUMA infrastructure
+ */
+const struct numa_mode numa_mode_emu = {
+	.name = "emu",
+	.setup = emu_setup,
+	.update_cpu_topology = emu_update_cpu_topology,
+	.__pfn_to_nid = emu_pfn_to_nid,
+	.align = emu_align,
+	.distance = emu_distance,
+};
+
+/*
+ * Kernel parameter: emu_nodes=<n>
+ */
+static int __init early_parse_emu_nodes(char *p)
+{
+	int count;
+
+	if (kstrtoint(p, 0, &count) != 0 || count <= 0)
+		return 0;
+	if (count <= 0)
+		return 0;
+	emu_nodes = min(count, MAX_NUMNODES);
+	return 0;
+}
+early_param("emu_nodes", early_parse_emu_nodes);
+
+/*
+ * Kernel parameter: emu_size=[<n>[k|M|G|T]]
+ */
+static int __init early_parse_emu_size(char *p)
+{
+	emu_size = memparse(p, NULL);
+	return 0;
+}
+early_param("emu_size", early_parse_emu_size);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index 0416a3671e33..09b1d2355bd9 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -175,6 +175,10 @@ static int __init parse_numa(char *parm)
 {
 	if (strcmp(parm, numa_mode_plain.name) == 0)
 		mode = &numa_mode_plain;
+#ifdef CONFIG_NUMA_EMU
+	if (strcmp(parm, numa_mode_emu.name) == 0)
+		mode = &numa_mode_emu;
+#endif
 	return 0;
 }
 early_param("numa", parse_numa);
diff --git a/arch/s390/numa/numa_mode.h b/arch/s390/numa/numa_mode.h
index 775659848011..08953b0b1c7f 100644
--- a/arch/s390/numa/numa_mode.h
+++ b/arch/s390/numa/numa_mode.h
@@ -19,5 +19,6 @@ struct numa_mode {
 };
 
 extern const struct numa_mode numa_mode_plain;
+extern const struct numa_mode numa_mode_emu;
 
 #endif /* __S390_NUMA_MODE_H */
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index e9485fbbb373..806239c2cf2f 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -25,6 +25,7 @@
 #include <asm/setup.h>
 #include <asm/page.h>
 #include <asm/sclp.h>
+#include <asm/numa.h>
 
 #include "sclp.h"
 
@@ -388,11 +389,11 @@ static struct notifier_block sclp_mem_nb = {
 };
 
 static void __init align_to_block_size(unsigned long long *start,
-				       unsigned long long *size)
+				       unsigned long long *size,
+				       unsigned long long alignment)
 {
-	unsigned long long start_align, size_align, alignment;
+	unsigned long long start_align, size_align;
 
-	alignment = memory_block_size_bytes();
 	start_align = roundup(*start, alignment);
 	size_align = rounddown(*start + *size, alignment) - start_align;
 
@@ -404,8 +405,8 @@ static void __init align_to_block_size(unsigned long long *start,
 
 static void __init add_memory_merged(u16 rn)
 {
+	unsigned long long start, size, addr, block_size;
 	static u16 first_rn, num;
-	unsigned long long start, size;
 
 	if (rn && first_rn && (first_rn + num == rn)) {
 		num++;
@@ -423,9 +424,12 @@ static void __init add_memory_merged(u16 rn)
 		goto skip_add;
 	if (memory_end_set && (start + size > memory_end))
 		size = memory_end - start;
-	align_to_block_size(&start, &size);
-	if (size)
-		add_memory(0, start, size);
+	block_size = memory_block_size_bytes();
+	align_to_block_size(&start, &size, block_size);
+	if (!size)
+		goto skip_add;
+	for (addr = start; addr < start + size; addr += block_size)
+		add_memory(numa_pfn_to_nid(PFN_DOWN(addr)), addr, block_size);
 skip_add:
 	first_rn = rn;
 	num = 1;
-- 
cgit v1.2.3


From a763bc8b656d11b7424cd2696e19efca301d8aa4 Mon Sep 17 00:00:00 2001
From: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Date: Fri, 8 May 2015 17:40:44 +0200
Subject: s390/numa: enable support in s390 configs

Signed-off-by: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/Kconfig                       | 6 +++---
 arch/s390/configs/default_defconfig     | 2 ++
 arch/s390/configs/gcov_defconfig        | 2 ++
 arch/s390/configs/performance_defconfig | 3 +++
 4 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index cb418dcc2d45..4827870f7a6d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -99,7 +99,9 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_USE_CMPXCHG_LOCKREF
+	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS2
@@ -112,6 +114,7 @@ config S390
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_EARLY_PFN_TO_NID
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
@@ -153,9 +156,6 @@ config S390
 	select TTY
 	select VIRT_CPU_ACCOUNTING
 	select VIRT_TO_BUS
-	select ARCH_SUPPORTS_NUMA_BALANCING
-	select ARCH_WANTS_PROT_NUMA_PROT_NONE
-	select HAVE_ARCH_EARLY_PFN_TO_NID
 
 
 config SCHED_OMIT_FRAME_POINTER
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 940cbddd9237..0c98f1508542 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -13,6 +13,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -50,6 +51,7 @@ CONFIG_LIVEPATCH=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
 CONFIG_PREEMPT=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index d793fec91797..82083e1fbdc4 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -13,6 +13,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -49,6 +50,7 @@ CONFIG_DEFAULT_DEADLINE=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 38a77e9c8aa6..c05c9e0821e3 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -13,6 +13,8 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_RCU_FAST_NO_HZ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+CONFIG_NUMA_BALANCING=y
+# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
@@ -48,6 +50,7 @@ CONFIG_LIVEPATCH=y
 CONFIG_MARCH_Z196=y
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
-- 
cgit v1.2.3


From 5a7ff75a0c63222d138d944240146dc49a9624e1 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 4 Aug 2015 09:15:58 +0200
Subject: s390/syscalls: ignore syscalls reachable via sys_socketcall

x86 will wire up all syscalls reachable via sys_socketcall. Therefore this
will yield a lot of warnings from the checksyscalls.sh script on s390,
where we currently don't wire them up directly.

This might change in the future, but this needs to be done carefully in
order to not break anything.

For the time being just tell the checksyscalls script to ignore the missing
syscalls on s390.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/unistd.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index ec2bfc83a1e9..06bc3c292fb3 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -18,9 +18,24 @@
 #define __ARCH_WANT_migrate_pages
 #define __ARCH_WANT_move_pages
 
-/* Ignore system calls that are also reachable via sys_socket */
+/* Ignore system calls that are also reachable via sys_socketcall */
 #define __IGNORE_recvmmsg
 #define __IGNORE_sendmmsg
+#define __IGNORE_socket
+#define __IGNORE_socketpair
+#define __IGNORE_bind
+#define __IGNORE_connect
+#define __IGNORE_listen
+#define __IGNORE_accept4
+#define __IGNORE_getsockopt
+#define __IGNORE_setsockopt
+#define __IGNORE_getsockname
+#define __IGNORE_getpeername
+#define __IGNORE_sendto
+#define __IGNORE_sendmsg
+#define __IGNORE_recvfrom
+#define __IGNORE_recvmsg
+#define __IGNORE_shutdown
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
-- 
cgit v1.2.3


From f341b8dff9823a969be5fb3c958e5cb305ac67e8 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 3 Aug 2015 16:16:40 +0200
Subject: s390/vtime: limit MT scaling value updates

The MT scaling values are updated on each call to do_account_vtime.
This function is called for each HZ interrupt and for each context
switch. Context switches can happen often, so the STCCTM instruction
on this path is noticeable. Limit the updates to once per jiffy.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/vtime.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e53d3595a7c8..b9ce650e9e99 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -28,6 +28,7 @@ static atomic64_t virt_timer_elapsed;
 static DEFINE_PER_CPU(u64, mt_cycles[32]);
 static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
 static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
 
 static inline u64 get_vtimer(void)
 {
@@ -85,7 +86,8 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 	S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
 
 	/* Do MT utilization calculation */
-	if (smp_cpu_mtid) {
+	if (smp_cpu_mtid &&
+	    time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies))) {
 		u64 cycles_new[32], *cycles_old;
 		u64 delta, mult, div;
 
@@ -105,6 +107,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 				       sizeof(u64) * (smp_cpu_mtid + 1));
 			}
 		}
+		__this_cpu_write(mt_scaling_jiffies, jiffies_64);
 	}
 
 	user = S390_lowcore.user_timer - ti->user_timer;
@@ -376,4 +379,11 @@ void vtime_init(void)
 {
 	/* set initial cpu timer */
 	set_vtimer(VTIMER_MAX_SLICE);
+	/* Setup initial MT scaling values */
+	if (smp_cpu_mtid) {
+		__this_cpu_write(mt_scaling_jiffies, jiffies);
+		__this_cpu_write(mt_scaling_mult, 1);
+		__this_cpu_write(mt_scaling_div, 1);
+		stcctm5(smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
+	}
 }
-- 
cgit v1.2.3


From 3a3814c28b13d038ce5e055fc01f9a7aa5b821b9 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Sat, 1 Aug 2015 18:12:41 +0200
Subject: s390/topology: remove topology lock

Since we are already protected by the "sched_domains_mutex" lock, we can
safely remove the topology lock.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/topology.c | 11 ++++-------
 arch/s390/numa/mode_emu.c   |  7 ++++++-
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 0f5f8b09c903..1fbe1f83f89c 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -40,8 +40,10 @@ static struct sysinfo_15_1_x *tl_info;
 static int topology_enabled = 1;
 static DECLARE_WORK(topology_work, topology_work_fn);
 
-/* topology_lock protects the socket and book linked lists */
-static DEFINE_SPINLOCK(topology_lock);
+/*
+ * Socket/Book linked lists and per_cpu(cpu_topology) updates are
+ * protected by "sched_domains_mutex".
+ */
 static struct mask_info socket_info;
 static struct mask_info book_info;
 
@@ -191,7 +193,6 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
 {
 	struct cpuid cpu_id;
 
-	spin_lock_irq(&topology_lock);
 	get_cpu_id(&cpu_id);
 	clear_masks();
 	switch (cpu_id.machine) {
@@ -202,7 +203,6 @@ static void tl_to_masks(struct sysinfo_15_1_x *info)
 	default:
 		__tl_to_masks_generic(info);
 	}
-	spin_unlock_irq(&topology_lock);
 }
 
 static void topology_update_polarization_simple(void)
@@ -247,10 +247,8 @@ int topology_set_cpu_management(int fc)
 
 static void update_cpu_masks(void)
 {
-	unsigned long flags;
 	int cpu;
 
-	spin_lock_irqsave(&topology_lock, flags);
 	for_each_possible_cpu(cpu) {
 		per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
 		per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
@@ -262,7 +260,6 @@ static void update_cpu_masks(void)
 			per_cpu(cpu_topology, cpu).book_id = cpu;
 		}
 	}
-	spin_unlock_irqrestore(&topology_lock, flags);
 	numa_update_cpu_topology();
 }
 
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
index 9d4e1e15a6f0..646cd94cff2f 100644
--- a/arch/s390/numa/mode_emu.c
+++ b/arch/s390/numa/mode_emu.c
@@ -51,6 +51,11 @@ enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
 static int emu_nodes = 1;
 /* NUMA stripe size */
 static unsigned long emu_size;
+
+/*
+ * Node to core pinning information updates are protected by
+ * "sched_domains_mutex".
+ */
 /* Pinned core to node mapping */
 static int cores_to_node_id[CONFIG_NR_CPUS];
 /* Total number of pinned cores */
@@ -393,7 +398,7 @@ static void print_node_to_core_map(void)
  * Transfer physical topology into a NUMA topology and modify CPU masks
  * according to the NUMA topology.
  *
- * This function is called under the CPU hotplug lock.
+ * Must be called with "sched_domains_mutex" lock held.
  */
 static void emu_update_cpu_topology(void)
 {
-- 
cgit v1.2.3


From 854508c0d0bbcf2f80fa1f79af5cb9ce219b2e9c Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 4 Aug 2015 23:38:23 -0700
Subject: s390/lib: export __delay

__delay is exported by most architectures, and may be used in modules.
Since it is not exported for s390, s390:allmodconfig currently fails
to build with

ERROR: "__delay" [drivers/net/phy/mdio-octeon.ko] undefined!

Fixes: a6d678645210 ("net: mdio-octeon: Modify driver to work on both
	ThunderX and Octeon")
Cc: Radha Mohan Chintakuntla <rchintakuntla@cavium.com>
Cc: David Daney <david.daney@cavium.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/lib/delay.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/s390')

diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 16dc42d83f93..246a7eb4b680 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -26,6 +26,7 @@ void __delay(unsigned long loops)
          */
 	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
+EXPORT_SYMBOL(__delay);
 
 static void __udelay_disabled(unsigned long long usecs)
 {
-- 
cgit v1.2.3


From 7cde4910a5adcab62506eff49f4500053464fc8a Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Wed, 5 Aug 2015 11:23:53 +0200
Subject: s390/numa: make core to node mapping data dynamic

The core to node mapping data consumes about 2 KB of bss data. To save memory
for the non-NUMA case, make the data dynamic. In addition, change the
"cores_to_node_id" array from "int" to "s32" which saves 1 KB also for the
NUMA case.

Suggested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/numa/mode_emu.c | 68 ++++++++++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 27 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
index 646cd94cff2f..7de4e2f780d7 100644
--- a/arch/s390/numa/mode_emu.c
+++ b/arch/s390/numa/mode_emu.c
@@ -23,6 +23,7 @@
 #include <linux/memblock.h>
 #include <linux/node.h>
 #include <linux/memory.h>
+#include <linux/slab.h>
 #include <asm/smp.h>
 #include <asm/topology.h>
 #include "numa_mode.h"
@@ -56,26 +57,24 @@ static unsigned long emu_size;
  * Node to core pinning information updates are protected by
  * "sched_domains_mutex".
  */
-/* Pinned core to node mapping */
-static int cores_to_node_id[CONFIG_NR_CPUS];
-/* Total number of pinned cores */
-static int cores_total;
-/* Number of cores per node without extra cores */
-static int cores_per_node_target;
-/* Number of cores pinned to node */
-static int cores_per_node[MAX_NUMNODES];
+static struct {
+	s32 to_node_id[CONFIG_NR_CPUS];	/* Pinned core to node mapping */
+	int total;			/* Total number of pinned cores */
+	int per_node_target;		/* Cores per node without extra cores */
+	int per_node[MAX_NUMNODES];	/* Number of cores pinned to node */
+} *emu_cores;
 
 /*
  * Pin a core to a node
  */
 static void pin_core_to_node(int core_id, int node_id)
 {
-	if (cores_to_node_id[core_id] == NODE_ID_FREE) {
-		cores_per_node[node_id]++;
-		cores_to_node_id[core_id] = node_id;
-		cores_total++;
+	if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) {
+		emu_cores->per_node[node_id]++;
+		emu_cores->to_node_id[core_id] = node_id;
+		emu_cores->total++;
 	} else {
-		WARN_ON(cores_to_node_id[core_id] != node_id);
+		WARN_ON(emu_cores->to_node_id[core_id] != node_id);
 	}
 }
 
@@ -84,7 +83,7 @@ static void pin_core_to_node(int core_id, int node_id)
  */
 static int cores_pinned(struct toptree *node)
 {
-	return cores_per_node[node->id];
+	return emu_cores->per_node[node->id];
 }
 
 /*
@@ -92,7 +91,7 @@ static int cores_pinned(struct toptree *node)
  */
 static int core_pinned_to_node_id(struct toptree *core)
 {
-	return cores_to_node_id[core->id];
+	return emu_cores->to_node_id[core->id];
 }
 
 /*
@@ -174,14 +173,15 @@ static void toptree_unify_tree(struct toptree *tree)
 
 /*
  * Find the best/nearest node for a given core and ensure that no node
- * gets more than "cores_per_node_target + extra" cores.
+ * gets more than "emu_cores->per_node_target + extra" cores.
  */
 static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
 				     int extra)
 {
 	struct toptree *node, *node_best = NULL;
-	int dist_cur, dist_best;
+	int dist_cur, dist_best, cores_target;
 
+	cores_target = emu_cores->per_node_target + extra;
 	dist_best = DIST_MAX;
 	node_best = NULL;
 	toptree_for_each(node, numa, NODE) {
@@ -191,7 +191,7 @@ static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
 			break;
 		}
 		/* Skip nodes that already have enough cores */
-		if (cores_pinned(node) >= cores_per_node_target + extra)
+		if (cores_pinned(node) >= cores_target)
 			continue;
 		dist_cur = dist_node_to_core(node, core);
 		if (dist_cur < dist_best) {
@@ -225,11 +225,11 @@ static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
 static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
 				    enum toptree_level level, bool perfect)
 {
+	int cores_free, cores_target = emu_cores->per_node_target;
 	struct toptree *cur, *tmp;
-	int cores_free;
 
 	toptree_for_each_safe(cur, tmp, phys, level) {
-		cores_free = cores_per_node_target - toptree_count(node, CORE);
+		cores_free = cores_target - toptree_count(node, CORE);
 		if (perfect) {
 			if (cores_free == toptree_count(cur, CORE))
 				toptree_move(cur, node);
@@ -291,6 +291,20 @@ fail:
 	panic("NUMA emulation could not allocate topology");
 }
 
+/*
+ * Allocate and initialize core to node mapping
+ */
+static void create_core_to_node_map(void)
+{
+	int i;
+
+	emu_cores = kzalloc(sizeof(*emu_cores), GFP_KERNEL);
+	if (emu_cores == NULL)
+		panic("Could not allocate cores to node memory");
+	for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
+		emu_cores->to_node_id[i] = NODE_ID_FREE;
+}
+
 /*
  * Move cores from physical topology into NUMA target topology
  * and try to keep as much of the physical topology as possible.
@@ -299,8 +313,10 @@ static struct toptree *toptree_to_numa(struct toptree *phys)
 {
 	static int first = 1;
 	struct toptree *numa;
+	int cores_total;
 
-	cores_per_node_target = (cores_total + cores_free(phys)) / emu_nodes;
+	cores_total = emu_cores->total + cores_free(phys);
+	emu_cores->per_node_target = cores_total / emu_nodes;
 	numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
 	if (first) {
 		toptree_to_numa_first(numa, phys);
@@ -386,8 +402,8 @@ static void print_node_to_core_map(void)
 	printk(KERN_DEBUG "NUMA node to core mapping\n");
 	for (nid = 0; nid < emu_nodes; nid++) {
 		printk(KERN_DEBUG "  node %3d: ", nid);
-		for (cid = 0; cid < ARRAY_SIZE(cores_to_node_id); cid++) {
-			if (cores_to_node_id[cid] == nid)
+		for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) {
+			if (emu_cores->to_node_id[cid] == nid)
 				printk(KERN_CONT "%d ", cid);
 		}
 		printk(KERN_CONT "\n");
@@ -404,6 +420,8 @@ static void emu_update_cpu_topology(void)
 {
 	struct toptree *phys, *numa;
 
+	if (emu_cores == NULL)
+		create_core_to_node_map();
 	phys = toptree_from_topology();
 	numa = toptree_to_numa(phys);
 	toptree_free(phys);
@@ -443,12 +461,8 @@ static int emu_setup_nodes_adjust(int nodes)
  */
 static void emu_setup(void)
 {
-	int i;
-
 	emu_size = emu_setup_size_adjust(emu_size);
 	emu_nodes = emu_setup_nodes_adjust(emu_nodes);
-	for (i = 0; i < ARRAY_SIZE(cores_to_node_id); i++)
-		cores_to_node_id[i] = NODE_ID_FREE;
 	pr_info("Creating %d nodes with memory stripe size %ld MB\n",
 		emu_nodes, emu_size >> 20);
 }
-- 
cgit v1.2.3


From aad1b688c996b4a247c4a8fd1bd6c5c563963ddb Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 7 Aug 2015 08:55:48 +0200
Subject: s390/vdso: emit a GNU hash

As proposed by Andy Lutomirski, create both the SysV and the GNU hash
for the vdso objects. This may make some dynamic loaders a bit
faster.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/vdso32/Makefile | 2 +-
 arch/s390/kernel/vdso64/Makefile | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 8ad2b34ad151..ee8a18e50a25 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_31 += -m31 -s
 KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
 KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
-			$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+			$(call cc-ldoption, -Wl$(comma)--hash-style=both)
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
 $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 2a8ddfd12a5b..c4b03f9ed228 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -13,7 +13,7 @@ KBUILD_AFLAGS_64 += -m64 -s
 KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
 KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
-			$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+			$(call cc-ldoption, -Wl$(comma)--hash-style=both)
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
 $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
-- 
cgit v1.2.3


From c0e5ddab6e985c83714b31a88098e76c39ff084a Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Thu, 13 Aug 2015 10:35:11 +0200
Subject: s390/numa: re-add DIE sched_domain_topology_level

By accident this level has been removed by the NUMA infrastructure patch.
For non-NUMA systems with CPUs that span more than one book, this makes
the scheduler only use one of the books and the other books remain idle.

Fix this and re-add the missing level.

For NUMA and non-NUMA we have the following scheduling domains and groups:

 - SMT  (Groups: CPU threads)
 - MC   (Groups: Cores)
 - BOOK (Groups: Books)

For the non-NUMA case we have one last level scheduling domain:

 - DIE  (Groups: Whole system, has all CPUs -> cpu_cpu_mask)

For the NUMA case we have the following two last level scheduling domains:

 - DIE  (Groups: NUMA nodes -> cpu_cpu_mask -> returns node siblings)
 - NUMA (Groups: Whole system, has all CPUs -> created in sched_init_numa())

Fixes: e8054b654bf5 ("s390/numa: add topology tree infrastructure")
Reported-and-tested-by: Evgeny Cherkashin <Eugene.Crosser@ru.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/topology.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 1fbe1f83f89c..bf05e7fc3e70 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -451,6 +451,7 @@ static struct sched_domain_topology_level s390_topology[] = {
 	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
 	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
 	{ NULL, },
 };
 
-- 
cgit v1.2.3


From a69db2f6ad69157cace810971f19cb5aec9956b1 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 13 Aug 2015 12:27:16 +0200
Subject: s390/facilities: remove transactional-execution bits

Remove the two facility bits

50 - constrained transactional-execution facility
74 - transactional-execution facility

from the required facilities if the kernel is built with -march=zEC12.

E.g. z/VM 6.3 doesn't virtualize the TX facility yet. Therefore a kernel
built with -march=zEC12 and ipl'ed on a zEC12 machine as a z/VM 6.3 guest
will emit a message about the missing facilities and stop working.

The kernel however doesn't make use of the TX facility, therefore remove
the two TX related facility bits and fix this unpleasant behavior.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/head.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 63c77fdb619e..1255c6c5353e 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -414,9 +414,9 @@ ENTRY(startup_kdump)
 # followed by the facility words.
 
 #if defined(CONFIG_MARCH_Z13)
-	.long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+	.long 2, 0xc100eff2, 0xf46cc800
 #elif defined(CONFIG_MARCH_ZEC12)
-	.long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+	.long 2, 0xc100eff2, 0xf46cc800
 #elif defined(CONFIG_MARCH_Z196)
 	.long 2, 0xc100eff2, 0xf46c0000
 #elif defined(CONFIG_MARCH_Z10)
-- 
cgit v1.2.3


From 92d6289105d134582094eede00192bd66f54905f Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 13 Aug 2015 13:26:49 +0200
Subject: s390: remove unneeded sizeof(void *) comparisons

Remove two more statements which always evaluate to 'false'.
These are more leftovers from the 31 bit era.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/fault.c           | 2 +-
 drivers/s390/block/dasd_eckd.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 4c8f5d7f9c23..f985856a538b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -646,7 +646,7 @@ static void pfault_interrupt(struct ext_code ext_code,
 		return;
 	inc_irq_stat(IRQEXT_PFL);
 	/* Get the token (= pid of the affected task). */
-	pid = sizeof(void *) == 4 ? param32 : param64;
+	pid = param64;
 	rcu_read_lock();
 	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
 	if (tsk)
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 72cdaae8ce99..62a323539226 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -3989,7 +3989,7 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp)
 	rc = -EFAULT;
 	if (copy_from_user(&usrparm, argp, sizeof(usrparm)))
 		goto out;
-	if (is_compat_task() || sizeof(long) == 4) {
+	if (is_compat_task()) {
 		/* Make sure pointers are sane even on 31 bit. */
 		rc = -EINVAL;
 		if ((usrparm.psf_data >> 32) != 0)
-- 
cgit v1.2.3


From cabc4abe8e368d754805691b146f31f574533cfa Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 13 Aug 2015 13:44:34 +0200
Subject: s390/uaccess: remove uaccess_primary kernel parameter

In the meantime get_user() and put_user() have become inline
functions again. Both will generate the mvcos instruction if compiled
with -march=z10 (or greater).

The kernel parameter "uaccess_primary" can only change the behavior
of out-of-line uaccess functions like copy_from_user() to not use
the mvcos instruction, but not that of the inlined functions named above.

Therefore it is quite useless and the parameter can be removed.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/lib/uaccess.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 4614d415bb58..0d002a746bec 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -370,22 +370,9 @@ long __strncpy_from_user(char *dst, const char __user *src, long size)
 }
 EXPORT_SYMBOL(__strncpy_from_user);
 
-/*
- * The "old" uaccess variant without mvcos can be enforced with the
- * uaccess_primary kernel parameter. This is mainly for debugging purposes.
- */
-static int uaccess_primary __initdata;
-
-static int __init parse_uaccess_pt(char *__unused)
-{
-	uaccess_primary = 1;
-	return 0;
-}
-early_param("uaccess_primary", parse_uaccess_pt);
-
 static int __init uaccess_init(void)
 {
-	if (!uaccess_primary && test_facility(27))
+	if (test_facility(27))
 		static_key_slow_inc(&have_mvcos);
 	return 0;
 }
-- 
cgit v1.2.3


From 89b1145e93771d727645c96e323539c029b63f1c Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 14 Aug 2015 13:20:28 +0200
Subject: s390/setup: fix novx parameter

The novx parameter disables the vector facility but the HWCAP_S390_VXRS
bit in the ELF hardware capabilities is always set if the machine has
the vector facility. If a user space program uses the "vx" string
in the features field of /proc/cpuinfo to utilize vector instructions
it will crash if the novx kernel parameter is set.

Convert setup_hwcaps to an arch_initcall and use MACHINE_HAS_VX to
decide if the HWCAPS_S390_VXRS bit needs to be set.

Cc: stable@vger.kernel.org # 3.18+
Reported-by: Ulrich Weigand <uweigand@de.ibm.com>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/setup.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index af6b0236ccf3..ce0cbd6ba7ca 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -689,7 +689,7 @@ static void __init setup_memory(void)
 /*
  * Setup hardware capabilities.
  */
-static void __init setup_hwcaps(void)
+static int __init setup_hwcaps(void)
 {
 	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
 	struct cpuid cpu_id;
@@ -755,9 +755,11 @@ static void __init setup_hwcaps(void)
 		elf_hwcap |= HWCAP_S390_TE;
 
 	/*
-	 * Vector extension HWCAP_S390_VXRS is bit 11.
+	 * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
+	 * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
+	 * instead of facility bit 129.
 	 */
-	if (test_facility(129))
+	if (MACHINE_HAS_VX)
 		elf_hwcap |= HWCAP_S390_VXRS;
 	get_cpu_id(&cpu_id);
 	add_device_randomness(&cpu_id, sizeof(cpu_id));
@@ -794,7 +796,9 @@ static void __init setup_hwcaps(void)
 		strcpy(elf_platform, "z13");
 		break;
 	}
+	return 0;
 }
+arch_initcall(setup_hwcaps);
 
 /*
  * Add system information as device randomness
@@ -882,11 +886,6 @@ void __init setup_arch(char **cmdline_p)
         cpu_init();
 	numa_setup();
 
-	/*
-	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
-	 */
-	setup_hwcaps();
-
 	/*
 	 * Create kernel page tables and switch to virtual addressing.
 	 */
-- 
cgit v1.2.3


From 24d05ff863e78544e8538a792e3234291cdd5650 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 17 Aug 2015 08:09:17 +0200
Subject: s390/nmi: initialize control register 0 earlier

Change machine_check_init() to an early_initcall(). This makes sure it will
be called before all other cpus are online and therefore saves us a lot of
pointless smp_call_function() calls.
The control register settings will be forwarded to the other cpus when they
are brought online.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/nmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index cbdd94c8ba18..0ae6f8e74840 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -363,4 +363,4 @@ static int __init machine_check_init(void)
 	ctl_set_bit(14, 24);	/* enable warning MCH */
 	return 0;
 }
-arch_initcall(machine_check_init);
+early_initcall(machine_check_init);
-- 
cgit v1.2.3


From 3d8258e4ab5166ca702b2828c99a5fa7dcb40a0c Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 18 Aug 2015 19:39:27 +0200
Subject: s390/pci: move debug messages to debugfs

The error messages in pci_insn are for debug purposes only. Move
them to the debugfs.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/pci/pci_insn.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 85267c058af8..dcc2634ccbe2 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -8,10 +8,23 @@
 #include <linux/errno.h>
 #include <linux/delay.h>
 #include <asm/pci_insn.h>
+#include <asm/pci_debug.h>
 #include <asm/processor.h>
 
 #define ZPCI_INSN_BUSY_DELAY	1	/* 1 microsecond */
 
+static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
+{
+	struct {
+		u8 cc;
+		u8 status;
+		u64 req;
+		u64 offset;
+	} data = {cc, status, req, offset};
+
+	zpci_err_hex(&data, sizeof(data));
+}
+
 /* Modify PCI Function Controls */
 static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
 {
@@ -38,8 +51,8 @@ int zpci_mod_fc(u64 req, struct zpci_fib *fib)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d\n",
-			     __func__, cc, status);
+		zpci_err_insn(cc, status, req, 0);
+
 	return (cc) ? -EIO : 0;
 }
 
@@ -72,8 +85,8 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  dma_addr: %Lx  size: %Lx\n",
-			    __func__, cc, status, addr, range);
+		zpci_err_insn(cc, status, addr, range);
+
 	return (cc) ? -EIO : 0;
 }
 
@@ -121,8 +134,8 @@ int zpci_load(u64 *data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			    __func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_load);
@@ -159,8 +172,8 @@ int zpci_store(u64 data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			__func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_store);
@@ -195,8 +208,8 @@ int zpci_store_block(const u64 *data, u64 req, u64 offset)
 	} while (cc == 2);
 
 	if (cc)
-		printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
-			    __func__, cc, status, req, offset);
+		zpci_err_insn(cc, status, req, offset);
+
 	return (cc > 0) ? -EIO : cc;
 }
 EXPORT_SYMBOL_GPL(zpci_store_block);
-- 
cgit v1.2.3


From 78fb907626dccfeee6f4213c649606e9ba49c6bc Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 14 Aug 2015 14:58:50 +0200
Subject: s390/mm: simplify page table alloc/free code

With the removal of the dynamic reallocation of page tables for
KVM (see git commit 0b46e0a3ec0d7a04af6a091354f1b5e1b952d70a)
the page table allocation / freeing code can be simplified.

The page table free code can now use the alloc_pgste bit in the
mm context to decide if a page table is 2K or 4K, there is no mix
of different sized page tables anymore. This eliminates the need
to use "page->_mapcount == 0" to check for 4K page table.

Use the lower two bits in page->_mapcount to indicate which
2K fragments of the 4K page are in use.

As 31-bit support is gone, remove the two defines ALLOC_ORDER
and FRAG_MASK and use the constants directly where appropriate.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/pgtable.c | 225 +++++++++++++++++++------------------------------
 1 file changed, 89 insertions(+), 136 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b33f66110ca9..54ef3bc01b43 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -10,11 +10,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
 #include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/quicklist.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/swapops.h>
@@ -28,12 +24,9 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
-#define ALLOC_ORDER	2
-#define FRAG_MASK	0x03
-
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
-	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	struct page *page = alloc_pages(GFP_KERNEL, 2);
 
 	if (!page)
 		return NULL;
@@ -42,7 +35,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
-	free_pages((unsigned long) table, ALLOC_ORDER);
+	free_pages((unsigned long) table, 2);
 }
 
 static void __crst_table_upgrade(void *arg)
@@ -176,7 +169,7 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
 	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
 	spin_lock_init(&gmap->guest_table_lock);
 	gmap->mm = mm;
-	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	page = alloc_pages(GFP_KERNEL, 2);
 	if (!page)
 		goto out_free;
 	page->index = 0;
@@ -247,7 +240,7 @@ void gmap_free(struct gmap *gmap)
 
 	/* Free all segment & region tables. */
 	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
-		__free_pages(page, ALLOC_ORDER);
+		__free_pages(page, 2);
 	gmap_radix_tree_free(&gmap->guest_to_host);
 	gmap_radix_tree_free(&gmap->host_to_guest);
 	down_write(&gmap->mm->mmap_sem);
@@ -287,7 +280,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	unsigned long *new;
 
 	/* since we dont free the gmap table until gmap_free we can unlock */
-	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	page = alloc_pages(GFP_KERNEL, 2);
 	if (!page)
 		return -ENOMEM;
 	new = (unsigned long *) page_to_phys(page);
@@ -302,7 +295,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	}
 	spin_unlock(&gmap->mm->page_table_lock);
 	if (page)
-		__free_pages(page, ALLOC_ORDER);
+		__free_pages(page, 2);
 	return 0;
 }
 
@@ -795,40 +788,6 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
 }
 EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
 
-static inline int page_table_with_pgste(struct page *page)
-{
-	return atomic_read(&page->_mapcount) == 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-	struct page *page;
-	unsigned long *table;
-
-	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-	if (!page)
-		return NULL;
-	if (!pgtable_page_ctor(page)) {
-		__free_page(page);
-		return NULL;
-	}
-	atomic_set(&page->_mapcount, 0);
-	table = (unsigned long *) page_to_phys(page);
-	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
-	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
-	return table;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-	struct page *page;
-
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	pgtable_page_dtor(page);
-	atomic_set(&page->_mapcount, -1);
-	__free_page(page);
-}
-
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq)
 {
@@ -957,20 +916,6 @@ __initcall(page_table_register_sysctl);
 
 #else /* CONFIG_PGSTE */
 
-static inline int page_table_with_pgste(struct page *page)
-{
-	return 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-}
-
 static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
 			unsigned long vmaddr)
 {
@@ -994,44 +939,55 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
  */
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
-	unsigned long *uninitialized_var(table);
-	struct page *uninitialized_var(page);
+	unsigned long *table;
+	struct page *page;
 	unsigned int mask, bit;
 
-	if (mm_alloc_pgste(mm))
-		return page_table_alloc_pgste(mm);
-	/* Allocate fragments of a 4K page as 1K/2K page table */
-	spin_lock_bh(&mm->context.list_lock);
-	mask = FRAG_MASK;
-	if (!list_empty(&mm->context.pgtable_list)) {
-		page = list_first_entry(&mm->context.pgtable_list,
-					struct page, lru);
-		table = (unsigned long *) page_to_phys(page);
-		mask = atomic_read(&page->_mapcount);
-		mask = mask | (mask >> 4);
-	}
-	if ((mask & FRAG_MASK) == FRAG_MASK) {
-		spin_unlock_bh(&mm->context.list_lock);
-		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-		if (!page)
-			return NULL;
-		if (!pgtable_page_ctor(page)) {
-			__free_page(page);
-			return NULL;
+	/* Try to get a fragment of a 4K page as a 2K page table */
+	if (!mm_alloc_pgste(mm)) {
+		table = NULL;
+		spin_lock_bh(&mm->context.list_lock);
+		if (!list_empty(&mm->context.pgtable_list)) {
+			page = list_first_entry(&mm->context.pgtable_list,
+						struct page, lru);
+			mask = atomic_read(&page->_mapcount);
+			mask = (mask | (mask >> 4)) & 3;
+			if (mask != 3) {
+				table = (unsigned long *) page_to_phys(page);
+				bit = mask & 1;		/* =1 -> second 2K */
+				if (bit)
+					table += PTRS_PER_PTE;
+				atomic_xor_bits(&page->_mapcount, 1U << bit);
+				list_del(&page->lru);
+			}
 		}
+		spin_unlock_bh(&mm->context.list_lock);
+		if (table)
+			return table;
+	}
+	/* Allocate a fresh page */
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!page)
+		return NULL;
+	if (!pgtable_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
+	/* Initialize page table */
+	table = (unsigned long *) page_to_phys(page);
+	if (mm_alloc_pgste(mm)) {
+		/* Return 4K page table with PGSTEs */
+		atomic_set(&page->_mapcount, 3);
+		clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+		clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	} else {
+		/* Return the first 2K fragment of the page */
 		atomic_set(&page->_mapcount, 1);
-		table = (unsigned long *) page_to_phys(page);
 		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
 		spin_lock_bh(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
-	} else {
-		for (bit = 1; mask & bit; bit <<= 1)
-			table += PTRS_PER_PTE;
-		mask = atomic_xor_bits(&page->_mapcount, bit);
-		if ((mask & FRAG_MASK) == FRAG_MASK)
-			list_del(&page->lru);
+		spin_unlock_bh(&mm->context.list_lock);
 	}
-	spin_unlock_bh(&mm->context.list_lock);
 	return table;
 }
 
@@ -1041,37 +997,23 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	unsigned int bit, mask;
 
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (page_table_with_pgste(page))
-		return page_table_free_pgste(table);
-	/* Free 1K/2K page table fragment of a 4K page */
-	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
-	spin_lock_bh(&mm->context.list_lock);
-	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-		list_del(&page->lru);
-	mask = atomic_xor_bits(&page->_mapcount, bit);
-	if (mask & FRAG_MASK)
-		list_add(&page->lru, &mm->context.pgtable_list);
-	spin_unlock_bh(&mm->context.list_lock);
-	if (mask == 0) {
-		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
-		__free_page(page);
+	if (!mm_alloc_pgste(mm)) {
+		/* Free 2K page table fragment of a 4K page */
+		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+		spin_lock_bh(&mm->context.list_lock);
+		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+		if (mask & 3)
+			list_add(&page->lru, &mm->context.pgtable_list);
+		else
+			list_del(&page->lru);
+		spin_unlock_bh(&mm->context.list_lock);
+		if (mask != 0)
+			return;
 	}
-}
-
-static void __page_table_free_rcu(void *table, unsigned bit)
-{
-	struct page *page;
 
-	if (bit == FRAG_MASK)
-		return page_table_free_pgste(table);
-	/* Free 1K/2K page table fragment of a 4K page */
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
-		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
-		__free_page(page);
-	}
+	pgtable_page_dtor(page);
+	atomic_set(&page->_mapcount, -1);
+	__free_page(page);
 }
 
 void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
@@ -1083,34 +1025,45 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 
 	mm = tlb->mm;
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (page_table_with_pgste(page)) {
+	if (mm_alloc_pgste(mm)) {
 		gmap_unlink(mm, table, vmaddr);
-		table = (unsigned long *) (__pa(table) | FRAG_MASK);
+		table = (unsigned long *) (__pa(table) | 3);
 		tlb_remove_table(tlb, table);
 		return;
 	}
-	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
 	spin_lock_bh(&mm->context.list_lock);
-	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-		list_del(&page->lru);
-	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
-	if (mask & FRAG_MASK)
+	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+	if (mask & 3)
 		list_add_tail(&page->lru, &mm->context.pgtable_list);
+	else
+		list_del(&page->lru);
 	spin_unlock_bh(&mm->context.list_lock);
-	table = (unsigned long *) (__pa(table) | (bit << 4));
+	table = (unsigned long *) (__pa(table) | (1U << bit));
 	tlb_remove_table(tlb, table);
 }
 
 static void __tlb_remove_table(void *_table)
 {
-	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
-	void *table = (void *)((unsigned long) _table & ~mask);
-	unsigned type = (unsigned long) _table & mask;
-
-	if (type)
-		__page_table_free_rcu(table, type);
-	else
-		free_pages((unsigned long) table, ALLOC_ORDER);
+	unsigned int mask = (unsigned long) _table & 3;
+	void *table = (void *)((unsigned long) _table ^ mask);
+	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+	switch (mask) {
+	case 0:		/* pmd or pud */
+		free_pages((unsigned long) table, 2);
+		break;
+	case 1:		/* lower 2K of a 4K page table */
+	case 2:		/* higher 2K of a 4K page table */
+		if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+			break;
+		/* fallthrough */
+	case 3:		/* 4K page table with pgstes */
+		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
+		__free_page(page);
+		break;
+	}
 }
 
 static void tlb_remove_table_smp_sync(void *arg)
-- 
cgit v1.2.3


From de9c35f32410c225c585535a321ce591ea001645 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Fri, 21 Aug 2015 19:29:33 +0200
Subject: s390/numa: remove superfluous ARCH_WANT defines

The NUMA system call "__ARCH_WANT" defines are not used by the
Linux kernel, therefore remove them.

Fixes: 9df62adffeb0 ("s390/numa: add core infrastructure")
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/unistd.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 06bc3c292fb3..525cef73b085 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -11,13 +11,6 @@
 
 #define __IGNORE_time
 
-/* NUMA system calls */
-#define  _ARCH_WANT_mbind
-#define __ARCH_WANT_get_mempolicy
-#define __ARCH_WANT_set_mempolicy
-#define __ARCH_WANT_migrate_pages
-#define __ARCH_WANT_move_pages
-
 /* Ignore system calls that are also reachable via sys_socketcall */
 #define __IGNORE_recvmmsg
 #define __IGNORE_sendmmsg
-- 
cgit v1.2.3


From e4ec73510812f24087a28ac8cbf1f77c9fb262e5 Mon Sep 17 00:00:00 2001
From: Alexander Kuleshov <kuleshovmail@gmail.com>
Date: Thu, 27 Aug 2015 01:17:46 +0600
Subject: s390/jump_label: Use %*ph to print small buffers

printk() supports the %*ph format specifier for printing small buffers,
let's use it instead of %02x %02x...

Signed-off-by: Alexander Kuleshov <kuleshovmail@gmail.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/jump_label.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch/s390')

diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index a90299600483..c9dac2139f59 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -44,12 +44,9 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
 	unsigned char *ipn = (unsigned char *)new;
 
 	pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
-	pr_emerg("Found:    %02x %02x %02x %02x %02x %02x\n",
-		 ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]);
-	pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
-		 ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]);
-	pr_emerg("New:      %02x %02x %02x %02x %02x %02x\n",
-		 ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]);
+	pr_emerg("Found:    %6ph\n", ipc);
+	pr_emerg("Expected: %6ph\n", ipe);
+	pr_emerg("New:      %6ph\n", ipn);
 	panic("Corrupted kernel text");
 }
 
-- 
cgit v1.2.3