-rw-r--r--  0000_README             |    4
-rw-r--r--  1017_linux-5.8.18.patch | 5442
2 files changed, 5446 insertions(+), 0 deletions(-)
diff --git a/0000_README b/0000_README index 333aabcf..a90cff20 100644 --- a/0000_README +++ b/0000_README @@ -111,6 +111,10 @@ Patch: 1016_linux-5.8.17.patch From: http://www.kernel.org Desc: Linux 5.8.17 +Patch: 1017_linux-5.8.18.patch +From: http://www.kernel.org +Desc: Linux 5.8.18 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1017_linux-5.8.18.patch b/1017_linux-5.8.18.patch new file mode 100644 index 00000000..473975ba --- /dev/null +++ b/1017_linux-5.8.18.patch @@ -0,0 +1,5442 @@ +diff --git a/Makefile b/Makefile +index 9bdb93053ee93..33c45a0cd8582 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 8 +-SUBLEVEL = 17 ++SUBLEVEL = 18 + EXTRAVERSION = + NAME = Kleptomaniac Octopus + +diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile +index d5fe7c9e0be1d..5a34423464188 100644 +--- a/arch/arm64/Makefile ++++ b/arch/arm64/Makefile +@@ -10,14 +10,14 @@ + # + # Copyright (C) 1995-2001 by Russell King + +-LDFLAGS_vmlinux :=--no-undefined -X ++LDFLAGS_vmlinux :=--no-undefined -X -z norelro + CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) + + ifeq ($(CONFIG_RELOCATABLE), y) + # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour + # for relative relocs, since this leads to better Image compression + # with the relocation offsets always being zero. +-LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro \ ++LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \ + $(call ld-option, --no-apply-dynamic-relocs) + endif + +diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c +index 6e8a7eec667e8..d8a2bacf4e0a8 100644 +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -457,6 +457,12 @@ out_printmsg: + return required; + } + ++static void cpu_enable_ssbd_mitigation(const struct arm64_cpu_capabilities *cap) ++{ ++ if (ssbd_state != ARM64_SSBD_FORCE_DISABLE) ++ cap->matches(cap, SCOPE_LOCAL_CPU); ++} ++ + /* known invulnerable cores */ + static const struct midr_range arm64_ssb_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), +@@ -599,6 +605,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) + return (need_wa > 0); + } + ++static void ++cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap) ++{ ++ cap->matches(cap, SCOPE_LOCAL_CPU); ++} ++ + static const __maybe_unused struct midr_range tx2_family_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), +@@ -890,9 +902,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { + }, + #endif + { ++ .desc = "Branch predictor hardening", + .capability = ARM64_HARDEN_BRANCH_PREDICTOR, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = check_branch_predictor, ++ .cpu_enable = cpu_enable_branch_predictor_hardening, + }, + #ifdef CONFIG_HARDEN_EL2_VECTORS + { +@@ -906,6 +920,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { + .capability = ARM64_SSBD, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_ssbd_mitigation, ++ .cpu_enable = cpu_enable_ssbd_mitigation, + .midr_range_list = arm64_ssb_cpus, + }, + #ifdef CONFIG_ARM64_ERRATUM_1418040 +diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h +index 17c24f14615fb..6839f8fcf76b2 100644 +--- a/arch/openrisc/include/asm/uaccess.h ++++ b/arch/openrisc/include/asm/uaccess.h +@@ -164,19 +164,19 @@ struct 
__large_struct { + + #define __get_user_nocheck(x, ptr, size) \ + ({ \ +- long __gu_err, __gu_val; \ +- __get_user_size(__gu_val, (ptr), (size), __gu_err); \ +- (x) = (__force __typeof__(*(ptr)))__gu_val; \ ++ long __gu_err; \ ++ __get_user_size((x), (ptr), (size), __gu_err); \ + __gu_err; \ + }) + + #define __get_user_check(x, ptr, size) \ + ({ \ +- long __gu_err = -EFAULT, __gu_val = 0; \ +- const __typeof__(*(ptr)) * __gu_addr = (ptr); \ +- if (access_ok(__gu_addr, size)) \ +- __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ +- (x) = (__force __typeof__(*(ptr)))__gu_val; \ ++ long __gu_err = -EFAULT; \ ++ const __typeof__(*(ptr)) *__gu_addr = (ptr); \ ++ if (access_ok(__gu_addr, size)) \ ++ __get_user_size((x), __gu_addr, (size), __gu_err); \ ++ else \ ++ (x) = (__typeof__(*(ptr))) 0; \ + __gu_err; \ + }) + +@@ -190,11 +190,13 @@ do { \ + case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \ + case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \ + case 8: __get_user_asm2(x, ptr, retval); break; \ +- default: (x) = __get_user_bad(); \ ++ default: (x) = (__typeof__(*(ptr)))__get_user_bad(); \ + } \ + } while (0) + + #define __get_user_asm(x, addr, err, op) \ ++{ \ ++ unsigned long __gu_tmp; \ + __asm__ __volatile__( \ + "1: "op" %1,0(%2)\n" \ + "2:\n" \ +@@ -208,10 +210,14 @@ do { \ + " .align 2\n" \ + " .long 1b,3b\n" \ + ".previous" \ +- : "=r"(err), "=r"(x) \ +- : "r"(addr), "i"(-EFAULT), "0"(err)) ++ : "=r"(err), "=r"(__gu_tmp) \ ++ : "r"(addr), "i"(-EFAULT), "0"(err)); \ ++ (x) = (__typeof__(*(addr)))__gu_tmp; \ ++} + + #define __get_user_asm2(x, addr, err) \ ++{ \ ++ unsigned long long __gu_tmp; \ + __asm__ __volatile__( \ + "1: l.lwz %1,0(%2)\n" \ + "2: l.lwz %H1,4(%2)\n" \ +@@ -228,8 +234,11 @@ do { \ + " .long 1b,4b\n" \ + " .long 2b,4b\n" \ + ".previous" \ +- : "=r"(err), "=&r"(x) \ +- : "r"(addr), "i"(-EFAULT), "0"(err)) ++ : "=r"(err), "=&r"(__gu_tmp) \ ++ : "r"(addr), "i"(-EFAULT), "0"(err)); \ ++ (x) = (__typeof__(*(addr)))( \ ++ (__typeof__((x)-(x)))__gu_tmp); \ ++} + + /* more complex routines */ + +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 9fa23eb320ff5..cf78ad7ff0b7c 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -135,7 +135,7 @@ config PPC + select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UACCESS_FLUSHCACHE +- select ARCH_HAS_UACCESS_MCSAFE if PPC64 ++ select ARCH_HAS_COPY_MC if PPC64 + select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_KEEP_MEMBLOCK +diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h +index b72692702f35f..9bf6dffb40900 100644 +--- a/arch/powerpc/include/asm/string.h ++++ b/arch/powerpc/include/asm/string.h +@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n); + #ifndef CONFIG_KASAN + #define __HAVE_ARCH_MEMSET32 + #define __HAVE_ARCH_MEMSET64 +-#define __HAVE_ARCH_MEMCPY_MCSAFE + +-extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz); + extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t); + extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); + extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t); +diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h +index 64c04ab091123..97506441c15b1 100644 +--- a/arch/powerpc/include/asm/uaccess.h ++++ b/arch/powerpc/include/asm/uaccess.h +@@ -436,6 +436,32 @@ do { \ + extern 
unsigned long __copy_tofrom_user(void __user *to, + const void __user *from, unsigned long size); + ++#ifdef CONFIG_ARCH_HAS_COPY_MC ++unsigned long __must_check ++copy_mc_generic(void *to, const void *from, unsigned long size); ++ ++static inline unsigned long __must_check ++copy_mc_to_kernel(void *to, const void *from, unsigned long size) ++{ ++ return copy_mc_generic(to, from, size); ++} ++#define copy_mc_to_kernel copy_mc_to_kernel ++ ++static inline unsigned long __must_check ++copy_mc_to_user(void __user *to, const void *from, unsigned long n) ++{ ++ if (likely(check_copy_size(from, n, true))) { ++ if (access_ok(to, n)) { ++ allow_write_to_user(to, n); ++ n = copy_mc_generic((void *)to, from, n); ++ prevent_write_to_user(to, n); ++ } ++ } ++ ++ return n; ++} ++#endif ++ + #ifdef __powerpc64__ + static inline unsigned long + raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) +@@ -524,20 +550,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) + return ret; + } + +-static __always_inline unsigned long __must_check +-copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n) +-{ +- if (likely(check_copy_size(from, n, true))) { +- if (access_ok(to, n)) { +- allow_write_to_user(to, n); +- n = memcpy_mcsafe((void *)to, from, n); +- prevent_write_to_user(to, n); +- } +- } +- +- return n; +-} +- + unsigned long __arch_clear_user(void __user *addr, unsigned long size); + + static inline unsigned long clear_user(void __user *addr, unsigned long size) +diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile +index 5e994cda8e401..c254f5f733a86 100644 +--- a/arch/powerpc/lib/Makefile ++++ b/arch/powerpc/lib/Makefile +@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ + memcpy_power7.o + + obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ +- memcpy_64.o memcpy_mcsafe_64.o ++ memcpy_64.o copy_mc_64.o + + obj64-$(CONFIG_SMP) += locks.o + obj64-$(CONFIG_ALTIVEC) += vmx-helper.o +diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S +new file mode 100644 +index 0000000000000..88d46c471493b +--- /dev/null ++++ b/arch/powerpc/lib/copy_mc_64.S +@@ -0,0 +1,242 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (C) IBM Corporation, 2011 ++ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com> ++ * Author - Balbir Singh <bsingharora@gmail.com> ++ */ ++#include <asm/ppc_asm.h> ++#include <asm/errno.h> ++#include <asm/export.h> ++ ++ .macro err1 ++100: ++ EX_TABLE(100b,.Ldo_err1) ++ .endm ++ ++ .macro err2 ++200: ++ EX_TABLE(200b,.Ldo_err2) ++ .endm ++ ++ .macro err3 ++300: EX_TABLE(300b,.Ldone) ++ .endm ++ ++.Ldo_err2: ++ ld r22,STK_REG(R22)(r1) ++ ld r21,STK_REG(R21)(r1) ++ ld r20,STK_REG(R20)(r1) ++ ld r19,STK_REG(R19)(r1) ++ ld r18,STK_REG(R18)(r1) ++ ld r17,STK_REG(R17)(r1) ++ ld r16,STK_REG(R16)(r1) ++ ld r15,STK_REG(R15)(r1) ++ ld r14,STK_REG(R14)(r1) ++ addi r1,r1,STACKFRAMESIZE ++.Ldo_err1: ++ /* Do a byte by byte copy to get the exact remaining size */ ++ mtctr r7 ++46: ++err3; lbz r0,0(r4) ++ addi r4,r4,1 ++err3; stb r0,0(r3) ++ addi r3,r3,1 ++ bdnz 46b ++ li r3,0 ++ blr ++ ++.Ldone: ++ mfctr r3 ++ blr ++ ++ ++_GLOBAL(copy_mc_generic) ++ mr r7,r5 ++ cmpldi r5,16 ++ blt .Lshort_copy ++ ++.Lcopy: ++ /* Get the source 8B aligned */ ++ neg r6,r4 ++ mtocrf 0x01,r6 ++ clrldi r6,r6,(64-3) ++ ++ bf cr7*4+3,1f ++err1; lbz r0,0(r4) ++ addi r4,r4,1 ++err1; stb r0,0(r3) ++ addi r3,r3,1 ++ subi r7,r7,1 ++ ++1: bf cr7*4+2,2f ++err1; lhz 
r0,0(r4) ++ addi r4,r4,2 ++err1; sth r0,0(r3) ++ addi r3,r3,2 ++ subi r7,r7,2 ++ ++2: bf cr7*4+1,3f ++err1; lwz r0,0(r4) ++ addi r4,r4,4 ++err1; stw r0,0(r3) ++ addi r3,r3,4 ++ subi r7,r7,4 ++ ++3: sub r5,r5,r6 ++ cmpldi r5,128 ++ ++ mflr r0 ++ stdu r1,-STACKFRAMESIZE(r1) ++ std r14,STK_REG(R14)(r1) ++ std r15,STK_REG(R15)(r1) ++ std r16,STK_REG(R16)(r1) ++ std r17,STK_REG(R17)(r1) ++ std r18,STK_REG(R18)(r1) ++ std r19,STK_REG(R19)(r1) ++ std r20,STK_REG(R20)(r1) ++ std r21,STK_REG(R21)(r1) ++ std r22,STK_REG(R22)(r1) ++ std r0,STACKFRAMESIZE+16(r1) ++ ++ blt 5f ++ srdi r6,r5,7 ++ mtctr r6 ++ ++ /* Now do cacheline (128B) sized loads and stores. */ ++ .align 5 ++4: ++err2; ld r0,0(r4) ++err2; ld r6,8(r4) ++err2; ld r8,16(r4) ++err2; ld r9,24(r4) ++err2; ld r10,32(r4) ++err2; ld r11,40(r4) ++err2; ld r12,48(r4) ++err2; ld r14,56(r4) ++err2; ld r15,64(r4) ++err2; ld r16,72(r4) ++err2; ld r17,80(r4) ++err2; ld r18,88(r4) ++err2; ld r19,96(r4) ++err2; ld r20,104(r4) ++err2; ld r21,112(r4) ++err2; ld r22,120(r4) ++ addi r4,r4,128 ++err2; std r0,0(r3) ++err2; std r6,8(r3) ++err2; std r8,16(r3) ++err2; std r9,24(r3) ++err2; std r10,32(r3) ++err2; std r11,40(r3) ++err2; std r12,48(r3) ++err2; std r14,56(r3) ++err2; std r15,64(r3) ++err2; std r16,72(r3) ++err2; std r17,80(r3) ++err2; std r18,88(r3) ++err2; std r19,96(r3) ++err2; std r20,104(r3) ++err2; std r21,112(r3) ++err2; std r22,120(r3) ++ addi r3,r3,128 ++ subi r7,r7,128 ++ bdnz 4b ++ ++ clrldi r5,r5,(64-7) ++ ++ /* Up to 127B to go */ ++5: srdi r6,r5,4 ++ mtocrf 0x01,r6 ++ ++6: bf cr7*4+1,7f ++err2; ld r0,0(r4) ++err2; ld r6,8(r4) ++err2; ld r8,16(r4) ++err2; ld r9,24(r4) ++err2; ld r10,32(r4) ++err2; ld r11,40(r4) ++err2; ld r12,48(r4) ++err2; ld r14,56(r4) ++ addi r4,r4,64 ++err2; std r0,0(r3) ++err2; std r6,8(r3) ++err2; std r8,16(r3) ++err2; std r9,24(r3) ++err2; std r10,32(r3) ++err2; std r11,40(r3) ++err2; std r12,48(r3) ++err2; std r14,56(r3) ++ addi r3,r3,64 ++ subi r7,r7,64 ++ ++7: ld r14,STK_REG(R14)(r1) ++ ld r15,STK_REG(R15)(r1) ++ ld r16,STK_REG(R16)(r1) ++ ld r17,STK_REG(R17)(r1) ++ ld r18,STK_REG(R18)(r1) ++ ld r19,STK_REG(R19)(r1) ++ ld r20,STK_REG(R20)(r1) ++ ld r21,STK_REG(R21)(r1) ++ ld r22,STK_REG(R22)(r1) ++ addi r1,r1,STACKFRAMESIZE ++ ++ /* Up to 63B to go */ ++ bf cr7*4+2,8f ++err1; ld r0,0(r4) ++err1; ld r6,8(r4) ++err1; ld r8,16(r4) ++err1; ld r9,24(r4) ++ addi r4,r4,32 ++err1; std r0,0(r3) ++err1; std r6,8(r3) ++err1; std r8,16(r3) ++err1; std r9,24(r3) ++ addi r3,r3,32 ++ subi r7,r7,32 ++ ++ /* Up to 31B to go */ ++8: bf cr7*4+3,9f ++err1; ld r0,0(r4) ++err1; ld r6,8(r4) ++ addi r4,r4,16 ++err1; std r0,0(r3) ++err1; std r6,8(r3) ++ addi r3,r3,16 ++ subi r7,r7,16 ++ ++9: clrldi r5,r5,(64-4) ++ ++ /* Up to 15B to go */ ++.Lshort_copy: ++ mtocrf 0x01,r5 ++ bf cr7*4+0,12f ++err1; lwz r0,0(r4) /* Less chance of a reject with word ops */ ++err1; lwz r6,4(r4) ++ addi r4,r4,8 ++err1; stw r0,0(r3) ++err1; stw r6,4(r3) ++ addi r3,r3,8 ++ subi r7,r7,8 ++ ++12: bf cr7*4+1,13f ++err1; lwz r0,0(r4) ++ addi r4,r4,4 ++err1; stw r0,0(r3) ++ addi r3,r3,4 ++ subi r7,r7,4 ++ ++13: bf cr7*4+2,14f ++err1; lhz r0,0(r4) ++ addi r4,r4,2 ++err1; sth r0,0(r3) ++ addi r3,r3,2 ++ subi r7,r7,2 ++ ++14: bf cr7*4+3,15f ++err1; lbz r0,0(r4) ++err1; stb r0,0(r3) ++ ++15: li r3,0 ++ blr ++ ++EXPORT_SYMBOL_GPL(copy_mc_generic); +diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/memcpy_mcsafe_64.S +deleted file mode 100644 +index cb882d9a6d8a3..0000000000000 +--- a/arch/powerpc/lib/memcpy_mcsafe_64.S ++++ /dev/null +@@ -1,242 +0,0 
@@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * Copyright (C) IBM Corporation, 2011 +- * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com> +- * Author - Balbir Singh <bsingharora@gmail.com> +- */ +-#include <asm/ppc_asm.h> +-#include <asm/errno.h> +-#include <asm/export.h> +- +- .macro err1 +-100: +- EX_TABLE(100b,.Ldo_err1) +- .endm +- +- .macro err2 +-200: +- EX_TABLE(200b,.Ldo_err2) +- .endm +- +- .macro err3 +-300: EX_TABLE(300b,.Ldone) +- .endm +- +-.Ldo_err2: +- ld r22,STK_REG(R22)(r1) +- ld r21,STK_REG(R21)(r1) +- ld r20,STK_REG(R20)(r1) +- ld r19,STK_REG(R19)(r1) +- ld r18,STK_REG(R18)(r1) +- ld r17,STK_REG(R17)(r1) +- ld r16,STK_REG(R16)(r1) +- ld r15,STK_REG(R15)(r1) +- ld r14,STK_REG(R14)(r1) +- addi r1,r1,STACKFRAMESIZE +-.Ldo_err1: +- /* Do a byte by byte copy to get the exact remaining size */ +- mtctr r7 +-46: +-err3; lbz r0,0(r4) +- addi r4,r4,1 +-err3; stb r0,0(r3) +- addi r3,r3,1 +- bdnz 46b +- li r3,0 +- blr +- +-.Ldone: +- mfctr r3 +- blr +- +- +-_GLOBAL(memcpy_mcsafe) +- mr r7,r5 +- cmpldi r5,16 +- blt .Lshort_copy +- +-.Lcopy: +- /* Get the source 8B aligned */ +- neg r6,r4 +- mtocrf 0x01,r6 +- clrldi r6,r6,(64-3) +- +- bf cr7*4+3,1f +-err1; lbz r0,0(r4) +- addi r4,r4,1 +-err1; stb r0,0(r3) +- addi r3,r3,1 +- subi r7,r7,1 +- +-1: bf cr7*4+2,2f +-err1; lhz r0,0(r4) +- addi r4,r4,2 +-err1; sth r0,0(r3) +- addi r3,r3,2 +- subi r7,r7,2 +- +-2: bf cr7*4+1,3f +-err1; lwz r0,0(r4) +- addi r4,r4,4 +-err1; stw r0,0(r3) +- addi r3,r3,4 +- subi r7,r7,4 +- +-3: sub r5,r5,r6 +- cmpldi r5,128 +- +- mflr r0 +- stdu r1,-STACKFRAMESIZE(r1) +- std r14,STK_REG(R14)(r1) +- std r15,STK_REG(R15)(r1) +- std r16,STK_REG(R16)(r1) +- std r17,STK_REG(R17)(r1) +- std r18,STK_REG(R18)(r1) +- std r19,STK_REG(R19)(r1) +- std r20,STK_REG(R20)(r1) +- std r21,STK_REG(R21)(r1) +- std r22,STK_REG(R22)(r1) +- std r0,STACKFRAMESIZE+16(r1) +- +- blt 5f +- srdi r6,r5,7 +- mtctr r6 +- +- /* Now do cacheline (128B) sized loads and stores. 
*/ +- .align 5 +-4: +-err2; ld r0,0(r4) +-err2; ld r6,8(r4) +-err2; ld r8,16(r4) +-err2; ld r9,24(r4) +-err2; ld r10,32(r4) +-err2; ld r11,40(r4) +-err2; ld r12,48(r4) +-err2; ld r14,56(r4) +-err2; ld r15,64(r4) +-err2; ld r16,72(r4) +-err2; ld r17,80(r4) +-err2; ld r18,88(r4) +-err2; ld r19,96(r4) +-err2; ld r20,104(r4) +-err2; ld r21,112(r4) +-err2; ld r22,120(r4) +- addi r4,r4,128 +-err2; std r0,0(r3) +-err2; std r6,8(r3) +-err2; std r8,16(r3) +-err2; std r9,24(r3) +-err2; std r10,32(r3) +-err2; std r11,40(r3) +-err2; std r12,48(r3) +-err2; std r14,56(r3) +-err2; std r15,64(r3) +-err2; std r16,72(r3) +-err2; std r17,80(r3) +-err2; std r18,88(r3) +-err2; std r19,96(r3) +-err2; std r20,104(r3) +-err2; std r21,112(r3) +-err2; std r22,120(r3) +- addi r3,r3,128 +- subi r7,r7,128 +- bdnz 4b +- +- clrldi r5,r5,(64-7) +- +- /* Up to 127B to go */ +-5: srdi r6,r5,4 +- mtocrf 0x01,r6 +- +-6: bf cr7*4+1,7f +-err2; ld r0,0(r4) +-err2; ld r6,8(r4) +-err2; ld r8,16(r4) +-err2; ld r9,24(r4) +-err2; ld r10,32(r4) +-err2; ld r11,40(r4) +-err2; ld r12,48(r4) +-err2; ld r14,56(r4) +- addi r4,r4,64 +-err2; std r0,0(r3) +-err2; std r6,8(r3) +-err2; std r8,16(r3) +-err2; std r9,24(r3) +-err2; std r10,32(r3) +-err2; std r11,40(r3) +-err2; std r12,48(r3) +-err2; std r14,56(r3) +- addi r3,r3,64 +- subi r7,r7,64 +- +-7: ld r14,STK_REG(R14)(r1) +- ld r15,STK_REG(R15)(r1) +- ld r16,STK_REG(R16)(r1) +- ld r17,STK_REG(R17)(r1) +- ld r18,STK_REG(R18)(r1) +- ld r19,STK_REG(R19)(r1) +- ld r20,STK_REG(R20)(r1) +- ld r21,STK_REG(R21)(r1) +- ld r22,STK_REG(R22)(r1) +- addi r1,r1,STACKFRAMESIZE +- +- /* Up to 63B to go */ +- bf cr7*4+2,8f +-err1; ld r0,0(r4) +-err1; ld r6,8(r4) +-err1; ld r8,16(r4) +-err1; ld r9,24(r4) +- addi r4,r4,32 +-err1; std r0,0(r3) +-err1; std r6,8(r3) +-err1; std r8,16(r3) +-err1; std r9,24(r3) +- addi r3,r3,32 +- subi r7,r7,32 +- +- /* Up to 31B to go */ +-8: bf cr7*4+3,9f +-err1; ld r0,0(r4) +-err1; ld r6,8(r4) +- addi r4,r4,16 +-err1; std r0,0(r3) +-err1; std r6,8(r3) +- addi r3,r3,16 +- subi r7,r7,16 +- +-9: clrldi r5,r5,(64-4) +- +- /* Up to 15B to go */ +-.Lshort_copy: +- mtocrf 0x01,r5 +- bf cr7*4+0,12f +-err1; lwz r0,0(r4) /* Less chance of a reject with word ops */ +-err1; lwz r6,4(r4) +- addi r4,r4,8 +-err1; stw r0,0(r3) +-err1; stw r6,4(r3) +- addi r3,r3,8 +- subi r7,r7,8 +- +-12: bf cr7*4+1,13f +-err1; lwz r0,0(r4) +- addi r4,r4,4 +-err1; stw r0,0(r3) +- addi r3,r3,4 +- subi r7,r7,4 +- +-13: bf cr7*4+2,14f +-err1; lhz r0,0(r4) +- addi r4,r4,2 +-err1; sth r0,0(r3) +- addi r3,r3,2 +- subi r7,r7,2 +- +-14: bf cr7*4+3,15f +-err1; lbz r0,0(r4) +-err1; stb r0,0(r3) +- +-15: li r3,0 +- blr +- +-EXPORT_SYMBOL_GPL(memcpy_mcsafe); +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 883da0abf7790..1f4104f8852b8 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -75,7 +75,7 @@ config X86 + select ARCH_HAS_PTE_DEVMAP if X86_64 + select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 +- select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE ++ select ARCH_HAS_COPY_MC if X86_64 + select ARCH_HAS_SET_MEMORY + select ARCH_HAS_SET_DIRECT_MAP + select ARCH_HAS_STRICT_KERNEL_RWX +diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug +index 0dd319e6e5b49..ec98b400e38f9 100644 +--- a/arch/x86/Kconfig.debug ++++ b/arch/x86/Kconfig.debug +@@ -59,7 +59,7 @@ config EARLY_PRINTK_USB_XDBC + You should normally say N here, unless you want to debug early + crashes or need a very simple printk logging facility. 
+ +-config MCSAFE_TEST ++config COPY_MC_TEST + def_bool n + + config EFI_PGT_DUMP +diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c +index 26c36357c4c9c..a023cbe21230a 100644 +--- a/arch/x86/events/amd/ibs.c ++++ b/arch/x86/events/amd/ibs.c +@@ -89,6 +89,7 @@ struct perf_ibs { + u64 max_period; + unsigned long offset_mask[1]; + int offset_max; ++ unsigned int fetch_count_reset_broken : 1; + struct cpu_perf_ibs __percpu *pcpu; + + struct attribute **format_attrs; +@@ -363,7 +364,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, + static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, + struct hw_perf_event *hwc, u64 config) + { +- wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask); ++ u64 tmp = hwc->config | config; ++ ++ if (perf_ibs->fetch_count_reset_broken) ++ wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask); ++ ++ wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask); + } + + /* +@@ -733,6 +739,13 @@ static __init void perf_event_ibs_init(void) + { + struct attribute **attr = ibs_op_format_attrs; + ++ /* ++ * Some chips fail to reset the fetch count when it is written; instead ++ * they need a 0-1 transition of IbsFetchEn. ++ */ ++ if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18) ++ perf_ibs_fetch.fetch_count_reset_broken = 1; ++ + perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); + + if (ibs_caps & IBS_CAPS_OPCNT) { +diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h +new file mode 100644 +index 0000000000000..e4991ba967266 +--- /dev/null ++++ b/arch/x86/include/asm/copy_mc_test.h +@@ -0,0 +1,75 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _COPY_MC_TEST_H_ ++#define _COPY_MC_TEST_H_ ++ ++#ifndef __ASSEMBLY__ ++#ifdef CONFIG_COPY_MC_TEST ++extern unsigned long copy_mc_test_src; ++extern unsigned long copy_mc_test_dst; ++ ++static inline void copy_mc_inject_src(void *addr) ++{ ++ if (addr) ++ copy_mc_test_src = (unsigned long) addr; ++ else ++ copy_mc_test_src = ~0UL; ++} ++ ++static inline void copy_mc_inject_dst(void *addr) ++{ ++ if (addr) ++ copy_mc_test_dst = (unsigned long) addr; ++ else ++ copy_mc_test_dst = ~0UL; ++} ++#else /* CONFIG_COPY_MC_TEST */ ++static inline void copy_mc_inject_src(void *addr) ++{ ++} ++ ++static inline void copy_mc_inject_dst(void *addr) ++{ ++} ++#endif /* CONFIG_COPY_MC_TEST */ ++ ++#else /* __ASSEMBLY__ */ ++#include <asm/export.h> ++ ++#ifdef CONFIG_COPY_MC_TEST ++.macro COPY_MC_TEST_CTL ++ .pushsection .data ++ .align 8 ++ .globl copy_mc_test_src ++ copy_mc_test_src: ++ .quad 0 ++ EXPORT_SYMBOL_GPL(copy_mc_test_src) ++ .globl copy_mc_test_dst ++ copy_mc_test_dst: ++ .quad 0 ++ EXPORT_SYMBOL_GPL(copy_mc_test_dst) ++ .popsection ++.endm ++ ++.macro COPY_MC_TEST_SRC reg count target ++ leaq \count(\reg), %r9 ++ cmp copy_mc_test_src, %r9 ++ ja \target ++.endm ++ ++.macro COPY_MC_TEST_DST reg count target ++ leaq \count(\reg), %r9 ++ cmp copy_mc_test_dst, %r9 ++ ja \target ++.endm ++#else ++.macro COPY_MC_TEST_CTL ++.endm ++ ++.macro COPY_MC_TEST_SRC reg count target ++.endm ++ ++.macro COPY_MC_TEST_DST reg count target ++.endm ++#endif /* CONFIG_COPY_MC_TEST */ ++#endif /* __ASSEMBLY__ */ ++#endif /* _COPY_MC_TEST_H_ */ +diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h +index cf503824529ce..9b9112e4379ab 100644 +--- a/arch/x86/include/asm/mce.h ++++ b/arch/x86/include/asm/mce.h +@@ -174,6 +174,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb); + + extern int 
mce_p5_enabled; + ++#ifdef CONFIG_ARCH_HAS_COPY_MC ++extern void enable_copy_mc_fragile(void); ++unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt); ++#else ++static inline void enable_copy_mc_fragile(void) ++{ ++} ++#endif ++ + #ifdef CONFIG_X86_MCE + int mcheck_init(void); + void mcheck_cpu_init(struct cpuinfo_x86 *c); +diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h +deleted file mode 100644 +index eb59804b6201c..0000000000000 +--- a/arch/x86/include/asm/mcsafe_test.h ++++ /dev/null +@@ -1,75 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-#ifndef _MCSAFE_TEST_H_ +-#define _MCSAFE_TEST_H_ +- +-#ifndef __ASSEMBLY__ +-#ifdef CONFIG_MCSAFE_TEST +-extern unsigned long mcsafe_test_src; +-extern unsigned long mcsafe_test_dst; +- +-static inline void mcsafe_inject_src(void *addr) +-{ +- if (addr) +- mcsafe_test_src = (unsigned long) addr; +- else +- mcsafe_test_src = ~0UL; +-} +- +-static inline void mcsafe_inject_dst(void *addr) +-{ +- if (addr) +- mcsafe_test_dst = (unsigned long) addr; +- else +- mcsafe_test_dst = ~0UL; +-} +-#else /* CONFIG_MCSAFE_TEST */ +-static inline void mcsafe_inject_src(void *addr) +-{ +-} +- +-static inline void mcsafe_inject_dst(void *addr) +-{ +-} +-#endif /* CONFIG_MCSAFE_TEST */ +- +-#else /* __ASSEMBLY__ */ +-#include <asm/export.h> +- +-#ifdef CONFIG_MCSAFE_TEST +-.macro MCSAFE_TEST_CTL +- .pushsection .data +- .align 8 +- .globl mcsafe_test_src +- mcsafe_test_src: +- .quad 0 +- EXPORT_SYMBOL_GPL(mcsafe_test_src) +- .globl mcsafe_test_dst +- mcsafe_test_dst: +- .quad 0 +- EXPORT_SYMBOL_GPL(mcsafe_test_dst) +- .popsection +-.endm +- +-.macro MCSAFE_TEST_SRC reg count target +- leaq \count(\reg), %r9 +- cmp mcsafe_test_src, %r9 +- ja \target +-.endm +- +-.macro MCSAFE_TEST_DST reg count target +- leaq \count(\reg), %r9 +- cmp mcsafe_test_dst, %r9 +- ja \target +-.endm +-#else +-.macro MCSAFE_TEST_CTL +-.endm +- +-.macro MCSAFE_TEST_SRC reg count target +-.endm +- +-.macro MCSAFE_TEST_DST reg count target +-.endm +-#endif /* CONFIG_MCSAFE_TEST */ +-#endif /* __ASSEMBLY__ */ +-#endif /* _MCSAFE_TEST_H_ */ +diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h +index 75314c3dbe471..6e450827f677a 100644 +--- a/arch/x86/include/asm/string_64.h ++++ b/arch/x86/include/asm/string_64.h +@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct); + + #endif + +-#define __HAVE_ARCH_MEMCPY_MCSAFE 1 +-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src, +- size_t cnt); +-DECLARE_STATIC_KEY_FALSE(mcsafe_key); +- +-/** +- * memcpy_mcsafe - copy memory with indication if a machine check happened +- * +- * @dst: destination address +- * @src: source address +- * @cnt: number of bytes to copy +- * +- * Low level memory copy function that catches machine checks +- * We only call into the "safe" function on systems that can +- * actually do machine check recovery. Everyone else can just +- * use memcpy(). +- * +- * Return 0 for success, or number of bytes not copied if there was an +- * exception. 
+- */ +-static __always_inline __must_check unsigned long +-memcpy_mcsafe(void *dst, const void *src, size_t cnt) +-{ +-#ifdef CONFIG_X86_MCE +- if (static_branch_unlikely(&mcsafe_key)) +- return __memcpy_mcsafe(dst, src, cnt); +- else +-#endif +- memcpy(dst, src, cnt); +- return 0; +-} +- + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE + #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1 + void __memcpy_flushcache(void *dst, const void *src, size_t cnt); +diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h +index 2f3e8f2a958f6..9bfca52b46411 100644 +--- a/arch/x86/include/asm/uaccess.h ++++ b/arch/x86/include/asm/uaccess.h +@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(const char __user *str, long n); + unsigned long __must_check clear_user(void __user *mem, unsigned long len); + unsigned long __must_check __clear_user(void __user *mem, unsigned long len); + ++#ifdef CONFIG_ARCH_HAS_COPY_MC ++unsigned long __must_check ++copy_mc_to_kernel(void *to, const void *from, unsigned len); ++#define copy_mc_to_kernel copy_mc_to_kernel ++ ++unsigned long __must_check ++copy_mc_to_user(void *to, const void *from, unsigned len); ++#endif ++ + /* + * movsl can be slow when source and dest are not both 8-byte aligned + */ +diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h +index bc10e3dc64fed..e7265a552f4f0 100644 +--- a/arch/x86/include/asm/uaccess_64.h ++++ b/arch/x86/include/asm/uaccess_64.h +@@ -46,22 +46,6 @@ copy_user_generic(void *to, const void *from, unsigned len) + return ret; + } + +-static __always_inline __must_check unsigned long +-copy_to_user_mcsafe(void *to, const void *from, unsigned len) +-{ +- unsigned long ret; +- +- __uaccess_begin(); +- /* +- * Note, __memcpy_mcsafe() is explicitly used since it can +- * handle exceptions / faults. memcpy_mcsafe() may fall back to +- * memcpy() which lacks this handling. +- */ +- ret = __memcpy_mcsafe(to, from, len); +- __uaccess_end(); +- return ret; +-} +- + static __always_inline __must_check unsigned long + raw_copy_from_user(void *dst, const void __user *src, unsigned long size) + { +@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) + kasan_check_write(dst, size); + return __copy_user_flushcache(dst, src, size); + } +- +-unsigned long +-mcsafe_handle_tail(char *to, char *from, unsigned len); +- + #endif /* _ASM_X86_UACCESS_64_H */ +diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c +index 07673a034d39c..69b2bb305a5a7 100644 +--- a/arch/x86/kernel/cpu/mce/core.c ++++ b/arch/x86/kernel/cpu/mce/core.c +@@ -40,7 +40,6 @@ + #include <linux/debugfs.h> + #include <linux/irq_work.h> + #include <linux/export.h> +-#include <linux/jump_label.h> + #include <linux/set_memory.h> + #include <linux/task_work.h> + #include <linux/hardirq.h> +@@ -2122,7 +2121,7 @@ void mce_disable_bank(int bank) + and older. + * mce=nobootlog Don't log MCEs from before booting. 
+ * mce=bios_cmci_threshold Don't program the CMCI threshold +- * mce=recovery force enable memcpy_mcsafe() ++ * mce=recovery force enable copy_mc_fragile() + */ + static int __init mcheck_enable(char *str) + { +@@ -2730,13 +2729,10 @@ static void __init mcheck_debugfs_init(void) + static void __init mcheck_debugfs_init(void) { } + #endif + +-DEFINE_STATIC_KEY_FALSE(mcsafe_key); +-EXPORT_SYMBOL_GPL(mcsafe_key); +- + static int __init mcheck_late_init(void) + { + if (mca_cfg.recovery) +- static_branch_inc(&mcsafe_key); ++ enable_copy_mc_fragile(); + + mcheck_debugfs_init(); + +diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c +index 896d74cb5081a..e0296983a2386 100644 +--- a/arch/x86/kernel/quirks.c ++++ b/arch/x86/kernel/quirks.c +@@ -8,6 +8,7 @@ + + #include <asm/hpet.h> + #include <asm/setup.h> ++#include <asm/mce.h> + + #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) + +@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev) + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, + amd_disable_seq_and_redirect_scrub); + +-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) +-#include <linux/jump_label.h> +-#include <asm/string_64.h> +- + /* Ivy Bridge, Haswell, Broadwell */ + static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) + { +@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) + pci_read_config_dword(pdev, 0x84, &capid0); + + if (capid0 & 0x10) +- static_branch_inc(&mcsafe_key); ++ enable_copy_mc_fragile(); + } + + /* Skylake */ +@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev) + * enabled, so memory machine check recovery is also enabled. + */ + if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0)) +- static_branch_inc(&mcsafe_key); ++ enable_copy_mc_fragile(); + + } + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); +@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap); + DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap); + #endif +-#endif + + bool x86_apple_machine; + EXPORT_SYMBOL(x86_apple_machine); +diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c +index 69cc823109740..d43df8de75a6a 100644 +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -196,7 +196,7 @@ static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs) + + DEFINE_IDTENTRY(exc_divide_error) + { +- do_error_trap(regs, 0, "divide_error", X86_TRAP_DE, SIGFPE, ++ do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE, + FPE_INTDIV, error_get_trap_addr(regs)); + } + +diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile +index 6110bce7237bd..02c3cec7e5157 100644 +--- a/arch/x86/lib/Makefile ++++ b/arch/x86/lib/Makefile +@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o + lib-y := delay.o misc.o cmdline.o cpu.o + lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o + lib-y += memcpy_$(BITS).o ++lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o + lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o + lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o + lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o +diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c +new file mode 100644 +index 0000000000000..c13e8c9ee926b +--- /dev/null ++++ 
b/arch/x86/lib/copy_mc.c +@@ -0,0 +1,96 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */ ++ ++#include <linux/jump_label.h> ++#include <linux/uaccess.h> ++#include <linux/export.h> ++#include <linux/string.h> ++#include <linux/types.h> ++ ++#include <asm/mce.h> ++ ++#ifdef CONFIG_X86_MCE ++/* ++ * See COPY_MC_TEST for self-test of the copy_mc_fragile() ++ * implementation. ++ */ ++static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key); ++ ++void enable_copy_mc_fragile(void) ++{ ++ static_branch_inc(©_mc_fragile_key); ++} ++#define copy_mc_fragile_enabled (static_branch_unlikely(©_mc_fragile_key)) ++ ++/* ++ * Similar to copy_user_handle_tail, probe for the write fault point, or ++ * source exception point. ++ */ ++__visible notrace unsigned long ++copy_mc_fragile_handle_tail(char *to, char *from, unsigned len) ++{ ++ for (; len; --len, to++, from++) ++ if (copy_mc_fragile(to, from, 1)) ++ break; ++ return len; ++} ++#else ++/* ++ * No point in doing careful copying, or consulting a static key when ++ * there is no #MC handler in the CONFIG_X86_MCE=n case. ++ */ ++void enable_copy_mc_fragile(void) ++{ ++} ++#define copy_mc_fragile_enabled (0) ++#endif ++ ++unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len); ++ ++/** ++ * copy_mc_to_kernel - memory copy that handles source exceptions ++ * ++ * @dst: destination address ++ * @src: source address ++ * @len: number of bytes to copy ++ * ++ * Call into the 'fragile' version on systems that benefit from avoiding ++ * corner case poison consumption scenarios, For example, accessing ++ * poison across 2 cachelines with a single instruction. Almost all ++ * other uses case can use copy_mc_enhanced_fast_string() for a fast ++ * recoverable copy, or fallback to plain memcpy. ++ * ++ * Return 0 for success, or number of bytes not copied if there was an ++ * exception. ++ */ ++unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len) ++{ ++ if (copy_mc_fragile_enabled) ++ return copy_mc_fragile(dst, src, len); ++ if (static_cpu_has(X86_FEATURE_ERMS)) ++ return copy_mc_enhanced_fast_string(dst, src, len); ++ memcpy(dst, src, len); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(copy_mc_to_kernel); ++ ++unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len) ++{ ++ unsigned long ret; ++ ++ if (copy_mc_fragile_enabled) { ++ __uaccess_begin(); ++ ret = copy_mc_fragile(dst, src, len); ++ __uaccess_end(); ++ return ret; ++ } ++ ++ if (static_cpu_has(X86_FEATURE_ERMS)) { ++ __uaccess_begin(); ++ ret = copy_mc_enhanced_fast_string(dst, src, len); ++ __uaccess_end(); ++ return ret; ++ } ++ ++ return copy_user_generic(dst, src, len); ++} +diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S +new file mode 100644 +index 0000000000000..892d8915f609e +--- /dev/null ++++ b/arch/x86/lib/copy_mc_64.S +@@ -0,0 +1,163 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. 
*/ ++ ++#include <linux/linkage.h> ++#include <asm/copy_mc_test.h> ++#include <asm/export.h> ++#include <asm/asm.h> ++ ++#ifndef CONFIG_UML ++ ++#ifdef CONFIG_X86_MCE ++COPY_MC_TEST_CTL ++ ++/* ++ * copy_mc_fragile - copy memory with indication if an exception / fault happened ++ * ++ * The 'fragile' version is opted into by platform quirks and takes ++ * pains to avoid unrecoverable corner cases like 'fast-string' ++ * instruction sequences, and consuming poison across a cacheline ++ * boundary. The non-fragile version is equivalent to memcpy() ++ * regardless of CPU machine-check-recovery capability. ++ */ ++SYM_FUNC_START(copy_mc_fragile) ++ cmpl $8, %edx ++ /* Less than 8 bytes? Go to byte copy loop */ ++ jb .L_no_whole_words ++ ++ /* Check for bad alignment of source */ ++ testl $7, %esi ++ /* Already aligned */ ++ jz .L_8byte_aligned ++ ++ /* Copy one byte at a time until source is 8-byte aligned */ ++ movl %esi, %ecx ++ andl $7, %ecx ++ subl $8, %ecx ++ negl %ecx ++ subl %ecx, %edx ++.L_read_leading_bytes: ++ movb (%rsi), %al ++ COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes ++ COPY_MC_TEST_DST %rdi 1 .E_leading_bytes ++.L_write_leading_bytes: ++ movb %al, (%rdi) ++ incq %rsi ++ incq %rdi ++ decl %ecx ++ jnz .L_read_leading_bytes ++ ++.L_8byte_aligned: ++ movl %edx, %ecx ++ andl $7, %edx ++ shrl $3, %ecx ++ jz .L_no_whole_words ++ ++.L_read_words: ++ movq (%rsi), %r8 ++ COPY_MC_TEST_SRC %rsi 8 .E_read_words ++ COPY_MC_TEST_DST %rdi 8 .E_write_words ++.L_write_words: ++ movq %r8, (%rdi) ++ addq $8, %rsi ++ addq $8, %rdi ++ decl %ecx ++ jnz .L_read_words ++ ++ /* Any trailing bytes? */ ++.L_no_whole_words: ++ andl %edx, %edx ++ jz .L_done_memcpy_trap ++ ++ /* Copy trailing bytes */ ++ movl %edx, %ecx ++.L_read_trailing_bytes: ++ movb (%rsi), %al ++ COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes ++ COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes ++.L_write_trailing_bytes: ++ movb %al, (%rdi) ++ incq %rsi ++ incq %rdi ++ decl %ecx ++ jnz .L_read_trailing_bytes ++ ++ /* Copy successful. Return zero */ ++.L_done_memcpy_trap: ++ xorl %eax, %eax ++.L_done: ++ ret ++SYM_FUNC_END(copy_mc_fragile) ++EXPORT_SYMBOL_GPL(copy_mc_fragile) ++ ++ .section .fixup, "ax" ++ /* ++ * Return number of bytes not copied for any failure. Note that ++ * there is no "tail" handling since the source buffer is 8-byte ++ * aligned and poison is cacheline aligned. ++ */ ++.E_read_words: ++ shll $3, %ecx ++.E_leading_bytes: ++ addl %edx, %ecx ++.E_trailing_bytes: ++ mov %ecx, %eax ++ jmp .L_done ++ ++ /* ++ * For write fault handling, given the destination is unaligned, ++ * we handle faults on multi-byte writes with a byte-by-byte ++ * copy up to the write-protected page. ++ */ ++.E_write_words: ++ shll $3, %ecx ++ addl %edx, %ecx ++ movl %ecx, %edx ++ jmp copy_mc_fragile_handle_tail ++ ++ .previous ++ ++ _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) ++ _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) ++ _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) ++ _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) ++ _ASM_EXTABLE(.L_write_words, .E_write_words) ++ _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) ++#endif /* CONFIG_X86_MCE */ ++ ++/* ++ * copy_mc_enhanced_fast_string - memory copy with exception handling ++ * ++ * Fast string copy + fault / exception handling. If the CPU does ++ * support machine check exception recovery, but does not support ++ * recovering from fast-string exceptions then this CPU needs to be ++ * added to the copy_mc_fragile_key set of quirks. 
Otherwise, absent any ++ * machine check recovery support this version should be no slower than ++ * standard memcpy. ++ */ ++SYM_FUNC_START(copy_mc_enhanced_fast_string) ++ movq %rdi, %rax ++ movq %rdx, %rcx ++.L_copy: ++ rep movsb ++ /* Copy successful. Return zero */ ++ xorl %eax, %eax ++ ret ++SYM_FUNC_END(copy_mc_enhanced_fast_string) ++ ++ .section .fixup, "ax" ++.E_copy: ++ /* ++ * On fault %rcx is updated such that the copy instruction could ++ * optionally be restarted at the fault position, i.e. it ++ * contains 'bytes remaining'. A non-zero return indicates error ++ * to copy_mc_generic() users, or indicate short transfers to ++ * user-copy routines. ++ */ ++ movq %rcx, %rax ++ ret ++ ++ .previous ++ ++ _ASM_EXTABLE_FAULT(.L_copy, .E_copy) ++#endif /* !CONFIG_UML */ +diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S +index bbcc05bcefadb..037faac46b0cc 100644 +--- a/arch/x86/lib/memcpy_64.S ++++ b/arch/x86/lib/memcpy_64.S +@@ -4,7 +4,6 @@ + #include <linux/linkage.h> + #include <asm/errno.h> + #include <asm/cpufeatures.h> +-#include <asm/mcsafe_test.h> + #include <asm/alternative-asm.h> + #include <asm/export.h> + +@@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig) + SYM_FUNC_END(memcpy_orig) + + .popsection +- +-#ifndef CONFIG_UML +- +-MCSAFE_TEST_CTL +- +-/* +- * __memcpy_mcsafe - memory copy with machine check exception handling +- * Note that we only catch machine checks when reading the source addresses. +- * Writes to target are posted and don't generate machine checks. +- */ +-SYM_FUNC_START(__memcpy_mcsafe) +- cmpl $8, %edx +- /* Less than 8 bytes? Go to byte copy loop */ +- jb .L_no_whole_words +- +- /* Check for bad alignment of source */ +- testl $7, %esi +- /* Already aligned */ +- jz .L_8byte_aligned +- +- /* Copy one byte at a time until source is 8-byte aligned */ +- movl %esi, %ecx +- andl $7, %ecx +- subl $8, %ecx +- negl %ecx +- subl %ecx, %edx +-.L_read_leading_bytes: +- movb (%rsi), %al +- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes +- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes +-.L_write_leading_bytes: +- movb %al, (%rdi) +- incq %rsi +- incq %rdi +- decl %ecx +- jnz .L_read_leading_bytes +- +-.L_8byte_aligned: +- movl %edx, %ecx +- andl $7, %edx +- shrl $3, %ecx +- jz .L_no_whole_words +- +-.L_read_words: +- movq (%rsi), %r8 +- MCSAFE_TEST_SRC %rsi 8 .E_read_words +- MCSAFE_TEST_DST %rdi 8 .E_write_words +-.L_write_words: +- movq %r8, (%rdi) +- addq $8, %rsi +- addq $8, %rdi +- decl %ecx +- jnz .L_read_words +- +- /* Any trailing bytes? */ +-.L_no_whole_words: +- andl %edx, %edx +- jz .L_done_memcpy_trap +- +- /* Copy trailing bytes */ +- movl %edx, %ecx +-.L_read_trailing_bytes: +- movb (%rsi), %al +- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes +- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes +-.L_write_trailing_bytes: +- movb %al, (%rdi) +- incq %rsi +- incq %rdi +- decl %ecx +- jnz .L_read_trailing_bytes +- +- /* Copy successful. Return zero */ +-.L_done_memcpy_trap: +- xorl %eax, %eax +-.L_done: +- ret +-SYM_FUNC_END(__memcpy_mcsafe) +-EXPORT_SYMBOL_GPL(__memcpy_mcsafe) +- +- .section .fixup, "ax" +- /* +- * Return number of bytes not copied for any failure. Note that +- * there is no "tail" handling since the source buffer is 8-byte +- * aligned and poison is cacheline aligned. 
+- */ +-.E_read_words: +- shll $3, %ecx +-.E_leading_bytes: +- addl %edx, %ecx +-.E_trailing_bytes: +- mov %ecx, %eax +- jmp .L_done +- +- /* +- * For write fault handling, given the destination is unaligned, +- * we handle faults on multi-byte writes with a byte-by-byte +- * copy up to the write-protected page. +- */ +-.E_write_words: +- shll $3, %ecx +- addl %edx, %ecx +- movl %ecx, %edx +- jmp mcsafe_handle_tail +- +- .previous +- +- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) +- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) +- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) +- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) +- _ASM_EXTABLE(.L_write_words, .E_write_words) +- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) +-#endif +diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c +index 1847e993ac63a..508c81e97ab10 100644 +--- a/arch/x86/lib/usercopy_64.c ++++ b/arch/x86/lib/usercopy_64.c +@@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to, unsigned long n) + } + EXPORT_SYMBOL(clear_user); + +-/* +- * Similar to copy_user_handle_tail, probe for the write fault point, +- * but reuse __memcpy_mcsafe in case a new read error is encountered. +- * clac() is handled in _copy_to_iter_mcsafe(). +- */ +-__visible notrace unsigned long +-mcsafe_handle_tail(char *to, char *from, unsigned len) +-{ +- for (; len; --len, to++, from++) { +- /* +- * Call the assembly routine back directly since +- * memcpy_mcsafe() may silently fallback to memcpy. +- */ +- unsigned long rem = __memcpy_mcsafe(to, from, 1); +- +- if (rem) +- break; +- } +- return len; +-} +- + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE + /** + * clean_cache_range - write back a cache range with CLWB +diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c +index 00c62115f39cd..0aaf31917061d 100644 +--- a/arch/x86/pci/intel_mid_pci.c ++++ b/arch/x86/pci/intel_mid_pci.c +@@ -33,6 +33,7 @@ + #include <asm/hw_irq.h> + #include <asm/io_apic.h> + #include <asm/intel-mid.h> ++#include <asm/acpi.h> + + #define PCIE_CAP_OFFSET 0x100 + +diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c +index c46b9f2e732ff..6e39eda00c2c9 100644 +--- a/arch/x86/xen/enlighten_pv.c ++++ b/arch/x86/xen/enlighten_pv.c +@@ -1438,6 +1438,15 @@ asmlinkage __visible void __init xen_start_kernel(void) + x86_init.mpparse.get_smp_config = x86_init_uint_noop; + + xen_boot_params_init_edd(); ++ ++#ifdef CONFIG_ACPI ++ /* ++ * Disable selecting "Firmware First mode" for correctable ++ * memory errors, as this is the duty of the hypervisor to ++ * decide. 
++ */ ++ acpi_disable_cmcff = 1; ++#endif + } + + if (!boot_params.screen_info.orig_video_isVGA) +diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h +index d991dd46e89cc..98b8baa47dc5e 100644 +--- a/drivers/ata/ahci.h ++++ b/drivers/ata/ahci.h +@@ -240,6 +240,8 @@ enum { + as default lpm_policy */ + AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during + suspend/resume */ ++ AHCI_HFLAG_IGN_NOTSUPP_POWER_ON = (1 << 27), /* ignore -EOPNOTSUPP ++ from phy_power_on() */ + + /* ap->flags bits */ + +diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c +index d4bba3ace45d7..3ad46d26d9d51 100644 +--- a/drivers/ata/ahci_mvebu.c ++++ b/drivers/ata/ahci_mvebu.c +@@ -227,7 +227,7 @@ static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = { + + static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = { + .plat_config = ahci_mvebu_armada_3700_config, +- .flags = AHCI_HFLAG_SUSPEND_PHYS, ++ .flags = AHCI_HFLAG_SUSPEND_PHYS | AHCI_HFLAG_IGN_NOTSUPP_POWER_ON, + }; + + static const struct of_device_id ahci_mvebu_of_match[] = { +diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c +index 129556fcf6be7..a1cbb894e5f0a 100644 +--- a/drivers/ata/libahci_platform.c ++++ b/drivers/ata/libahci_platform.c +@@ -59,7 +59,7 @@ int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) + } + + rc = phy_power_on(hpriv->phys[i]); +- if (rc) { ++ if (rc && !(rc == -EOPNOTSUPP && (hpriv->flags & AHCI_HFLAG_IGN_NOTSUPP_POWER_ON))) { + phy_exit(hpriv->phys[i]); + goto disable_phys; + } +diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c +index 141ac600b64c8..44b0ed8f6bb8a 100644 +--- a/drivers/ata/sata_rcar.c ++++ b/drivers/ata/sata_rcar.c +@@ -120,7 +120,7 @@ + /* Descriptor table word 0 bit (when DTA32M = 1) */ + #define SATA_RCAR_DTEND BIT(0) + +-#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFEUL ++#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFFUL + + /* Gen2 Physical Layer Control Registers */ + #define RCAR_GEN2_PHY_CTL1_REG 0x1704 +diff --git a/drivers/base/firmware_loader/fallback_platform.c b/drivers/base/firmware_loader/fallback_platform.c +index 685edb7dd05a7..6958ab1a80593 100644 +--- a/drivers/base/firmware_loader/fallback_platform.c ++++ b/drivers/base/firmware_loader/fallback_platform.c +@@ -17,7 +17,7 @@ int firmware_fallback_platform(struct fw_priv *fw_priv, u32 opt_flags) + if (!(opt_flags & FW_OPT_FALLBACK_PLATFORM)) + return -ENOENT; + +- rc = security_kernel_load_data(LOADING_FIRMWARE_EFI_EMBEDDED); ++ rc = security_kernel_load_data(LOADING_FIRMWARE); + if (rc) + return rc; + +diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c +index bad8e90ba168d..62fbc7df022bc 100644 +--- a/drivers/crypto/chelsio/chtls/chtls_cm.c ++++ b/drivers/crypto/chelsio/chtls/chtls_cm.c +@@ -772,14 +772,13 @@ static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb) + if (rpl->status != CPL_ERR_NONE) { + pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n", + rpl->status, stid); +- return CPL_RET_BUF_DONE; ++ } else { ++ cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); ++ sock_put(listen_ctx->lsk); ++ kfree(listen_ctx); ++ module_put(THIS_MODULE); + } +- cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); +- sock_put(listen_ctx->lsk); +- kfree(listen_ctx); +- module_put(THIS_MODULE); +- +- return 0; ++ return CPL_RET_BUF_DONE; + } + + static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +@@ -796,15 +795,13 @@ static int 
chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb) + if (rpl->status != CPL_ERR_NONE) { + pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n", + rpl->status, stid); +- return CPL_RET_BUF_DONE; ++ } else { ++ cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); ++ sock_put(listen_ctx->lsk); ++ kfree(listen_ctx); ++ module_put(THIS_MODULE); + } +- +- cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); +- sock_put(listen_ctx->lsk); +- kfree(listen_ctx); +- module_put(THIS_MODULE); +- +- return 0; ++ return CPL_RET_BUF_DONE; + } + + static void chtls_purge_wr_queue(struct sock *sk) +@@ -1513,7 +1510,6 @@ static void add_to_reap_list(struct sock *sk) + struct chtls_sock *csk = sk->sk_user_data; + + local_bh_disable(); +- bh_lock_sock(sk); + release_tcp_port(sk); /* release the port immediately */ + + spin_lock(&reap_list_lock); +@@ -1522,7 +1518,6 @@ static void add_to_reap_list(struct sock *sk) + if (!csk->passive_reap_next) + schedule_work(&reap_task); + spin_unlock(&reap_list_lock); +- bh_unlock_sock(sk); + local_bh_enable(); + } + +diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c +index 9fb5ca6682ea2..188d871f6b8cd 100644 +--- a/drivers/crypto/chelsio/chtls/chtls_io.c ++++ b/drivers/crypto/chelsio/chtls/chtls_io.c +@@ -1585,6 +1585,7 @@ skip_copy: + tp->urg_data = 0; + + if ((avail + offset) >= skb->len) { ++ struct sk_buff *next_skb; + if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { + tp->copied_seq += skb->len; + hws->rcvpld = skb->hdr_len; +@@ -1595,8 +1596,10 @@ skip_copy: + chtls_free_skb(sk, skb); + buffers_freed++; + hws->copied_seq = 0; +- if (copied >= target && +- !skb_peek(&sk->sk_receive_queue)) ++ next_skb = skb_peek(&sk->sk_receive_queue); ++ if (copied >= target && !next_skb) ++ break; ++ if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR) + break; + } + } while (len > 0); +diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c +index e5bfac79e5ac9..04f5d79d42653 100644 +--- a/drivers/firmware/efi/libstub/arm64-stub.c ++++ b/drivers/firmware/efi/libstub/arm64-stub.c +@@ -62,10 +62,12 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, + status = efi_get_random_bytes(sizeof(phys_seed), + (u8 *)&phys_seed); + if (status == EFI_NOT_FOUND) { +- efi_info("EFI_RNG_PROTOCOL unavailable, no randomness supplied\n"); ++ efi_info("EFI_RNG_PROTOCOL unavailable, KASLR will be disabled\n"); ++ efi_nokaslr = true; + } else if (status != EFI_SUCCESS) { +- efi_err("efi_get_random_bytes() failed\n"); +- return status; ++ efi_err("efi_get_random_bytes() failed (0x%lx), KASLR will be disabled\n", ++ status); ++ efi_nokaslr = true; + } + } else { + efi_info("KASLR disabled on kernel command line\n"); +diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c +index 11ecf3c4640eb..368cd60000eec 100644 +--- a/drivers/firmware/efi/libstub/fdt.c ++++ b/drivers/firmware/efi/libstub/fdt.c +@@ -136,7 +136,7 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size, + if (status) + goto fdt_set_fail; + +- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { ++ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) { + efi_status_t efi_status; + + efi_status = efi_get_random_bytes(sizeof(fdt_val64), +@@ -145,8 +145,6 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size, + status = fdt_setprop_var(fdt, node, "kaslr-seed", fdt_val64); + if (status) + goto fdt_set_fail; +- } else if (efi_status 
!= EFI_NOT_FOUND) { +- return efi_status; + } + } + +diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c +index e7532e7d74e91..0e1f11669b072 100644 +--- a/drivers/gpu/drm/i915/i915_debugfs.c ++++ b/drivers/gpu/drm/i915/i915_debugfs.c +@@ -323,6 +323,7 @@ static void print_context_stats(struct seq_file *m, + } + i915_gem_context_unlock_engines(ctx); + ++ mutex_lock(&ctx->mutex); + if (!IS_ERR_OR_NULL(ctx->file_priv)) { + struct file_stats stats = { + .vm = rcu_access_pointer(ctx->vm), +@@ -343,6 +344,7 @@ static void print_context_stats(struct seq_file *m, + + print_file_stats(m, name, stats); + } ++ mutex_unlock(&ctx->mutex); + + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); +diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c +index 3a98439bba832..0abce004a9591 100644 +--- a/drivers/infiniband/core/addr.c ++++ b/drivers/infiniband/core/addr.c +@@ -647,13 +647,12 @@ static void process_one_req(struct work_struct *_work) + req->callback = NULL; + + spin_lock_bh(&lock); ++ /* ++ * Although the work will normally have been canceled by the workqueue, ++ * it can still be requeued as long as it is on the req_list. ++ */ ++ cancel_delayed_work(&req->work); + if (!list_empty(&req->list)) { +- /* +- * Although the work will normally have been canceled by the +- * workqueue, it can still be requeued as long as it is on the +- * req_list. +- */ +- cancel_delayed_work(&req->work); + list_del_init(&req->list); + kfree(req); + } +diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c +index 1533419f18758..de467a1303db3 100644 +--- a/drivers/md/dm-writecache.c ++++ b/drivers/md/dm-writecache.c +@@ -49,7 +49,7 @@ do { \ + #define pmem_assign(dest, src) ((dest) = (src)) + #endif + +-#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM) ++#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM) + #define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS + #endif + +@@ -992,7 +992,8 @@ static void writecache_resume(struct dm_target *ti) + } + wc->freelist_size = 0; + +- r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t)); ++ r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count, ++ sizeof(uint64_t)); + if (r) { + writecache_error(wc, r, "hardware memory error when reading superblock: %d", r); + sb_seq_count = cpu_to_le64(0); +@@ -1008,7 +1009,8 @@ static void writecache_resume(struct dm_target *ti) + e->seq_count = -1; + continue; + } +- r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry)); ++ r = copy_mc_to_kernel(&wme, memory_entry(wc, e), ++ sizeof(struct wc_memory_entry)); + if (r) { + writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d", + (unsigned long)b, r); +@@ -1206,7 +1208,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data + + if (rw == READ) { + int r; +- r = memcpy_mcsafe(buf, data, size); ++ r = copy_mc_to_kernel(buf, data, size); + flush_dcache_page(bio_page(bio)); + if (unlikely(r)) { + writecache_error(wc, r, "hardware memory error when reading data: %d", r); +@@ -2349,7 +2351,7 @@ invalid_optional: + } + } + +- r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); ++ r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock)); + if (r) { + ti->error = "Hardware memory error when reading superblock"; + goto bad; +@@ -2360,7 +2362,8 @@ invalid_optional: + ti->error = "Unable to initialize device"; + goto bad; + } +- r = 
memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock)); ++ r = copy_mc_to_kernel(&s, sb(wc), ++ sizeof(struct wc_memory_superblock)); + if (r) { + ti->error = "Hardware memory error when reading superblock"; + goto bad; +diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c +index 82246f7aec6fb..e39b118b945f8 100644 +--- a/drivers/misc/cardreader/rtsx_pcr.c ++++ b/drivers/misc/cardreader/rtsx_pcr.c +@@ -1172,10 +1172,6 @@ void rtsx_pci_init_ocp(struct rtsx_pcr *pcr) + rtsx_pci_write_register(pcr, REG_OCPGLITCH, + SD_OCP_GLITCH_MASK, pcr->hw_param.ocp_glitch); + rtsx_pci_enable_ocp(pcr); +- } else { +- /* OC power down */ +- rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, +- OC_POWER_DOWN); + } + } + } +diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c +index 25a9dd9c0c1b5..2ba899f5659ff 100644 +--- a/drivers/misc/cxl/pci.c ++++ b/drivers/misc/cxl/pci.c +@@ -393,8 +393,8 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, + *capp_unit_id = get_capp_unit_id(np, *phb_index); + of_node_put(np); + if (!*capp_unit_id) { +- pr_err("cxl: invalid capp unit id (phb_index: %d)\n", +- *phb_index); ++ pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n", ++ *chipid, *phb_index); + return -ENODEV; + } + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index dd07db656a5c3..f3c125d50d7a0 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -1158,16 +1158,6 @@ static void bnxt_queue_sp_work(struct bnxt *bp) + schedule_work(&bp->sp_task); + } + +-static void bnxt_cancel_sp_work(struct bnxt *bp) +-{ +- if (BNXT_PF(bp)) { +- flush_workqueue(bnxt_pf_wq); +- } else { +- cancel_work_sync(&bp->sp_task); +- cancel_delayed_work_sync(&bp->fw_reset_task); +- } +-} +- + static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) + { + if (!rxr->bnapi->in_reset) { +@@ -4198,7 +4188,8 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, + u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM; + u16 dst = BNXT_HWRM_CHNL_CHIMP; + +- if (BNXT_NO_FW_ACCESS(bp)) ++ if (BNXT_NO_FW_ACCESS(bp) && ++ le16_to_cpu(req->req_type) != HWRM_FUNC_RESET) + return -EBUSY; + + if (msg_len > BNXT_HWRM_MAX_REQ_LEN) { +@@ -9247,7 +9238,10 @@ int bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) + { + int rc = 0; + +- rc = __bnxt_open_nic(bp, irq_re_init, link_re_init); ++ if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) ++ rc = -EIO; ++ if (!rc) ++ rc = __bnxt_open_nic(bp, irq_re_init, link_re_init); + if (rc) { + netdev_err(bp->dev, "nic open fail (rc: %x)\n", rc); + dev_close(bp->dev); +@@ -11505,15 +11499,17 @@ static void bnxt_remove_one(struct pci_dev *pdev) + if (BNXT_PF(bp)) + bnxt_sriov_disable(bp); + +- clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); +- bnxt_cancel_sp_work(bp); +- bp->sp_event = 0; +- +- bnxt_dl_fw_reporters_destroy(bp, true); + if (BNXT_PF(bp)) + devlink_port_type_clear(&bp->dl_port); + pci_disable_pcie_error_reporting(pdev); + unregister_netdev(dev); ++ clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); ++ /* Flush any pending tasks */ ++ cancel_work_sync(&bp->sp_task); ++ cancel_delayed_work_sync(&bp->fw_reset_task); ++ bp->sp_event = 0; ++ ++ bnxt_dl_fw_reporters_destroy(bp, true); + bnxt_dl_unregister(bp); + bnxt_shutdown_tc(bp); + +@@ -12238,6 +12234,9 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, + return 
PCI_ERS_RESULT_DISCONNECT; + } + ++ if (state == pci_channel_io_frozen) ++ set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state); ++ + if (netif_running(netdev)) + bnxt_close(netdev); + +@@ -12264,7 +12263,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) + { + struct net_device *netdev = pci_get_drvdata(pdev); + struct bnxt *bp = netdev_priv(netdev); +- int err = 0; ++ int err = 0, off; + pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT; + + netdev_info(bp->dev, "PCI Slot Reset\n"); +@@ -12276,6 +12275,20 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) + "Cannot re-enable PCI device after reset.\n"); + } else { + pci_set_master(pdev); ++ /* Upon fatal error, our device internal logic that latches to ++ * BAR value is getting reset and will restore only upon ++ * rewritting the BARs. ++ * ++ * As pci_restore_state() does not re-write the BARs if the ++ * value is same as saved value earlier, driver needs to ++ * write the BARs to 0 to force restore, in case of fatal error. ++ */ ++ if (test_and_clear_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, ++ &bp->state)) { ++ for (off = PCI_BASE_ADDRESS_0; ++ off <= PCI_BASE_ADDRESS_5; off += 4) ++ pci_write_config_dword(bp->pdev, off, 0); ++ } + pci_restore_state(pdev); + pci_save_state(pdev); + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +index 440b43c8068f1..a80ac2ae57a68 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +@@ -1672,6 +1672,7 @@ struct bnxt { + #define BNXT_STATE_ABORT_ERR 5 + #define BNXT_STATE_FW_FATAL_COND 6 + #define BNXT_STATE_DRV_REGISTERED 7 ++#define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8 + + #define BNXT_NO_FW_ACCESS(bp) \ + (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ +diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +index ff0d82e2535da..fd33c888046b9 100644 +--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c ++++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +@@ -145,13 +145,13 @@ static int configure_filter_smac(struct adapter *adap, struct filter_entry *f) + int err; + + /* do a set-tcb for smac-sel and CWR bit.. 
*/ +- err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); +- if (err) +- goto smac_err; +- + err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W, + TCB_SMAC_SEL_V(TCB_SMAC_SEL_M), + TCB_SMAC_SEL_V(f->smt->idx), 1); ++ if (err) ++ goto smac_err; ++ ++ err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); + if (!err) + return 0; + +@@ -865,6 +865,7 @@ int set_filter_wr(struct adapter *adapter, int fidx) + FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | + FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | + FW_FILTER_WR_DMAC_V(f->fs.newdmac) | ++ FW_FILTER_WR_SMAC_V(f->fs.newsmac) | + FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) | + FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || +@@ -882,7 +883,7 @@ int set_filter_wr(struct adapter *adapter, int fidx) + FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | + FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | + FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); +- fwr->smac_sel = 0; ++ fwr->smac_sel = f->smt->idx; + fwr->rx_chan_rx_rpl_iq = + htons(FW_FILTER_WR_RX_CHAN_V(0) | + FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); +@@ -1321,11 +1322,8 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb, + TX_QUEUE_V(f->fs.nat_mode) | + T5_OPT_2_VALID_F | + RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) | +- CONG_CNTRL_V((f->fs.action == FILTER_DROP) | +- (f->fs.dirsteer << 1)) | + PACE_V((f->fs.maskhash) | +- ((f->fs.dirsteerhash) << 1)) | +- CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); ++ ((f->fs.dirsteerhash) << 1))); + } + + static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, +@@ -1361,11 +1359,8 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, + TX_QUEUE_V(f->fs.nat_mode) | + T5_OPT_2_VALID_F | + RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) | +- CONG_CNTRL_V((f->fs.action == FILTER_DROP) | +- (f->fs.dirsteer << 1)) | + PACE_V((f->fs.maskhash) | +- ((f->fs.dirsteerhash) << 1)) | +- CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); ++ ((f->fs.dirsteerhash) << 1))); + } + + static int cxgb4_set_hash_filter(struct net_device *dev, +@@ -2037,6 +2032,20 @@ void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl) + } + return; + } ++ switch (f->fs.action) { ++ case FILTER_PASS: ++ if (f->fs.dirsteer) ++ set_tcb_tflag(adap, f, tid, ++ TF_DIRECT_STEER_S, 1, 1); ++ break; ++ case FILTER_DROP: ++ set_tcb_tflag(adap, f, tid, TF_DROP_S, 1, 1); ++ break; ++ case FILTER_SWITCH: ++ set_tcb_tflag(adap, f, tid, TF_LPBK_S, 1, 1); ++ break; ++ } ++ + break; + + default: +@@ -2104,22 +2113,11 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) + if (ctx) + ctx->result = 0; + } else if (ret == FW_FILTER_WR_FLT_ADDED) { +- int err = 0; +- +- if (f->fs.newsmac) +- err = configure_filter_smac(adap, f); +- +- if (!err) { +- f->pending = 0; /* async setup completed */ +- f->valid = 1; +- if (ctx) { +- ctx->result = 0; +- ctx->tid = idx; +- } +- } else { +- clear_filter(adap, f); +- if (ctx) +- ctx->result = err; ++ f->pending = 0; /* async setup completed */ ++ f->valid = 1; ++ if (ctx) { ++ ctx->result = 0; ++ ctx->tid = idx; + } + } else { + /* Something went wrong. 
Issue a warning about the +diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +index 50232e063f49e..92473dda55d9f 100644 +--- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h ++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +@@ -50,6 +50,10 @@ + #define TCB_T_FLAGS_M 0xffffffffffffffffULL + #define TCB_T_FLAGS_V(x) ((__u64)(x) << TCB_T_FLAGS_S) + ++#define TF_DROP_S 22 ++#define TF_DIRECT_STEER_S 23 ++#define TF_LPBK_S 59 ++ + #define TF_CCTRL_ECE_S 60 + #define TF_CCTRL_CWR_S 61 + #define TF_CCTRL_RFR_S 62 +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +index 9162856de1b19..ab15f1c588b3a 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +@@ -3146,8 +3146,8 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev) + hclgevf_uninit_msi(hdev); + } + +- hclgevf_pci_uninit(hdev); + hclgevf_cmd_uninit(hdev); ++ hclgevf_pci_uninit(hdev); + hclgevf_uninit_mac_list(hdev); + } + +diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c +index 7ef3369953b6a..c3ec9ceed833e 100644 +--- a/drivers/net/ethernet/ibm/ibmveth.c ++++ b/drivers/net/ethernet/ibm/ibmveth.c +@@ -1031,12 +1031,6 @@ static int ibmveth_is_packet_unsupported(struct sk_buff *skb, + ret = -EOPNOTSUPP; + } + +- if (!ether_addr_equal(ether_header->h_source, netdev->dev_addr)) { +- netdev_dbg(netdev, "source packet MAC address does not match veth device's, dropping packet.\n"); +- netdev->stats.tx_dropped++; +- ret = -EOPNOTSUPP; +- } +- + return ret; + } + +diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c +index 3e0aab04d86fb..f96bb3dab5a8b 100644 +--- a/drivers/net/ethernet/ibm/ibmvnic.c ++++ b/drivers/net/ethernet/ibm/ibmvnic.c +@@ -1828,9 +1828,13 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) + int rc; + + rc = 0; +- ether_addr_copy(adapter->mac_addr, addr->sa_data); +- if (adapter->state != VNIC_PROBED) ++ if (!is_valid_ether_addr(addr->sa_data)) ++ return -EADDRNOTAVAIL; ++ ++ if (adapter->state != VNIC_PROBED) { ++ ether_addr_copy(adapter->mac_addr, addr->sa_data); + rc = __ibmvnic_set_mac(netdev, addr->sa_data); ++ } + + return rc; + } +diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c +index 71b6185b49042..42726fdf5a3af 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/core.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c +@@ -1483,6 +1483,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, + if (!reload) + devlink_resources_unregister(devlink, NULL); + mlxsw_core->bus->fini(mlxsw_core->bus_priv); ++ if (!reload) ++ devlink_free(devlink); + + return; + +diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c +index b1feef473b746..ed89e669ddd5b 100644 +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -4559,7 +4559,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) + } + + rtl_irq_disable(tp); +- napi_schedule_irqoff(&tp->napi); ++ napi_schedule(&tp->napi); + out: + rtl_ack_events(tp, status); + +@@ -4727,7 +4727,7 @@ static int rtl_open(struct net_device *dev) + rtl_request_firmware(tp); + + retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt, +- IRQF_NO_THREAD | IRQF_SHARED, dev->name, tp); ++ IRQF_SHARED, dev->name, tp); + 
if (retval < 0) + goto err_release_fw_2; + +diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c +index 99f7aae102ce1..6c58ba186b2cb 100644 +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -1747,12 +1747,16 @@ static int ravb_hwtstamp_get(struct net_device *ndev, struct ifreq *req) + config.flags = 0; + config.tx_type = priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON : + HWTSTAMP_TX_OFF; +- if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_V2_L2_EVENT) ++ switch (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE) { ++ case RAVB_RXTSTAMP_TYPE_V2_L2_EVENT: + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; +- else if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_ALL) ++ break; ++ case RAVB_RXTSTAMP_TYPE_ALL: + config.rx_filter = HWTSTAMP_FILTER_ALL; +- else ++ break; ++ default: + config.rx_filter = HWTSTAMP_FILTER_NONE; ++ } + + return copy_to_user(req->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c +index 8e47d0112e5dc..10f910f8cbe52 100644 +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -663,10 +663,6 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, + + gtp = netdev_priv(dev); + +- err = gtp_encap_enable(gtp, data); +- if (err < 0) +- return err; +- + if (!data[IFLA_GTP_PDP_HASHSIZE]) { + hashsize = 1024; + } else { +@@ -677,12 +673,16 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, + + err = gtp_hashtable_new(gtp, hashsize); + if (err < 0) +- goto out_encap; ++ return err; ++ ++ err = gtp_encap_enable(gtp, data); ++ if (err < 0) ++ goto out_hashtable; + + err = register_netdevice(dev); + if (err < 0) { + netdev_dbg(dev, "failed to register new netdev %d\n", err); +- goto out_hashtable; ++ goto out_encap; + } + + gn = net_generic(dev_net(dev), gtp_net_id); +@@ -693,11 +693,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, + + return 0; + ++out_encap: ++ gtp_encap_disable(gtp); + out_hashtable: + kfree(gtp->addr_hash); + kfree(gtp->tid_hash); +-out_encap: +- gtp_encap_disable(gtp); + return err; + } + +diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c +index bdbfeed359db3..41e9af35a5820 100644 +--- a/drivers/net/ipa/gsi_trans.c ++++ b/drivers/net/ipa/gsi_trans.c +@@ -398,15 +398,24 @@ void gsi_trans_cmd_add(struct gsi_trans *trans, void *buf, u32 size, + + /* assert(which < trans->tre_count); */ + +- /* Set the page information for the buffer. We also need to fill in +- * the DMA address and length for the buffer (something dma_map_sg() +- * normally does). ++ /* Commands are quite different from data transfer requests. ++ * Their payloads come from a pool whose memory is allocated ++ * using dma_alloc_coherent(). We therefore do *not* map them ++ * for DMA (unlike what we do for pages and skbs). ++ * ++ * When a transaction completes, the SGL is normally unmapped. ++ * A command transaction has direction DMA_NONE, which tells ++ * gsi_trans_complete() to skip the unmapping step. ++ * ++ * The only things we use directly in a command scatter/gather ++ * entry are the DMA address and length. We still need the SG ++ * table flags to be maintained though, so assign a NULL page ++ * pointer for that purpose. 
+ */ + sg = &trans->sgl[which]; +- +- sg_set_buf(sg, buf, size); ++ sg_assign_page(sg, NULL); + sg_dma_address(sg) = addr; +- sg_dma_len(sg) = sg->length; ++ sg_dma_len(sg) = size; + + info = &trans->info[which]; + info->opcode = opcode; +diff --git a/drivers/net/wireless/intersil/p54/p54pci.c b/drivers/net/wireless/intersil/p54/p54pci.c +index 80ad0b7eaef43..f8c6027cab6b4 100644 +--- a/drivers/net/wireless/intersil/p54/p54pci.c ++++ b/drivers/net/wireless/intersil/p54/p54pci.c +@@ -329,10 +329,12 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb) + struct p54p_desc *desc; + dma_addr_t mapping; + u32 idx, i; ++ __le32 device_addr; + + spin_lock_irqsave(&priv->lock, flags); + idx = le32_to_cpu(ring_control->host_idx[1]); + i = idx % ARRAY_SIZE(ring_control->tx_data); ++ device_addr = ((struct p54_hdr *)skb->data)->req_id; + + mapping = pci_map_single(priv->pdev, skb->data, skb->len, + PCI_DMA_TODEVICE); +@@ -346,7 +348,7 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb) + + desc = &ring_control->tx_data[i]; + desc->host_addr = cpu_to_le32(mapping); +- desc->device_addr = ((struct p54_hdr *)skb->data)->req_id; ++ desc->device_addr = device_addr; + desc->len = cpu_to_le16(skb->len); + desc->flags = 0; + +diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c +index 45964acba9443..22d865ba6353d 100644 +--- a/drivers/nvdimm/claim.c ++++ b/drivers/nvdimm/claim.c +@@ -268,7 +268,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, + if (rw == READ) { + if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) + return -EIO; +- if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0) ++ if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0) + return -EIO; + return 0; + } +diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c +index d25e66fd942dd..5a4f588605caf 100644 +--- a/drivers/nvdimm/pmem.c ++++ b/drivers/nvdimm/pmem.c +@@ -125,7 +125,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, + while (len) { + mem = kmap_atomic(page); + chunk = min_t(unsigned int, len, PAGE_SIZE - off); +- rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); ++ rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk); + kunmap_atomic(mem); + if (rem) + return BLK_STS_IOERR; +@@ -305,7 +305,7 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev, + + /* + * Use the 'no check' versions of copy_from_iter_flushcache() and +- * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds ++ * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. 
Bounds + * checking, both file offset and device offset, is handled by + * dax_iomap_actor() + */ +@@ -318,7 +318,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, + static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) + { +- return _copy_to_iter_mcsafe(addr, bytes, i); ++ return _copy_mc_to_iter(addr, bytes, i); + } + + static const struct dax_operations pmem_dax_ops = { +diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c +index d5f58684d962c..c79326e699e82 100644 +--- a/drivers/pci/controller/pci-aardvark.c ++++ b/drivers/pci/controller/pci-aardvark.c +@@ -1068,7 +1068,9 @@ static int advk_pcie_enable_phy(struct advk_pcie *pcie) + } + + ret = phy_power_on(pcie->phy); +- if (ret) { ++ if (ret == -EOPNOTSUPP) { ++ dev_warn(&pcie->pdev->dev, "PHY unsupported by firmware\n"); ++ } else if (ret) { + phy_exit(pcie->phy); + return ret; + } +diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +index 1a138be8bd6a0..810f25a476321 100644 +--- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c ++++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +@@ -26,7 +26,6 @@ + #define COMPHY_SIP_POWER_ON 0x82000001 + #define COMPHY_SIP_POWER_OFF 0x82000002 + #define COMPHY_SIP_PLL_LOCK 0x82000003 +-#define COMPHY_FW_NOT_SUPPORTED (-1) + + #define COMPHY_FW_MODE_SATA 0x1 + #define COMPHY_FW_MODE_SGMII 0x2 +@@ -112,10 +111,19 @@ static int mvebu_a3700_comphy_smc(unsigned long function, unsigned long lane, + unsigned long mode) + { + struct arm_smccc_res res; ++ s32 ret; + + arm_smccc_smc(function, lane, mode, 0, 0, 0, 0, 0, &res); ++ ret = res.a0; + +- return res.a0; ++ switch (ret) { ++ case SMCCC_RET_SUCCESS: ++ return 0; ++ case SMCCC_RET_NOT_SUPPORTED: ++ return -EOPNOTSUPP; ++ default: ++ return -EINVAL; ++ } + } + + static int mvebu_a3700_comphy_get_fw_mode(int lane, int port, +@@ -220,7 +228,7 @@ static int mvebu_a3700_comphy_power_on(struct phy *phy) + } + + ret = mvebu_a3700_comphy_smc(COMPHY_SIP_POWER_ON, lane->id, fw_param); +- if (ret == COMPHY_FW_NOT_SUPPORTED) ++ if (ret == -EOPNOTSUPP) + dev_err(lane->dev, + "unsupported SMC call, try updating your firmware\n"); + +diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c +index e41367f36ee1c..53ad127b100fe 100644 +--- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c ++++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c +@@ -123,7 +123,6 @@ + + #define COMPHY_SIP_POWER_ON 0x82000001 + #define COMPHY_SIP_POWER_OFF 0x82000002 +-#define COMPHY_FW_NOT_SUPPORTED (-1) + + /* + * A lane is described by the following bitfields: +@@ -273,10 +272,19 @@ static int mvebu_comphy_smc(unsigned long function, unsigned long phys, + unsigned long lane, unsigned long mode) + { + struct arm_smccc_res res; ++ s32 ret; + + arm_smccc_smc(function, phys, lane, mode, 0, 0, 0, 0, &res); ++ ret = res.a0; + +- return res.a0; ++ switch (ret) { ++ case SMCCC_RET_SUCCESS: ++ return 0; ++ case SMCCC_RET_NOT_SUPPORTED: ++ return -EOPNOTSUPP; ++ default: ++ return -EINVAL; ++ } + } + + static int mvebu_comphy_get_mode(bool fw_mode, int lane, int port, +@@ -819,7 +827,7 @@ static int mvebu_comphy_power_on(struct phy *phy) + if (!ret) + return ret; + +- if (ret == COMPHY_FW_NOT_SUPPORTED) ++ if (ret == -EOPNOTSUPP) + dev_err(priv->dev, + "unsupported SMC call, try updating your firmware\n"); + +diff --git a/drivers/tty/serial/amba-pl011.c 
b/drivers/tty/serial/amba-pl011.c +index a8d1edcf252c7..64e801a3a0206 100644 +--- a/drivers/tty/serial/amba-pl011.c ++++ b/drivers/tty/serial/amba-pl011.c +@@ -308,8 +308,9 @@ static void pl011_write(unsigned int val, const struct uart_amba_port *uap, + */ + static int pl011_fifo_to_tty(struct uart_amba_port *uap) + { +- u16 status; + unsigned int ch, flag, fifotaken; ++ int sysrq; ++ u16 status; + + for (fifotaken = 0; fifotaken != 256; fifotaken++) { + status = pl011_read(uap, REG_FR); +@@ -344,10 +345,12 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap) + flag = TTY_FRAME; + } + +- if (uart_handle_sysrq_char(&uap->port, ch & 255)) +- continue; ++ spin_unlock(&uap->port.lock); ++ sysrq = uart_handle_sysrq_char(&uap->port, ch & 255); ++ spin_lock(&uap->port.lock); + +- uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag); ++ if (!sysrq) ++ uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag); + } + + return fifotaken; +diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c +index ffdf6da016c21..2bb800ca5f0ca 100644 +--- a/drivers/tty/serial/qcom_geni_serial.c ++++ b/drivers/tty/serial/qcom_geni_serial.c +@@ -954,7 +954,7 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport, + sampling_rate = UART_OVERSAMPLING; + /* Sampling rate is halved for IP versions >= 2.5 */ + ver = geni_se_get_qup_hw_version(&port->se); +- if (GENI_SE_VERSION_MAJOR(ver) >= 2 && GENI_SE_VERSION_MINOR(ver) >= 5) ++ if (ver >= QUP_SE_VERSION_2_5) + sampling_rate /= 2; + + clk_rate = get_clk_div_rate(baud, sampling_rate, &clk_div); +diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c +index 64a9025a87bee..1f32db7b72b2c 100644 +--- a/drivers/xen/gntdev.c ++++ b/drivers/xen/gntdev.c +@@ -720,17 +720,18 @@ struct gntdev_copy_batch { + s16 __user *status[GNTDEV_COPY_BATCH]; + unsigned int nr_ops; + unsigned int nr_pages; ++ bool writeable; + }; + + static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt, +- bool writeable, unsigned long *gfn) ++ unsigned long *gfn) + { + unsigned long addr = (unsigned long)virt; + struct page *page; + unsigned long xen_pfn; + int ret; + +- ret = get_user_pages_fast(addr, 1, writeable ? FOLL_WRITE : 0, &page); ++ ret = get_user_pages_fast(addr, 1, batch->writeable ? 
FOLL_WRITE : 0, &page); + if (ret < 0) + return ret; + +@@ -746,9 +747,13 @@ static void gntdev_put_pages(struct gntdev_copy_batch *batch) + { + unsigned int i; + +- for (i = 0; i < batch->nr_pages; i++) ++ for (i = 0; i < batch->nr_pages; i++) { ++ if (batch->writeable && !PageDirty(batch->pages[i])) ++ set_page_dirty_lock(batch->pages[i]); + put_page(batch->pages[i]); ++ } + batch->nr_pages = 0; ++ batch->writeable = false; + } + + static int gntdev_copy(struct gntdev_copy_batch *batch) +@@ -837,8 +842,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch, + virt = seg->source.virt + copied; + off = (unsigned long)virt & ~XEN_PAGE_MASK; + len = min(len, (size_t)XEN_PAGE_SIZE - off); ++ batch->writeable = false; + +- ret = gntdev_get_page(batch, virt, false, &gfn); ++ ret = gntdev_get_page(batch, virt, &gfn); + if (ret < 0) + return ret; + +@@ -856,8 +862,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch, + virt = seg->dest.virt + copied; + off = (unsigned long)virt & ~XEN_PAGE_MASK; + len = min(len, (size_t)XEN_PAGE_SIZE - off); ++ batch->writeable = true; + +- ret = gntdev_get_page(batch, virt, true, &gfn); ++ ret = gntdev_get_page(batch, virt, &gfn); + if (ret < 0) + return ret; + +diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c +index 28bb5689333a5..15880a68faadc 100644 +--- a/fs/efivarfs/super.c ++++ b/fs/efivarfs/super.c +@@ -141,6 +141,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, + + name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; + ++ /* replace invalid slashes like kobject_set_name_vargs does for /sys/firmware/efi/vars. */ ++ strreplace(name, '/', '!'); ++ + inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0, + is_removable); + if (!inode) +diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c +index 87e437e7b34f2..f86e3247febc1 100644 +--- a/fs/erofs/xattr.c ++++ b/fs/erofs/xattr.c +@@ -473,8 +473,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler, + return -EOPNOTSUPP; + break; + case EROFS_XATTR_INDEX_TRUSTED: +- if (!capable(CAP_SYS_ADMIN)) +- return -EPERM; + break; + case EROFS_XATTR_INDEX_SECURITY: + break; +diff --git a/fs/exec.c b/fs/exec.c +index e6e8a9a703278..78976a3260c6a 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -62,6 +62,7 @@ + #include <linux/oom.h> + #include <linux/compat.h> + #include <linux/vmalloc.h> ++#include <linux/io_uring.h> + + #include <linux/uaccess.h> + #include <asm/mmu_context.h> +@@ -1847,6 +1848,11 @@ static int __do_execve_file(int fd, struct filename *filename, + * further execve() calls fail. 
*/ + current->flags &= ~PF_NPROC_EXCEEDED; + ++ /* ++ * Cancel any io_uring activity across execve ++ */ ++ io_uring_task_cancel(); ++ + retval = unshare_files(&displaced); + if (retval) + goto out_ret; +diff --git a/fs/file.c b/fs/file.c +index abb8b7081d7a4..8e2c532bb02e3 100644 +--- a/fs/file.c ++++ b/fs/file.c +@@ -18,6 +18,7 @@ + #include <linux/bitops.h> + #include <linux/spinlock.h> + #include <linux/rcupdate.h> ++#include <linux/io_uring.h> + + unsigned int sysctl_nr_open __read_mostly = 1024*1024; + unsigned int sysctl_nr_open_min = BITS_PER_LONG; +@@ -439,6 +440,7 @@ void exit_files(struct task_struct *tsk) + struct files_struct * files = tsk->files; + + if (files) { ++ io_uring_files_cancel(files); + task_lock(tsk); + tsk->files = NULL; + task_unlock(tsk); +diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c +index 02b3c36b36766..5078a6ca7dfcd 100644 +--- a/fs/fuse/dev.c ++++ b/fs/fuse/dev.c +@@ -785,15 +785,16 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) + struct page *newpage; + struct pipe_buffer *buf = cs->pipebufs; + ++ get_page(oldpage); + err = unlock_request(cs->req); + if (err) +- return err; ++ goto out_put_old; + + fuse_copy_finish(cs); + + err = pipe_buf_confirm(cs->pipe, buf); + if (err) +- return err; ++ goto out_put_old; + + BUG_ON(!cs->nr_segs); + cs->currbuf = buf; +@@ -833,7 +834,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) + err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL); + if (err) { + unlock_page(newpage); +- return err; ++ goto out_put_old; + } + + get_page(newpage); +@@ -852,14 +853,19 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) + if (err) { + unlock_page(newpage); + put_page(newpage); +- return err; ++ goto out_put_old; + } + + unlock_page(oldpage); ++ /* Drop ref for ap->pages[] array */ + put_page(oldpage); + cs->len = 0; + +- return 0; ++ err = 0; ++out_put_old: ++ /* Drop ref obtained in this function */ ++ put_page(oldpage); ++ return err; + + out_fallback_unlock: + unlock_page(newpage); +@@ -868,10 +874,10 @@ out_fallback: + cs->offset = buf->offset; + + err = lock_request(cs->req); +- if (err) +- return err; ++ if (!err) ++ err = 1; + +- return 1; ++ goto out_put_old; + } + + static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, +@@ -883,14 +889,16 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, + if (cs->nr_segs >= cs->pipe->max_usage) + return -EIO; + ++ get_page(page); + err = unlock_request(cs->req); +- if (err) ++ if (err) { ++ put_page(page); + return err; ++ } + + fuse_copy_finish(cs); + + buf = cs->pipebufs; +- get_page(page); + buf->page = page; + buf->offset = offset; + buf->len = count; +diff --git a/fs/io-wq.c b/fs/io-wq.c +index cb9e5a444fba7..56a229621a831 100644 +--- a/fs/io-wq.c ++++ b/fs/io-wq.c +@@ -60,6 +60,7 @@ struct io_worker { + const struct cred *cur_creds; + const struct cred *saved_creds; + struct files_struct *restore_files; ++ struct nsproxy *restore_nsproxy; + struct fs_struct *restore_fs; + }; + +@@ -87,7 +88,7 @@ enum { + */ + struct io_wqe { + struct { +- spinlock_t lock; ++ raw_spinlock_t lock; + struct io_wq_work_list work_list; + unsigned long hash_map; + unsigned flags; +@@ -148,11 +149,12 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) + + if (current->files != worker->restore_files) { + __acquire(&wqe->lock); +- spin_unlock_irq(&wqe->lock); ++ raw_spin_unlock_irq(&wqe->lock); + dropped_lock = true; + + 
task_lock(current); + current->files = worker->restore_files; ++ current->nsproxy = worker->restore_nsproxy; + task_unlock(current); + } + +@@ -166,7 +168,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) + if (worker->mm) { + if (!dropped_lock) { + __acquire(&wqe->lock); +- spin_unlock_irq(&wqe->lock); ++ raw_spin_unlock_irq(&wqe->lock); + dropped_lock = true; + } + __set_current_state(TASK_RUNNING); +@@ -200,7 +202,6 @@ static void io_worker_exit(struct io_worker *worker) + { + struct io_wqe *wqe = worker->wqe; + struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker); +- unsigned nr_workers; + + /* + * If we're not at zero, someone else is holding a brief reference +@@ -220,23 +221,19 @@ static void io_worker_exit(struct io_worker *worker) + worker->flags = 0; + preempt_enable(); + +- spin_lock_irq(&wqe->lock); ++ raw_spin_lock_irq(&wqe->lock); + hlist_nulls_del_rcu(&worker->nulls_node); + list_del_rcu(&worker->all_list); + if (__io_worker_unuse(wqe, worker)) { + __release(&wqe->lock); +- spin_lock_irq(&wqe->lock); ++ raw_spin_lock_irq(&wqe->lock); + } + acct->nr_workers--; +- nr_workers = wqe->acct[IO_WQ_ACCT_BOUND].nr_workers + +- wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers; +- spin_unlock_irq(&wqe->lock); +- +- /* all workers gone, wq exit can proceed */ +- if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs)) +- complete(&wqe->wq->done); ++ raw_spin_unlock_irq(&wqe->lock); + + kfree_rcu(worker, rcu); ++ if (refcount_dec_and_test(&wqe->wq->refs)) ++ complete(&wqe->wq->done); + } + + static inline bool io_wqe_run_queue(struct io_wqe *wqe) +@@ -318,6 +315,7 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker) + + worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); + worker->restore_files = current->files; ++ worker->restore_nsproxy = current->nsproxy; + worker->restore_fs = current->fs; + io_wqe_inc_running(wqe, worker); + } +@@ -454,6 +452,7 @@ static void io_impersonate_work(struct io_worker *worker, + if (work->files && current->files != work->files) { + task_lock(current); + current->files = work->files; ++ current->nsproxy = work->nsproxy; + task_unlock(current); + } + if (work->fs && current->fs != work->fs) +@@ -504,7 +503,7 @@ get_next: + else if (!wq_list_empty(&wqe->work_list)) + wqe->flags |= IO_WQE_FLAG_STALLED; + +- spin_unlock_irq(&wqe->lock); ++ raw_spin_unlock_irq(&wqe->lock); + if (!work) + break; + io_assign_current_work(worker, work); +@@ -539,7 +538,7 @@ get_next: + io_wqe_enqueue(wqe, linked); + + if (hash != -1U && !next_hashed) { +- spin_lock_irq(&wqe->lock); ++ raw_spin_lock_irq(&wqe->lock); + wqe->hash_map &= ~BIT_ULL(hash); + wqe->flags &= ~IO_WQE_FLAG_STALLED; + /* dependent work is not hashed */ +@@ -547,11 +546,11 @@ get_next: + /* skip unnecessary unlock-lock wqe->lock */ + if (!work) + goto get_next; +- spin_unlock_irq(&wqe->lock); ++ raw_spin_unlock_irq(&wqe->lock); + } + } while (work); + +- spin_lock_irq(&wqe->lock); ++ raw_spin_lock_irq(&wqe->lock); + } while (1); + } + +@@ -566,7 +565,7 @@ static int io_wqe_worker(void *data) + while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { + set_current_state(TASK_INTERRUPTIBLE); + loop: +- spin_lock_irq(&wqe->lock); ++ raw_spin_lock_irq(&wqe->lock); + if (io_wqe_run_queue(wqe)) { + __set_current_state(TASK_RUNNING); + io_worker_handle_work(worker); +@@ -577,7 +576,7 @@ loop: + __release(&wqe->lock); + goto loop; + } +- spin_unlock_irq(&wqe->lock); ++ raw_spin_unlock_irq(&wqe->lock); + if (signal_pending(current)) + flush_signals(current); + if 
(schedule_timeout(WORKER_IDLE_TIMEOUT)) +@@ -589,11 +588,11 @@ loop: + } + + if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) { +- spin_lock_irq(&wqe->lock); ++ raw_spin_lock_irq(&wqe->lock); + if (!wq_list_empty(&wqe->work_list)) + io_worker_handle_work(worker); + else +- spin_unlock_irq(&wqe->lock); ++ raw_spin_unlock_irq(&wqe->lock); + } + + io_worker_exit(worker); +@@ -633,14 +632,14 @@ void io_wq_worker_sleeping(struct task_struct *tsk) + + worker->flags &= ~IO_WORKER_F_RUNNING; + +- spin_lock_irq(&wqe->lock); ++ raw_spin_lock_irq(&wqe->lock); + io_wqe_dec_running(wqe, worker); +- spin_unlock_irq(&wqe->lock); ++ raw_spin_unlock_irq(&wqe->lock); + } + + static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) + { +- struct io_wqe_acct *acct =&wqe->acct[index]; ++ struct io_wqe_acct *acct = &wqe->acct[index]; + struct io_worker *worker; + + worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node); +@@ -659,7 +658,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) + return false; + } + +- spin_lock_irq(&wqe->lock); ++ raw_spin_lock_irq(&wqe->lock); + hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); + list_add_tail_rcu(&worker->all_list, &wqe->all_list); + worker->flags |= IO_WORKER_F_FREE; +@@ -668,11 +667,12 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) + if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND)) + worker->flags |= IO_WORKER_F_FIXED; + acct->nr_workers++; +- spin_unlock_irq(&wqe->lock); ++ raw_spin_unlock_irq(&wqe->lock); + + if (index == IO_WQ_ACCT_UNBOUND) + atomic_inc(&wq->user->processes); + ++ refcount_inc(&wq->refs); + wake_up_process(worker->task); + return true; + } +@@ -688,28 +688,63 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index) + return acct->nr_workers < acct->max_workers; + } + ++static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data) ++{ ++ send_sig(SIGINT, worker->task, 1); ++ return false; ++} ++ ++/* ++ * Iterate the passed in list and call the specific function for each ++ * worker that isn't exiting ++ */ ++static bool io_wq_for_each_worker(struct io_wqe *wqe, ++ bool (*func)(struct io_worker *, void *), ++ void *data) ++{ ++ struct io_worker *worker; ++ bool ret = false; ++ ++ list_for_each_entry_rcu(worker, &wqe->all_list, all_list) { ++ if (io_worker_get(worker)) { ++ /* no task if node is/was offline */ ++ if (worker->task) ++ ret = func(worker, data); ++ io_worker_release(worker); ++ if (ret) ++ break; ++ } ++ } ++ ++ return ret; ++} ++ ++static bool io_wq_worker_wake(struct io_worker *worker, void *data) ++{ ++ wake_up_process(worker->task); ++ return false; ++} ++ + /* + * Manager thread. Tasked with creating new workers, if we need them. 
+ */ + static int io_wq_manager(void *data) + { + struct io_wq *wq = data; +- int workers_to_create = num_possible_nodes(); + int node; + + /* create fixed workers */ +- refcount_set(&wq->refs, workers_to_create); ++ refcount_set(&wq->refs, 1); + for_each_node(node) { + if (!node_online(node)) + continue; +- if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND)) +- goto err; +- workers_to_create--; ++ if (create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND)) ++ continue; ++ set_bit(IO_WQ_BIT_ERROR, &wq->state); ++ set_bit(IO_WQ_BIT_EXIT, &wq->state); ++ goto out; + } + +- while (workers_to_create--) +- refcount_dec(&wq->refs); +- + complete(&wq->done); + + while (!kthread_should_stop()) { +@@ -723,12 +758,12 @@ static int io_wq_manager(void *data) + if (!node_online(node)) + continue; + +- spin_lock_irq(&wqe->lock); ++ raw_spin_lock_irq(&wqe->lock); + if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND)) + fork_worker[IO_WQ_ACCT_BOUND] = true; + if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND)) + fork_worker[IO_WQ_ACCT_UNBOUND] = true; +- spin_unlock_irq(&wqe->lock); ++ raw_spin_unlock_irq(&wqe->lock); + if (fork_worker[IO_WQ_ACCT_BOUND]) + create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND); + if (fork_worker[IO_WQ_ACCT_UNBOUND]) +@@ -741,12 +776,18 @@ static int io_wq_manager(void *data) + if (current->task_works) + task_work_run(); + +- return 0; +-err: +- set_bit(IO_WQ_BIT_ERROR, &wq->state); +- set_bit(IO_WQ_BIT_EXIT, &wq->state); +- if (refcount_sub_and_test(workers_to_create, &wq->refs)) ++out: ++ if (refcount_dec_and_test(&wq->refs)) { + complete(&wq->done); ++ return 0; ++ } ++ /* if ERROR is set and we get here, we have workers to wake */ ++ if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) { ++ rcu_read_lock(); ++ for_each_node(node) ++ io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL); ++ rcu_read_unlock(); ++ } + return 0; + } + +@@ -825,10 +866,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) + } + + work_flags = work->flags; +- spin_lock_irqsave(&wqe->lock, flags); ++ raw_spin_lock_irqsave(&wqe->lock, flags); + io_wqe_insert_work(wqe, work); + wqe->flags &= ~IO_WQE_FLAG_STALLED; +- spin_unlock_irqrestore(&wqe->lock, flags); ++ raw_spin_unlock_irqrestore(&wqe->lock, flags); + + if ((work_flags & IO_WQ_WORK_CONCURRENT) || + !atomic_read(&acct->nr_running)) +@@ -854,37 +895,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val) + work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); + } + +-static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data) +-{ +- send_sig(SIGINT, worker->task, 1); +- return false; +-} +- +-/* +- * Iterate the passed in list and call the specific function for each +- * worker that isn't exiting +- */ +-static bool io_wq_for_each_worker(struct io_wqe *wqe, +- bool (*func)(struct io_worker *, void *), +- void *data) +-{ +- struct io_worker *worker; +- bool ret = false; +- +- list_for_each_entry_rcu(worker, &wqe->all_list, all_list) { +- if (io_worker_get(worker)) { +- /* no task if node is/was offline */ +- if (worker->task) +- ret = func(worker, data); +- io_worker_release(worker); +- if (ret) +- break; +- } +- } +- +- return ret; +-} +- + void io_wq_cancel_all(struct io_wq *wq) + { + int node; +@@ -955,13 +965,13 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe, + unsigned long flags; + + retry: +- spin_lock_irqsave(&wqe->lock, flags); ++ raw_spin_lock_irqsave(&wqe->lock, flags); + wq_list_for_each(node, prev, &wqe->work_list) { + work = container_of(node, struct io_wq_work, list); + 
if (!match->fn(work, match->data)) + continue; + io_wqe_remove_pending(wqe, work, prev); +- spin_unlock_irqrestore(&wqe->lock, flags); ++ raw_spin_unlock_irqrestore(&wqe->lock, flags); + io_run_cancel(work, wqe); + match->nr_pending++; + if (!match->cancel_all) +@@ -970,7 +980,7 @@ retry: + /* not safe to continue after unlock */ + goto retry; + } +- spin_unlock_irqrestore(&wqe->lock, flags); ++ raw_spin_unlock_irqrestore(&wqe->lock, flags); + } + + static void io_wqe_cancel_running_work(struct io_wqe *wqe, +@@ -1078,7 +1088,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) + } + atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0); + wqe->wq = wq; +- spin_lock_init(&wqe->lock); ++ raw_spin_lock_init(&wqe->lock); + INIT_WQ_LIST(&wqe->work_list); + INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); + INIT_LIST_HEAD(&wqe->all_list); +@@ -1117,12 +1127,6 @@ bool io_wq_get(struct io_wq *wq, struct io_wq_data *data) + return refcount_inc_not_zero(&wq->use_refs); + } + +-static bool io_wq_worker_wake(struct io_worker *worker, void *data) +-{ +- wake_up_process(worker->task); +- return false; +-} +- + static void __io_wq_destroy(struct io_wq *wq) + { + int node; +diff --git a/fs/io-wq.h b/fs/io-wq.h +index 071f1a9978002..9be6def2b5a6f 100644 +--- a/fs/io-wq.h ++++ b/fs/io-wq.h +@@ -88,6 +88,7 @@ struct io_wq_work { + struct files_struct *files; + struct mm_struct *mm; + const struct cred *creds; ++ struct nsproxy *nsproxy; + struct fs_struct *fs; + unsigned flags; + }; +diff --git a/fs/io_uring.c b/fs/io_uring.c +index d2bb2ae9551f0..8e9c58fa76362 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -78,6 +78,7 @@ + #include <linux/fs_struct.h> + #include <linux/splice.h> + #include <linux/task_work.h> ++#include <linux/io_uring.h> + + #define CREATE_TRACE_POINTS + #include <trace/events/io_uring.h> +@@ -264,7 +265,16 @@ struct io_ring_ctx { + /* IO offload */ + struct io_wq *io_wq; + struct task_struct *sqo_thread; /* if using sq thread polling */ +- struct mm_struct *sqo_mm; ++ ++ /* ++ * For SQPOLL usage - we hold a reference to the parent task, so we ++ * have access to the ->files ++ */ ++ struct task_struct *sqo_task; ++ ++ /* Only used for accounting purposes */ ++ struct mm_struct *mm_account; ++ + wait_queue_head_t sqo_wait; + + /* +@@ -274,8 +284,6 @@ struct io_ring_ctx { + */ + struct fixed_file_data *file_data; + unsigned nr_user_files; +- int ring_fd; +- struct file *ring_file; + + /* if used, fixed mapped user buffers */ + unsigned nr_user_bufs; +@@ -541,7 +549,6 @@ enum { + REQ_F_NO_FILE_TABLE_BIT, + REQ_F_QUEUE_TIMEOUT_BIT, + REQ_F_WORK_INITIALIZED_BIT, +- REQ_F_TASK_PINNED_BIT, + + /* not a real bit, just to check we're not overflowing the space */ + __REQ_F_LAST_BIT, +@@ -599,8 +606,6 @@ enum { + REQ_F_QUEUE_TIMEOUT = BIT(REQ_F_QUEUE_TIMEOUT_BIT), + /* io_wq_work is initialized */ + REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT), +- /* req->task is refcounted */ +- REQ_F_TASK_PINNED = BIT(REQ_F_TASK_PINNED_BIT), + }; + + struct async_poll { +@@ -915,21 +920,6 @@ struct sock *io_uring_get_socket(struct file *file) + } + EXPORT_SYMBOL(io_uring_get_socket); + +-static void io_get_req_task(struct io_kiocb *req) +-{ +- if (req->flags & REQ_F_TASK_PINNED) +- return; +- get_task_struct(req->task); +- req->flags |= REQ_F_TASK_PINNED; +-} +- +-/* not idempotent -- it doesn't clear REQ_F_TASK_PINNED */ +-static void __io_put_req_task(struct io_kiocb *req) +-{ +- if (req->flags & REQ_F_TASK_PINNED) +- put_task_struct(req->task); +-} +- + static void 
io_file_put_work(struct work_struct *work); + + /* +@@ -1141,14 +1131,34 @@ static void io_kill_timeout(struct io_kiocb *req) + } + } + +-static void io_kill_timeouts(struct io_ring_ctx *ctx) ++static bool io_task_match(struct io_kiocb *req, struct task_struct *tsk) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ ++ if (!tsk || req->task == tsk) ++ return true; ++ if ((ctx->flags & IORING_SETUP_SQPOLL) && req->task == ctx->sqo_thread) ++ return true; ++ return false; ++} ++ ++/* ++ * Returns true if we found and killed one or more timeouts ++ */ ++static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk) + { + struct io_kiocb *req, *tmp; ++ int canceled = 0; + + spin_lock_irq(&ctx->completion_lock); +- list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list) +- io_kill_timeout(req); ++ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list) { ++ if (io_task_match(req, tsk)) { ++ io_kill_timeout(req); ++ canceled++; ++ } ++ } + spin_unlock_irq(&ctx->completion_lock); ++ return canceled != 0; + } + + static void __io_queue_deferred(struct io_ring_ctx *ctx) +@@ -1229,12 +1239,24 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) + eventfd_signal(ctx->cq_ev_fd, 1); + } + ++static inline bool io_match_files(struct io_kiocb *req, ++ struct files_struct *files) ++{ ++ if (!files) ++ return true; ++ if (req->flags & REQ_F_WORK_INITIALIZED) ++ return req->work.files == files; ++ return false; ++} ++ + /* Returns true if there are no backlogged entries after the flush */ +-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) ++static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, ++ struct task_struct *tsk, ++ struct files_struct *files) + { + struct io_rings *rings = ctx->rings; ++ struct io_kiocb *req, *tmp; + struct io_uring_cqe *cqe; +- struct io_kiocb *req; + unsigned long flags; + LIST_HEAD(list); + +@@ -1253,7 +1275,12 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) + ctx->cq_overflow_flushed = 1; + + cqe = NULL; +- while (!list_empty(&ctx->cq_overflow_list)) { ++ list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, list) { ++ if (tsk && req->task != tsk) ++ continue; ++ if (!io_match_files(req, files)) ++ continue; ++ + cqe = io_get_cqring(ctx); + if (!cqe && !force) + break; +@@ -1307,7 +1334,12 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) + WRITE_ONCE(cqe->user_data, req->user_data); + WRITE_ONCE(cqe->res, res); + WRITE_ONCE(cqe->flags, cflags); +- } else if (ctx->cq_overflow_flushed) { ++ } else if (ctx->cq_overflow_flushed || req->task->io_uring->in_idle) { ++ /* ++ * If we're in ring overflow flush mode, or in task cancel mode, ++ * then we cannot store the request for later flushing, we need ++ * to drop it on the floor. 
++ */ + WRITE_ONCE(ctx->rings->cq_overflow, + atomic_inc_return(&ctx->cached_cq_overflow)); + } else { +@@ -1412,15 +1444,35 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file, + fput(file); + } + ++static void io_req_drop_files(struct io_kiocb *req) ++{ ++ struct io_ring_ctx *ctx = req->ctx; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ctx->inflight_lock, flags); ++ list_del(&req->inflight_entry); ++ if (waitqueue_active(&ctx->inflight_wait)) ++ wake_up(&ctx->inflight_wait); ++ spin_unlock_irqrestore(&ctx->inflight_lock, flags); ++ req->flags &= ~REQ_F_INFLIGHT; ++ put_files_struct(req->work.files); ++ put_nsproxy(req->work.nsproxy); ++ req->work.files = NULL; ++} ++ + static void __io_req_aux_free(struct io_kiocb *req) + { ++ struct io_uring_task *tctx = req->task->io_uring; + if (req->flags & REQ_F_NEED_CLEANUP) + io_cleanup_req(req); + + kfree(req->io); + if (req->file) + io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); +- __io_put_req_task(req); ++ atomic_long_inc(&tctx->req_complete); ++ if (tctx->in_idle) ++ wake_up(&tctx->wait); ++ put_task_struct(req->task); + io_req_work_drop_env(req); + } + +@@ -1428,16 +1480,8 @@ static void __io_free_req(struct io_kiocb *req) + { + __io_req_aux_free(req); + +- if (req->flags & REQ_F_INFLIGHT) { +- struct io_ring_ctx *ctx = req->ctx; +- unsigned long flags; +- +- spin_lock_irqsave(&ctx->inflight_lock, flags); +- list_del(&req->inflight_entry); +- if (waitqueue_active(&ctx->inflight_wait)) +- wake_up(&ctx->inflight_wait); +- spin_unlock_irqrestore(&ctx->inflight_lock, flags); +- } ++ if (req->flags & REQ_F_INFLIGHT) ++ io_req_drop_files(req); + + percpu_ref_put(&req->ctx->refs); + if (likely(!io_is_fallback_req(req))) +@@ -1717,7 +1761,7 @@ static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) + if (noflush && !list_empty(&ctx->cq_overflow_list)) + return -1U; + +- io_cqring_overflow_flush(ctx, false); ++ io_cqring_overflow_flush(ctx, false, NULL, NULL); + } + + /* See comment at the top of this file */ +@@ -1738,7 +1782,7 @@ static inline bool io_req_multi_free(struct req_batch *rb, struct io_kiocb *req) + if ((req->flags & REQ_F_LINK_HEAD) || io_is_fallback_req(req)) + return false; + +- if (req->file || req->io) ++ if (req->file || req->io || req->task) + rb->need_iter++; + + rb->reqs[rb->to_free++] = req; +@@ -1762,6 +1806,12 @@ static int io_put_kbuf(struct io_kiocb *req) + + static inline bool io_run_task_work(void) + { ++ /* ++ * Not safe to run on exiting task, and the task_work handling will ++ * not add work to such a task. 
++ */ ++ if (unlikely(current->flags & PF_EXITING)) ++ return false; + if (current->task_works) { + __set_current_state(TASK_RUNNING); + task_work_run(); +@@ -3492,8 +3542,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) + return -EBADF; + + req->close.fd = READ_ONCE(sqe->fd); +- if ((req->file && req->file->f_op == &io_uring_fops) || +- req->close.fd == req->ctx->ring_fd) ++ if ((req->file && req->file->f_op == &io_uring_fops)) + return -EBADF; + + req->close.put_file = NULL; +@@ -4397,9 +4446,10 @@ static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx, + { + if (io_op_defs[req->opcode].needs_mm && !current->mm) { + if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) || +- !mmget_not_zero(ctx->sqo_mm))) ++ !ctx->sqo_task->mm || ++ !mmget_not_zero(ctx->sqo_task->mm))) + return -EFAULT; +- kthread_use_mm(ctx->sqo_mm); ++ kthread_use_mm(ctx->sqo_task->mm); + } + + return 0; +@@ -4550,7 +4600,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req) + if (req->flags & REQ_F_WORK_INITIALIZED) + memcpy(&apoll->work, &req->work, sizeof(req->work)); + +- io_get_req_task(req); + req->apoll = apoll; + INIT_HLIST_NODE(&req->hash_node); + +@@ -4635,7 +4684,10 @@ static bool io_poll_remove_one(struct io_kiocb *req) + return do_complete; + } + +-static void io_poll_remove_all(struct io_ring_ctx *ctx) ++/* ++ * Returns true if we found and killed one or more poll requests ++ */ ++static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk) + { + struct hlist_node *tmp; + struct io_kiocb *req; +@@ -4646,13 +4698,17 @@ static void io_poll_remove_all(struct io_ring_ctx *ctx) + struct hlist_head *list; + + list = &ctx->cancel_hash[i]; +- hlist_for_each_entry_safe(req, tmp, list, hash_node) +- posted += io_poll_remove_one(req); ++ hlist_for_each_entry_safe(req, tmp, list, hash_node) { ++ if (io_task_match(req, tsk)) ++ posted += io_poll_remove_one(req); ++ } + } + spin_unlock_irq(&ctx->completion_lock); + + if (posted) + io_cqring_ev_posted(ctx); ++ ++ return posted != 0; + } + + static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr) +@@ -4738,8 +4794,6 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe + + events = READ_ONCE(sqe->poll_events); + poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP; +- +- io_get_req_task(req); + return 0; + } + +@@ -5626,32 +5680,20 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, + + static int io_grab_files(struct io_kiocb *req) + { +- int ret = -EBADF; + struct io_ring_ctx *ctx = req->ctx; + + if (req->work.files || (req->flags & REQ_F_NO_FILE_TABLE)) + return 0; +- if (!ctx->ring_file) +- return -EBADF; + +- rcu_read_lock(); ++ req->work.files = get_files_struct(current); ++ get_nsproxy(current->nsproxy); ++ req->work.nsproxy = current->nsproxy; ++ req->flags |= REQ_F_INFLIGHT; ++ + spin_lock_irq(&ctx->inflight_lock); +- /* +- * We use the f_ops->flush() handler to ensure that we can flush +- * out work accessing these files if the fd is closed. Check if +- * the fd has changed since we started down this path, and disallow +- * this operation if it has. 
+- */ +- if (fcheck(ctx->ring_fd) == ctx->ring_file) { +- list_add(&req->inflight_entry, &ctx->inflight_list); +- req->flags |= REQ_F_INFLIGHT; +- req->work.files = current->files; +- ret = 0; +- } ++ list_add(&req->inflight_entry, &ctx->inflight_list); + spin_unlock_irq(&ctx->inflight_lock); +- rcu_read_unlock(); +- +- return ret; ++ return 0; + } + + static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) +@@ -6021,6 +6063,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, + /* one is dropped after submission, the other at completion */ + refcount_set(&req->refs, 2); + req->task = current; ++ get_task_struct(req->task); ++ atomic_long_inc(&req->task->io_uring->req_issue); + req->result = 0; + + if (unlikely(req->opcode >= IORING_OP_LAST)) +@@ -6056,8 +6100,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, + return io_req_set_file(state, req, READ_ONCE(sqe->fd)); + } + +-static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, +- struct file *ring_file, int ring_fd) ++static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) + { + struct io_submit_state state, *statep = NULL; + struct io_kiocb *link = NULL; +@@ -6066,7 +6109,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, + /* if we have a backlog and couldn't flush it all, return BUSY */ + if (test_bit(0, &ctx->sq_check_overflow)) { + if (!list_empty(&ctx->cq_overflow_list) && +- !io_cqring_overflow_flush(ctx, false)) ++ !io_cqring_overflow_flush(ctx, false, NULL, NULL)) + return -EBUSY; + } + +@@ -6081,9 +6124,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, + statep = &state; + } + +- ctx->ring_fd = ring_fd; +- ctx->ring_file = ring_file; +- + for (i = 0; i < nr; i++) { + const struct io_uring_sqe *sqe; + struct io_kiocb *req; +@@ -6244,7 +6284,7 @@ static int io_sq_thread(void *data) + + mutex_lock(&ctx->uring_lock); + if (likely(!percpu_ref_is_dying(&ctx->refs))) +- ret = io_submit_sqes(ctx, to_submit, NULL, -1); ++ ret = io_submit_sqes(ctx, to_submit); + mutex_unlock(&ctx->uring_lock); + timeout = jiffies + ctx->sq_thread_idle; + } +@@ -7073,14 +7113,38 @@ out_fput: + return ret; + } + ++static int io_uring_alloc_task_context(struct task_struct *task) ++{ ++ struct io_uring_task *tctx; ++ ++ tctx = kmalloc(sizeof(*tctx), GFP_KERNEL); ++ if (unlikely(!tctx)) ++ return -ENOMEM; ++ ++ xa_init(&tctx->xa); ++ init_waitqueue_head(&tctx->wait); ++ tctx->last = NULL; ++ tctx->in_idle = 0; ++ atomic_long_set(&tctx->req_issue, 0); ++ atomic_long_set(&tctx->req_complete, 0); ++ task->io_uring = tctx; ++ return 0; ++} ++ ++void __io_uring_free(struct task_struct *tsk) ++{ ++ struct io_uring_task *tctx = tsk->io_uring; ++ ++ WARN_ON_ONCE(!xa_empty(&tctx->xa)); ++ kfree(tctx); ++ tsk->io_uring = NULL; ++} ++ + static int io_sq_offload_start(struct io_ring_ctx *ctx, + struct io_uring_params *p) + { + int ret; + +- mmgrab(current->mm); +- ctx->sqo_mm = current->mm; +- + if (ctx->flags & IORING_SETUP_SQPOLL) { + ret = -EPERM; + if (!capable(CAP_SYS_ADMIN)) +@@ -7111,6 +7175,9 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, + ctx->sqo_thread = NULL; + goto err; + } ++ ret = io_uring_alloc_task_context(ctx->sqo_thread); ++ if (ret) ++ goto err; + wake_up_process(ctx->sqo_thread); + } else if (p->flags & IORING_SETUP_SQ_AFF) { + /* Can't have SQ_AFF without SQPOLL */ +@@ -7125,8 +7192,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, + return 0; + err: + io_finish_async(ctx); +- mmdrop(ctx->sqo_mm); 
+- ctx->sqo_mm = NULL; + return ret; + } + +@@ -7456,8 +7521,12 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx) + static void io_ring_ctx_free(struct io_ring_ctx *ctx) + { + io_finish_async(ctx); +- if (ctx->sqo_mm) +- mmdrop(ctx->sqo_mm); ++ if (ctx->sqo_task) { ++ put_task_struct(ctx->sqo_task); ++ ctx->sqo_task = NULL; ++ mmdrop(ctx->mm_account); ++ ctx->mm_account = NULL; ++ } + + io_iopoll_reap_events(ctx); + io_sqe_buffer_unregister(ctx); +@@ -7528,7 +7597,7 @@ static void io_ring_exit_work(struct work_struct *work) + + ctx = container_of(work, struct io_ring_ctx, exit_work); + if (ctx->rings) +- io_cqring_overflow_flush(ctx, true); ++ io_cqring_overflow_flush(ctx, true, NULL, NULL); + + /* + * If we're doing polled IO and end up having requests being +@@ -7539,7 +7608,7 @@ static void io_ring_exit_work(struct work_struct *work) + while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)) { + io_iopoll_reap_events(ctx); + if (ctx->rings) +- io_cqring_overflow_flush(ctx, true); ++ io_cqring_overflow_flush(ctx, true, NULL, NULL); + } + io_ring_ctx_free(ctx); + } +@@ -7550,8 +7619,8 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) + percpu_ref_kill(&ctx->refs); + mutex_unlock(&ctx->uring_lock); + +- io_kill_timeouts(ctx); +- io_poll_remove_all(ctx); ++ io_kill_timeouts(ctx, NULL); ++ io_poll_remove_all(ctx, NULL); + + if (ctx->io_wq) + io_wq_cancel_all(ctx->io_wq); +@@ -7559,7 +7628,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) + io_iopoll_reap_events(ctx); + /* if we failed setting up the ctx, we might not have any rings */ + if (ctx->rings) +- io_cqring_overflow_flush(ctx, true); ++ io_cqring_overflow_flush(ctx, true, NULL, NULL); + idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx); + + /* +@@ -7588,7 +7657,7 @@ static bool io_wq_files_match(struct io_wq_work *work, void *data) + { + struct files_struct *files = data; + +- return work->files == files; ++ return !files || work->files == files; + } + + /* +@@ -7609,12 +7678,6 @@ static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req) + return false; + } + +-static inline bool io_match_files(struct io_kiocb *req, +- struct files_struct *files) +-{ +- return (req->flags & REQ_F_WORK_INITIALIZED) && req->work.files == files; +-} +- + static bool io_match_link_files(struct io_kiocb *req, + struct files_struct *files) + { +@@ -7729,11 +7792,14 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, + } + } + +-static void io_uring_cancel_files(struct io_ring_ctx *ctx, ++/* ++ * Returns true if we found and killed one or more files pinning requests ++ */ ++static bool io_uring_cancel_files(struct io_ring_ctx *ctx, + struct files_struct *files) + { + if (list_empty_careful(&ctx->inflight_list)) +- return; ++ return false; + + io_cancel_defer_files(ctx, files); + /* cancel all at once, should be faster than doing it one by one*/ +@@ -7745,7 +7811,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, + + spin_lock_irq(&ctx->inflight_lock); + list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { +- if (req->work.files != files) ++ if (files && req->work.files != files) + continue; + /* req is being completed, ignore */ + if (!refcount_inc_not_zero(&req->refs)) +@@ -7791,9 +7857,13 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, + io_put_req(cancel_req); + } + ++ /* cancellations _may_ trigger task work */ ++ io_run_task_work(); + schedule(); + finish_wait(&ctx->inflight_wait, &wait); + } ++ ++ return true; + } + + static bool 
io_cancel_task_cb(struct io_wq_work *work, void *data) +@@ -7801,21 +7871,198 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data) + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + struct task_struct *task = data; + +- return req->task == task; ++ return io_task_match(req, task); ++} ++ ++static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, ++ struct task_struct *task, ++ struct files_struct *files) ++{ ++ bool ret; ++ ++ ret = io_uring_cancel_files(ctx, files); ++ if (!files) { ++ enum io_wq_cancel cret; ++ ++ cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, task, true); ++ if (cret != IO_WQ_CANCEL_NOTFOUND) ++ ret = true; ++ ++ /* SQPOLL thread does its own polling */ ++ if (!(ctx->flags & IORING_SETUP_SQPOLL)) { ++ if (!list_empty_careful(&ctx->poll_list)) { ++ io_iopoll_reap_events(ctx); ++ ret = true; ++ } ++ } ++ ++ ret |= io_poll_remove_all(ctx, task); ++ ret |= io_kill_timeouts(ctx, task); ++ } ++ ++ return ret; ++} ++ ++/* ++ * We need to iteratively cancel requests, in case a request has dependent ++ * hard links. These persist even for failure of cancelations, hence keep ++ * looping until none are found. ++ */ ++static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, ++ struct files_struct *files) ++{ ++ struct task_struct *task = current; ++ ++ if (ctx->flags & IORING_SETUP_SQPOLL) ++ task = ctx->sqo_thread; ++ ++ io_cqring_overflow_flush(ctx, true, task, files); ++ ++ while (__io_uring_cancel_task_requests(ctx, task, files)) { ++ io_run_task_work(); ++ cond_resched(); ++ } ++} ++ ++/* ++ * Note that this task has used io_uring. We use it for cancelation purposes. ++ */ ++static int io_uring_add_task_file(struct file *file) ++{ ++ struct io_uring_task *tctx = current->io_uring; ++ ++ if (unlikely(!tctx)) { ++ int ret; ++ ++ ret = io_uring_alloc_task_context(current); ++ if (unlikely(ret)) ++ return ret; ++ tctx = current->io_uring; ++ } ++ if (tctx->last != file) { ++ void *old = xa_load(&tctx->xa, (unsigned long)file); ++ ++ if (!old) { ++ get_file(file); ++ xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL); ++ } ++ tctx->last = file; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Remove this io_uring_file -> task mapping. ++ */ ++static void io_uring_del_task_file(struct file *file) ++{ ++ struct io_uring_task *tctx = current->io_uring; ++ ++ if (tctx->last == file) ++ tctx->last = NULL; ++ file = xa_erase(&tctx->xa, (unsigned long)file); ++ if (file) ++ fput(file); ++} ++ ++static void __io_uring_attempt_task_drop(struct file *file) ++{ ++ struct file *old = xa_load(¤t->io_uring->xa, (unsigned long)file); ++ ++ if (old == file) ++ io_uring_del_task_file(file); ++} ++ ++/* ++ * Drop task note for this file if we're the only ones that hold it after ++ * pending fput() ++ */ ++static void io_uring_attempt_task_drop(struct file *file, bool exiting) ++{ ++ if (!current->io_uring) ++ return; ++ /* ++ * fput() is pending, will be 2 if the only other ref is our potential ++ * task file note. If the task is exiting, drop regardless of count. 
++ */ ++ if (!exiting && atomic_long_read(&file->f_count) != 2) ++ return; ++ ++ __io_uring_attempt_task_drop(file); ++} ++ ++void __io_uring_files_cancel(struct files_struct *files) ++{ ++ struct io_uring_task *tctx = current->io_uring; ++ struct file *file; ++ unsigned long index; ++ ++ /* make sure overflow events are dropped */ ++ tctx->in_idle = true; ++ ++ xa_for_each(&tctx->xa, index, file) { ++ struct io_ring_ctx *ctx = file->private_data; ++ ++ io_uring_cancel_task_requests(ctx, files); ++ if (files) ++ io_uring_del_task_file(file); ++ } ++} ++ ++static inline bool io_uring_task_idle(struct io_uring_task *tctx) ++{ ++ return atomic_long_read(&tctx->req_issue) == ++ atomic_long_read(&tctx->req_complete); ++} ++ ++/* ++ * Find any io_uring fd that this task has registered or done IO on, and cancel ++ * requests. ++ */ ++void __io_uring_task_cancel(void) ++{ ++ struct io_uring_task *tctx = current->io_uring; ++ DEFINE_WAIT(wait); ++ long completions; ++ ++ /* make sure overflow events are dropped */ ++ tctx->in_idle = true; ++ ++ while (!io_uring_task_idle(tctx)) { ++ /* read completions before cancelations */ ++ completions = atomic_long_read(&tctx->req_complete); ++ __io_uring_files_cancel(NULL); ++ ++ prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE); ++ ++ /* ++ * If we've seen completions, retry. This avoids a race where ++ * a completion comes in before we did prepare_to_wait(). ++ */ ++ if (completions != atomic_long_read(&tctx->req_complete)) ++ continue; ++ if (io_uring_task_idle(tctx)) ++ break; ++ schedule(); ++ } ++ ++ finish_wait(&tctx->wait, &wait); ++ tctx->in_idle = false; + } + + static int io_uring_flush(struct file *file, void *data) + { + struct io_ring_ctx *ctx = file->private_data; + +- io_uring_cancel_files(ctx, data); +- + /* + * If the task is going away, cancel work it may have pending + */ + if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) +- io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, current, true); ++ data = NULL; + ++ io_uring_cancel_task_requests(ctx, data); ++ io_uring_attempt_task_drop(file, !data); + return 0; + } + +@@ -7924,13 +8171,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, + ret = 0; + if (ctx->flags & IORING_SETUP_SQPOLL) { + if (!list_empty_careful(&ctx->cq_overflow_list)) +- io_cqring_overflow_flush(ctx, false); ++ io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (flags & IORING_ENTER_SQ_WAKEUP) + wake_up(&ctx->sqo_wait); + submitted = to_submit; + } else if (to_submit) { ++ ret = io_uring_add_task_file(f.file); ++ if (unlikely(ret)) ++ goto out; + mutex_lock(&ctx->uring_lock); +- submitted = io_submit_sqes(ctx, to_submit, f.file, fd); ++ submitted = io_submit_sqes(ctx, to_submit); + mutex_unlock(&ctx->uring_lock); + + if (submitted != to_submit) +@@ -8142,6 +8392,7 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx) + file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx, + O_RDWR | O_CLOEXEC); + if (IS_ERR(file)) { ++err_fd: + put_unused_fd(ret); + ret = PTR_ERR(file); + goto err; +@@ -8150,6 +8401,10 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx) + #if defined(CONFIG_UNIX) + ctx->ring_sock->file = file; + #endif ++ if (unlikely(io_uring_add_task_file(file))) { ++ file = ERR_PTR(-ENOMEM); ++ goto err_fd; ++ } + fd_install(ret, file); + return ret; + err: +@@ -8228,6 +8483,16 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, + ctx->user = user; + ctx->creds = get_current_cred(); + ++ ctx->sqo_task = get_task_struct(current); ++ /* ++ * 
This is just grabbed for accounting purposes. When a process exits, ++ * the mm is exited and dropped before the files, hence we need to hang ++ * on to this mm purely for the purposes of being able to unaccount ++ * memory (locked/pinned vm). It's not used for anything else. ++ */ ++ mmgrab(current->mm); ++ ctx->mm_account = current->mm; ++ + ret = io_allocate_scq_urings(ctx, p); + if (ret) + goto err; +diff --git a/include/linux/fs.h b/include/linux/fs.h +index ac1e89872db4f..819245cc9dbd4 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -3011,7 +3011,6 @@ extern int do_pipe_flags(int *, int); + id(UNKNOWN, unknown) \ + id(FIRMWARE, firmware) \ + id(FIRMWARE_PREALLOC_BUFFER, firmware) \ +- id(FIRMWARE_EFI_EMBEDDED, firmware) \ + id(MODULE, kernel-module) \ + id(KEXEC_IMAGE, kexec-image) \ + id(KEXEC_INITRAMFS, kexec-initramfs) \ +diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h +new file mode 100644 +index 0000000000000..c09135a1ef132 +--- /dev/null ++++ b/include/linux/io_uring.h +@@ -0,0 +1,53 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++#ifndef _LINUX_IO_URING_H ++#define _LINUX_IO_URING_H ++ ++#include <linux/sched.h> ++#include <linux/xarray.h> ++#include <linux/percpu-refcount.h> ++ ++struct io_uring_task { ++ /* submission side */ ++ struct xarray xa; ++ struct wait_queue_head wait; ++ struct file *last; ++ atomic_long_t req_issue; ++ ++ /* completion side */ ++ bool in_idle ____cacheline_aligned_in_smp; ++ atomic_long_t req_complete; ++}; ++ ++#if defined(CONFIG_IO_URING) ++void __io_uring_task_cancel(void); ++void __io_uring_files_cancel(struct files_struct *files); ++void __io_uring_free(struct task_struct *tsk); ++ ++static inline void io_uring_task_cancel(void) ++{ ++ if (current->io_uring && !xa_empty(¤t->io_uring->xa)) ++ __io_uring_task_cancel(); ++} ++static inline void io_uring_files_cancel(struct files_struct *files) ++{ ++ if (current->io_uring && !xa_empty(¤t->io_uring->xa)) ++ __io_uring_files_cancel(files); ++} ++static inline void io_uring_free(struct task_struct *tsk) ++{ ++ if (tsk->io_uring) ++ __io_uring_free(tsk); ++} ++#else ++static inline void io_uring_task_cancel(void) ++{ ++} ++static inline void io_uring_files_cancel(struct files_struct *files) ++{ ++} ++static inline void io_uring_free(struct task_struct *tsk) ++{ ++} ++#endif ++ ++#endif +diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h +index 122f3439e1af2..c65d7a3be3c69 100644 +--- a/include/linux/mtd/pfow.h ++++ b/include/linux/mtd/pfow.h +@@ -128,7 +128,7 @@ static inline void print_drs_error(unsigned dsr) + + if (!(dsr & DSR_AVAILABLE)) + printk(KERN_NOTICE"DSR.15: (0) Device not Available\n"); +- if (prog_status & 0x03) ++ if ((prog_status & 0x03) == 0x03) + printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid " + "half with 41h command\n"); + else if (prog_status & 0x02) +diff --git a/include/linux/pm.h b/include/linux/pm.h +index 121c104a4090e..1010bf3d3008b 100644 +--- a/include/linux/pm.h ++++ b/include/linux/pm.h +@@ -584,7 +584,7 @@ struct dev_pm_info { + #endif + #ifdef CONFIG_PM + struct hrtimer suspend_timer; +- unsigned long timer_expires; ++ u64 timer_expires; + struct work_struct work; + wait_queue_head_t wait_queue; + struct wake_irq *wakeirq; +diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h +index dd464943f717a..5b90eff50bf6e 100644 +--- a/include/linux/qcom-geni-se.h ++++ b/include/linux/qcom-geni-se.h +@@ -229,6 +229,9 @@ struct geni_se { + #define GENI_SE_VERSION_MINOR(ver) ((ver & 
HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT) + #define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK) + ++/* QUP SE VERSION value for major number 2 and minor number 5 */ ++#define QUP_SE_VERSION_2_5 0x20050000 ++ + #if IS_ENABLED(CONFIG_QCOM_GENI_SE) + + u32 geni_se_get_qup_hw_version(struct geni_se *se); +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 6833729430932..f0f38e86ab1ee 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -61,6 +61,7 @@ struct sighand_struct; + struct signal_struct; + struct task_delay_info; + struct task_group; ++struct io_uring_task; + + /* + * Task state bitmask. NOTE! These bits are also +@@ -923,6 +924,10 @@ struct task_struct { + /* Open file information: */ + struct files_struct *files; + ++#ifdef CONFIG_IO_URING ++ struct io_uring_task *io_uring; ++#endif ++ + /* Namespaces: */ + struct nsproxy *nsproxy; + +diff --git a/include/linux/string.h b/include/linux/string.h +index 9b7a0632e87aa..b1f3894a0a3e4 100644 +--- a/include/linux/string.h ++++ b/include/linux/string.h +@@ -161,20 +161,13 @@ extern int bcmp(const void *,const void *,__kernel_size_t); + #ifndef __HAVE_ARCH_MEMCHR + extern void * memchr(const void *,int,__kernel_size_t); + #endif +-#ifndef __HAVE_ARCH_MEMCPY_MCSAFE +-static inline __must_check unsigned long memcpy_mcsafe(void *dst, +- const void *src, size_t cnt) +-{ +- memcpy(dst, src, cnt); +- return 0; +-} +-#endif + #ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE + static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) + { + memcpy(dst, src, cnt); + } + #endif ++ + void *memchr_inv(const void *s, int c, size_t n); + char *strreplace(char *s, char old, char new); + +diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h +index 0a76ddc07d597..1ef3ab2343aa4 100644 +--- a/include/linux/uaccess.h ++++ b/include/linux/uaccess.h +@@ -163,6 +163,19 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n) + } + #endif + ++#ifndef copy_mc_to_kernel ++/* ++ * Without arch opt-in this generic copy_mc_to_kernel() will not handle ++ * #MC (or arch equivalent) during source read. 
++ */ ++static inline unsigned long __must_check ++copy_mc_to_kernel(void *dst, const void *src, size_t cnt) ++{ ++ memcpy(dst, src, cnt); ++ return 0; ++} ++#endif ++ + static __always_inline void pagefault_disabled_inc(void) + { + current->pagefault_disabled++; +diff --git a/include/linux/uio.h b/include/linux/uio.h +index 9576fd8158d7d..6a97b4d10b2ed 100644 +--- a/include/linux/uio.h ++++ b/include/linux/uio.h +@@ -186,10 +186,10 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); + #define _copy_from_iter_flushcache _copy_from_iter_nocache + #endif + +-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE +-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i); ++#ifdef CONFIG_ARCH_HAS_COPY_MC ++size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); + #else +-#define _copy_to_iter_mcsafe _copy_to_iter ++#define _copy_mc_to_iter _copy_to_iter + #endif + + static __always_inline __must_check +@@ -202,12 +202,12 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) + } + + static __always_inline __must_check +-size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) ++size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i) + { + if (unlikely(!check_copy_size(addr, bytes, true))) + return 0; + else +- return _copy_to_iter_mcsafe(addr, bytes, i); ++ return _copy_mc_to_iter(addr, bytes, i); + } + + size_t iov_iter_zero(size_t bytes, struct iov_iter *); +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index ec2cbfab71f35..f09541cba3c9d 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -896,6 +896,12 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) + return (struct nft_expr *)&rule->data[rule->dlen]; + } + ++static inline bool nft_expr_more(const struct nft_rule *rule, ++ const struct nft_expr *expr) ++{ ++ return expr != nft_expr_last(rule) && expr->ops; ++} ++ + static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) + { + return (void *)&rule->data[rule->dlen]; +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h +index a3fd55194e0b1..7bffadcfd6eb0 100644 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -1416,8 +1416,8 @@ union bpf_attr { + * Return + * The return value depends on the result of the test, and can be: + * +- * * 0, if the *skb* task belongs to the cgroup2. +- * * 1, if the *skb* task does not belong to the cgroup2. ++ * * 0, if current task belongs to the cgroup2. ++ * * 1, if current task does not belong to the cgroup2. + * * A negative error code, if an error occurred. 
+ * + * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) +diff --git a/init/init_task.c b/init/init_task.c +index 15089d15010ab..7802f91109b48 100644 +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -113,6 +113,9 @@ struct task_struct init_task + .thread = INIT_THREAD, + .fs = &init_fs, + .files = &init_files, ++#ifdef CONFIG_IO_URING ++ .io_uring = NULL, ++#endif + .signal = &init_signals, + .sighand = &init_sighand, + .nsproxy = &init_nsproxy, +diff --git a/kernel/fork.c b/kernel/fork.c +index 0074bbe8c66f1..c725015b3c465 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -95,6 +95,7 @@ + #include <linux/stackleak.h> + #include <linux/kasan.h> + #include <linux/scs.h> ++#include <linux/io_uring.h> + + #include <asm/pgalloc.h> + #include <linux/uaccess.h> +@@ -745,6 +746,7 @@ void __put_task_struct(struct task_struct *tsk) + WARN_ON(refcount_read(&tsk->usage)); + WARN_ON(tsk == current); + ++ io_uring_free(tsk); + cgroup_free(tsk); + task_numa_free(tsk, true); + security_task_free(tsk); +@@ -2022,6 +2024,10 @@ static __latent_entropy struct task_struct *copy_process( + p->vtime.state = VTIME_INACTIVE; + #endif + ++#ifdef CONFIG_IO_URING ++ p->io_uring = NULL; ++#endif ++ + #if defined(SPLIT_RSS_COUNTING) + memset(&p->rss_stat, 0, sizeof(p->rss_stat)); + #endif +diff --git a/lib/Kconfig b/lib/Kconfig +index df3f3da959900..7761458649377 100644 +--- a/lib/Kconfig ++++ b/lib/Kconfig +@@ -631,7 +631,12 @@ config UACCESS_MEMCPY + config ARCH_HAS_UACCESS_FLUSHCACHE + bool + +-config ARCH_HAS_UACCESS_MCSAFE ++# arch has a concept of a recoverable synchronous exception due to a ++# memory-read error like x86 machine-check or ARM data-abort, and ++# implements copy_mc_to_{user,kernel} to abort and report ++# 'bytes-transferred' if that exception fires when accessing the source ++# buffer. ++config ARCH_HAS_COPY_MC + bool + + # Temporary. 
Goes away when all archs are cleaned up +diff --git a/lib/iov_iter.c b/lib/iov_iter.c +index bf538c2bec777..aefe469905434 100644 +--- a/lib/iov_iter.c ++++ b/lib/iov_iter.c +@@ -636,30 +636,30 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) + } + EXPORT_SYMBOL(_copy_to_iter); + +-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE +-static int copyout_mcsafe(void __user *to, const void *from, size_t n) ++#ifdef CONFIG_ARCH_HAS_COPY_MC ++static int copyout_mc(void __user *to, const void *from, size_t n) + { + if (access_ok(to, n)) { + instrument_copy_to_user(to, from, n); +- n = copy_to_user_mcsafe((__force void *) to, from, n); ++ n = copy_mc_to_user((__force void *) to, from, n); + } + return n; + } + +-static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, ++static unsigned long copy_mc_to_page(struct page *page, size_t offset, + const char *from, size_t len) + { + unsigned long ret; + char *to; + + to = kmap_atomic(page); +- ret = memcpy_mcsafe(to + offset, from, len); ++ ret = copy_mc_to_kernel(to + offset, from, len); + kunmap_atomic(to); + + return ret; + } + +-static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, ++static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, + struct iov_iter *i) + { + struct pipe_inode_info *pipe = i->pipe; +@@ -677,7 +677,7 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, + size_t chunk = min_t(size_t, n, PAGE_SIZE - off); + unsigned long rem; + +- rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page, ++ rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page, + off, addr, chunk); + i->head = i_head; + i->iov_offset = off + chunk - rem; +@@ -694,18 +694,17 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, + } + + /** +- * _copy_to_iter_mcsafe - copy to user with source-read error exception handling ++ * _copy_mc_to_iter - copy to iter with source memory error exception handling + * @addr: source kernel address + * @bytes: total transfer length + * @iter: destination iterator + * +- * The pmem driver arranges for filesystem-dax to use this facility via +- * dax_copy_to_iter() for protecting read/write to persistent memory. +- * Unless / until an architecture can guarantee identical performance +- * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a +- * performance regression to switch more users to the mcsafe version. ++ * The pmem driver deploys this for the dax operation ++ * (dax_copy_to_iter()) for dax reads (bypass page-cache and the ++ * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes ++ * successfully copied. + * +- * Otherwise, the main differences between this and typical _copy_to_iter(). ++ * The main differences between this and typical _copy_to_iter(). + * + * * Typical tail/residue handling after a fault retries the copy + * byte-by-byte until the fault happens again. Re-triggering machine +@@ -716,23 +715,22 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, + * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. + * Compare to copy_to_iter() where only ITER_IOVEC attempts might return + * a short copy. +- * +- * See MCSAFE_TEST for self-test. 
+ */ +-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) ++size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) + { + const char *from = addr; + unsigned long rem, curr_addr, s_addr = (unsigned long) addr; + + if (unlikely(iov_iter_is_pipe(i))) +- return copy_pipe_to_iter_mcsafe(addr, bytes, i); ++ return copy_mc_pipe_to_iter(addr, bytes, i); + if (iter_is_iovec(i)) + might_fault(); + iterate_and_advance(i, bytes, v, +- copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), ++ copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len, ++ v.iov_len), + ({ +- rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, +- (from += v.bv_len) - v.bv_len, v.bv_len); ++ rem = copy_mc_to_page(v.bv_page, v.bv_offset, ++ (from += v.bv_len) - v.bv_len, v.bv_len); + if (rem) { + curr_addr = (unsigned long) from; + bytes = curr_addr - s_addr - rem; +@@ -740,8 +738,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) + } + }), + ({ +- rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, +- v.iov_len); ++ rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len) ++ - v.iov_len, v.iov_len); + if (rem) { + curr_addr = (unsigned long) from; + bytes = curr_addr - s_addr - rem; +@@ -752,8 +750,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) + + return bytes; + } +-EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe); +-#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */ ++EXPORT_SYMBOL_GPL(_copy_mc_to_iter); ++#endif /* CONFIG_ARCH_HAS_COPY_MC */ + + size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) + { +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 06a8242aa6980..6dd7f44497ecc 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -483,6 +483,8 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, + return true; + if (tcp_rmem_pressure(sk)) + return true; ++ if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss) ++ return true; + } + if (sk->sk_prot->stream_memory_read) + return sk->sk_prot->stream_memory_read(sk); +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 02cc972edd0b0..6c7e982169467 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -4790,7 +4790,8 @@ void tcp_data_ready(struct sock *sk) + int avail = tp->rcv_nxt - tp->copied_seq; + + if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && +- !sock_flag(sk, SOCK_DONE)) ++ !sock_flag(sk, SOCK_DONE) && ++ tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss) + return; + + sk->sk_data_ready(sk); +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 05059f620d41e..fe51a7df4f524 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -295,7 +295,7 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, + struct nft_expr *expr; + + expr = nft_expr_first(rule); +- while (expr != nft_expr_last(rule) && expr->ops) { ++ while (nft_expr_more(rule, expr)) { + if (expr->ops->activate) + expr->ops->activate(ctx, expr); + +@@ -310,7 +310,7 @@ static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, + struct nft_expr *expr; + + expr = nft_expr_first(rule); +- while (expr != nft_expr_last(rule) && expr->ops) { ++ while (nft_expr_more(rule, expr)) { + if (expr->ops->deactivate) + expr->ops->deactivate(ctx, expr, phase); + +@@ -2917,7 +2917,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, + * is called on error from nf_tables_newrule(). 
+ */ + expr = nft_expr_first(rule); +- while (expr != nft_expr_last(rule) && expr->ops) { ++ while (nft_expr_more(rule, expr)) { + next = nft_expr_next(expr); + nf_tables_expr_destroy(ctx, expr); + expr = next; +diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c +index c7cf1cde46def..ce2387bfb5dce 100644 +--- a/net/netfilter/nf_tables_offload.c ++++ b/net/netfilter/nf_tables_offload.c +@@ -37,7 +37,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, + struct nft_expr *expr; + + expr = nft_expr_first(rule); +- while (expr->ops && expr != nft_expr_last(rule)) { ++ while (nft_expr_more(rule, expr)) { + if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION) + num_actions++; + +@@ -61,7 +61,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, + ctx->net = net; + ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC; + +- while (expr->ops && expr != nft_expr_last(rule)) { ++ while (nft_expr_more(rule, expr)) { + if (!expr->ops->offload) { + err = -EOPNOTSUPP; + goto err_out; +diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c +index e298ec3b3c9e3..ca026e2bf8d27 100644 +--- a/net/sched/act_mpls.c ++++ b/net/sched/act_mpls.c +@@ -408,6 +408,7 @@ static void __exit mpls_cleanup_module(void) + module_init(mpls_init_module); + module_exit(mpls_cleanup_module); + ++MODULE_SOFTDEP("post: mpls_gso"); + MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>"); + MODULE_LICENSE("GPL"); + MODULE_DESCRIPTION("MPLS manipulation actions"); +diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c +index 8bf6bde1cfe59..aa2448253dbab 100644 +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -650,12 +650,12 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb) + block_cb->indr.binder_type, + &block->flow_block, tcf_block_shared(block), + &extack); ++ rtnl_lock(); + down_write(&block->cb_lock); + list_del(&block_cb->driver_list); + list_move(&block_cb->list, &bo.cb_list); +- up_write(&block->cb_lock); +- rtnl_lock(); + tcf_block_unbind(block, &bo); ++ up_write(&block->cb_lock); + rtnl_unlock(); + } + +diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c +index 84f82771cdf5d..0c345e43a09a3 100644 +--- a/net/sched/sch_netem.c ++++ b/net/sched/sch_netem.c +@@ -330,7 +330,7 @@ static s64 tabledist(s64 mu, s32 sigma, + + /* default uniform distribution */ + if (dist == NULL) +- return ((rnd % (2 * sigma)) + mu) - sigma; ++ return ((rnd % (2 * (u32)sigma)) + mu) - sigma; + + t = dist->table[rnd % dist->size]; + x = (sigma % NETEM_DIST_SCALE) * t; +@@ -812,6 +812,10 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) + q->slot_config.max_packets = INT_MAX; + if (q->slot_config.max_bytes == 0) + q->slot_config.max_bytes = INT_MAX; ++ ++ /* capping dist_jitter to the range acceptable by tabledist() */ ++ q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter)); ++ + q->slot.packets_left = q->slot_config.max_packets; + q->slot.bytes_left = q->slot_config.max_bytes; + if (q->slot_config.min_delay | q->slot_config.max_delay | +@@ -1037,6 +1041,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, + if (tb[TCA_NETEM_SLOT]) + get_slot(q, tb[TCA_NETEM_SLOT]); + ++ /* capping jitter to the range acceptable by tabledist() */ ++ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); ++ + return ret; + + get_table_failure: +diff --git a/net/tipc/msg.c b/net/tipc/msg.c +index 15b24fbcbe970..0d6297f75df18 100644 +--- a/net/tipc/msg.c ++++ b/net/tipc/msg.c +@@ -150,12 +150,11 @@ int 
tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) + if (fragid == FIRST_FRAGMENT) { + if (unlikely(head)) + goto err; +- if (skb_cloned(frag)) +- frag = skb_copy(frag, GFP_ATOMIC); ++ *buf = NULL; ++ frag = skb_unshare(frag, GFP_ATOMIC); + if (unlikely(!frag)) + goto err; + head = *headbuf = frag; +- *buf = NULL; + TIPC_SKB_CB(head)->tail = NULL; + if (skb_is_nonlinear(head)) { + skb_walk_frags(head, tail) { +diff --git a/scripts/setlocalversion b/scripts/setlocalversion +index 20f2efd57b11a..bb709eda96cdf 100755 +--- a/scripts/setlocalversion ++++ b/scripts/setlocalversion +@@ -45,7 +45,7 @@ scm_version() + + # Check for git and a git repo. + if test -z "$(git rev-parse --show-cdup 2>/dev/null)" && +- head=$(git rev-parse --verify --short HEAD 2>/dev/null); then ++ head=$(git rev-parse --verify HEAD 2>/dev/null); then + + # If we are at a tagged commit (like "v2.6.30-rc6"), we ignore + # it, because this version is defined in the top level Makefile. +@@ -59,11 +59,22 @@ scm_version() + fi + # If we are past a tagged commit (like + # "v2.6.30-rc5-302-g72357d5"), we pretty print it. +- if atag="$(git describe 2>/dev/null)"; then +- echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),$(NF))}' +- +- # If we don't have a tag at all we print -g{commitish}. ++ # ++ # Ensure the abbreviated sha1 has exactly 12 ++ # hex characters, to make the output ++ # independent of git version, local ++ # core.abbrev settings and/or total number of ++ # objects in the current repository - passing ++ # --abbrev=12 ensures a minimum of 12, and the ++ # awk substr() then picks the 'g' and first 12 ++ # hex chars. ++ if atag="$(git describe --abbrev=12 2>/dev/null)"; then ++ echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),substr($(NF),0,13))}' ++ ++ # If we don't have a tag at all we print -g{commitish}, ++ # again using exactly 12 hex chars. 
+ else ++ head="$(echo $head | cut -c1-12)" + printf '%s%s' -g $head + fi + fi +diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c +index 0d36259b690df..e4b47759ba1ca 100644 +--- a/security/integrity/evm/evm_main.c ++++ b/security/integrity/evm/evm_main.c +@@ -181,6 +181,12 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, + break; + case EVM_IMA_XATTR_DIGSIG: + case EVM_XATTR_PORTABLE_DIGSIG: ++ /* accept xattr with non-empty signature field */ ++ if (xattr_len <= sizeof(struct signature_v2_hdr)) { ++ evm_status = INTEGRITY_FAIL; ++ goto out; ++ } ++ + hdr = (struct signature_v2_hdr *)xattr_data; + digest.hdr.algo = hdr->hash_algo; + rc = evm_calc_hash(dentry, xattr_name, xattr_value, +diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h +deleted file mode 100644 +index 2ccd588fbad45..0000000000000 +--- a/tools/arch/x86/include/asm/mcsafe_test.h ++++ /dev/null +@@ -1,13 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-#ifndef _MCSAFE_TEST_H_ +-#define _MCSAFE_TEST_H_ +- +-.macro MCSAFE_TEST_CTL +-.endm +- +-.macro MCSAFE_TEST_SRC reg count target +-.endm +- +-.macro MCSAFE_TEST_DST reg count target +-.endm +-#endif /* _MCSAFE_TEST_H_ */ +diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S +index 45f8e1b02241f..0b5b8ae56bd91 100644 +--- a/tools/arch/x86/lib/memcpy_64.S ++++ b/tools/arch/x86/lib/memcpy_64.S +@@ -4,7 +4,6 @@ + #include <linux/linkage.h> + #include <asm/errno.h> + #include <asm/cpufeatures.h> +-#include <asm/mcsafe_test.h> + #include <asm/alternative-asm.h> + #include <asm/export.h> + +@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig) + SYM_FUNC_END(memcpy_orig) + + .popsection +- +-#ifndef CONFIG_UML +- +-MCSAFE_TEST_CTL +- +-/* +- * __memcpy_mcsafe - memory copy with machine check exception handling +- * Note that we only catch machine checks when reading the source addresses. +- * Writes to target are posted and don't generate machine checks. +- */ +-SYM_FUNC_START(__memcpy_mcsafe) +- cmpl $8, %edx +- /* Less than 8 bytes? Go to byte copy loop */ +- jb .L_no_whole_words +- +- /* Check for bad alignment of source */ +- testl $7, %esi +- /* Already aligned */ +- jz .L_8byte_aligned +- +- /* Copy one byte at a time until source is 8-byte aligned */ +- movl %esi, %ecx +- andl $7, %ecx +- subl $8, %ecx +- negl %ecx +- subl %ecx, %edx +-.L_read_leading_bytes: +- movb (%rsi), %al +- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes +- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes +-.L_write_leading_bytes: +- movb %al, (%rdi) +- incq %rsi +- incq %rdi +- decl %ecx +- jnz .L_read_leading_bytes +- +-.L_8byte_aligned: +- movl %edx, %ecx +- andl $7, %edx +- shrl $3, %ecx +- jz .L_no_whole_words +- +-.L_read_words: +- movq (%rsi), %r8 +- MCSAFE_TEST_SRC %rsi 8 .E_read_words +- MCSAFE_TEST_DST %rdi 8 .E_write_words +-.L_write_words: +- movq %r8, (%rdi) +- addq $8, %rsi +- addq $8, %rdi +- decl %ecx +- jnz .L_read_words +- +- /* Any trailing bytes? */ +-.L_no_whole_words: +- andl %edx, %edx +- jz .L_done_memcpy_trap +- +- /* Copy trailing bytes */ +- movl %edx, %ecx +-.L_read_trailing_bytes: +- movb (%rsi), %al +- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes +- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes +-.L_write_trailing_bytes: +- movb %al, (%rdi) +- incq %rsi +- incq %rdi +- decl %ecx +- jnz .L_read_trailing_bytes +- +- /* Copy successful. 
Return zero */ +-.L_done_memcpy_trap: +- xorl %eax, %eax +-.L_done: +- ret +-SYM_FUNC_END(__memcpy_mcsafe) +-EXPORT_SYMBOL_GPL(__memcpy_mcsafe) +- +- .section .fixup, "ax" +- /* +- * Return number of bytes not copied for any failure. Note that +- * there is no "tail" handling since the source buffer is 8-byte +- * aligned and poison is cacheline aligned. +- */ +-.E_read_words: +- shll $3, %ecx +-.E_leading_bytes: +- addl %edx, %ecx +-.E_trailing_bytes: +- mov %ecx, %eax +- jmp .L_done +- +- /* +- * For write fault handling, given the destination is unaligned, +- * we handle faults on multi-byte writes with a byte-by-byte +- * copy up to the write-protected page. +- */ +-.E_write_words: +- shll $3, %ecx +- addl %edx, %ecx +- movl %ecx, %edx +- jmp mcsafe_handle_tail +- +- .previous +- +- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) +- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) +- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) +- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) +- _ASM_EXTABLE(.L_write_words, .E_write_words) +- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) +-#endif +diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h +index a3fd55194e0b1..7bffadcfd6eb0 100644 +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -1416,8 +1416,8 @@ union bpf_attr { + * Return + * The return value depends on the result of the test, and can be: + * +- * * 0, if the *skb* task belongs to the cgroup2. +- * * 1, if the *skb* task does not belong to the cgroup2. ++ * * 0, if current task belongs to the cgroup2. ++ * * 1, if current task does not belong to the cgroup2. + * * A negative error code, if an error occurred. + * + * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) +diff --git a/tools/objtool/check.c b/tools/objtool/check.c +index 773e6c7ee5f93..0ed92c3b19266 100644 +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -548,8 +548,9 @@ static const char *uaccess_safe_builtin[] = { + "__ubsan_handle_shift_out_of_bounds", + /* misc */ + "csum_partial_copy_generic", +- "__memcpy_mcsafe", +- "mcsafe_handle_tail", ++ "copy_mc_fragile", ++ "copy_mc_fragile_handle_tail", ++ "copy_mc_enhanced_fast_string", + "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ + NULL + }; +diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build +index 768e408757a05..5352303518e1f 100644 +--- a/tools/perf/bench/Build ++++ b/tools/perf/bench/Build +@@ -11,7 +11,6 @@ perf-y += epoll-ctl.o + perf-y += synthesize.o + perf-y += kallsyms-parse.o + +-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o + perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o + perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o + +diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c +deleted file mode 100644 +index 4130734dde84b..0000000000000 +--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c ++++ /dev/null +@@ -1,24 +0,0 @@ +-/* +- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy +- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy' +- * happy. 
+- */ +-#include <linux/types.h> +- +-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt); +-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len); +- +-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len) +-{ +- for (; len; --len, to++, from++) { +- /* +- * Call the assembly routine back directly since +- * memcpy_mcsafe() may silently fallback to memcpy. +- */ +- unsigned long rem = __memcpy_mcsafe(to, from, 1); +- +- if (rem) +- break; +- } +- return len; +-} +diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c +index a8ee5c4d41ebb..50a390d87db26 100644 +--- a/tools/testing/nvdimm/test/nfit.c ++++ b/tools/testing/nvdimm/test/nfit.c +@@ -23,7 +23,8 @@ + #include "nfit_test.h" + #include "../watermark.h" + +-#include <asm/mcsafe_test.h> ++#include <asm/copy_mc_test.h> ++#include <asm/mce.h> + + /* + * Generate an NFIT table to describe the following topology: +@@ -3052,7 +3053,7 @@ static struct platform_driver nfit_test_driver = { + .id_table = nfit_test_id, + }; + +-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); ++static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); + + enum INJECT { + INJECT_NONE, +@@ -3060,7 +3061,7 @@ enum INJECT { + INJECT_DST, + }; + +-static void mcsafe_test_init(char *dst, char *src, size_t size) ++static void copy_mc_test_init(char *dst, char *src, size_t size) + { + size_t i; + +@@ -3069,7 +3070,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size) + src[i] = (char) i; + } + +-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, ++static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src, + size_t size, unsigned long rem) + { + size_t i; +@@ -3090,12 +3091,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, + return true; + } + +-void mcsafe_test(void) ++void copy_mc_test(void) + { + char *inject_desc[] = { "none", "source", "destination" }; + enum INJECT inj; + +- if (IS_ENABLED(CONFIG_MCSAFE_TEST)) { ++ if (IS_ENABLED(CONFIG_COPY_MC_TEST)) { + pr_info("%s: run...\n", __func__); + } else { + pr_info("%s: disabled, skip.\n", __func__); +@@ -3113,31 +3114,31 @@ void mcsafe_test(void) + + switch (inj) { + case INJECT_NONE: +- mcsafe_inject_src(NULL); +- mcsafe_inject_dst(NULL); +- dst = &mcsafe_buf[2048]; +- src = &mcsafe_buf[1024 - i]; ++ copy_mc_inject_src(NULL); ++ copy_mc_inject_dst(NULL); ++ dst = ©_mc_buf[2048]; ++ src = ©_mc_buf[1024 - i]; + expect = 0; + break; + case INJECT_SRC: +- mcsafe_inject_src(&mcsafe_buf[1024]); +- mcsafe_inject_dst(NULL); +- dst = &mcsafe_buf[2048]; +- src = &mcsafe_buf[1024 - i]; ++ copy_mc_inject_src(©_mc_buf[1024]); ++ copy_mc_inject_dst(NULL); ++ dst = ©_mc_buf[2048]; ++ src = ©_mc_buf[1024 - i]; + expect = 512 - i; + break; + case INJECT_DST: +- mcsafe_inject_src(NULL); +- mcsafe_inject_dst(&mcsafe_buf[2048]); +- dst = &mcsafe_buf[2048 - i]; +- src = &mcsafe_buf[1024]; ++ copy_mc_inject_src(NULL); ++ copy_mc_inject_dst(©_mc_buf[2048]); ++ dst = ©_mc_buf[2048 - i]; ++ src = ©_mc_buf[1024]; + expect = 512 - i; + break; + } + +- mcsafe_test_init(dst, src, 512); +- rem = __memcpy_mcsafe(dst, src, 512); +- valid = mcsafe_test_validate(dst, src, 512, expect); ++ copy_mc_test_init(dst, src, 512); ++ rem = copy_mc_fragile(dst, src, 512); ++ valid = copy_mc_test_validate(dst, src, 512, expect); + if (rem == expect && valid) + continue; + pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n", +@@ -3149,8 +3150,8 @@ 
void mcsafe_test(void) + } + } + +- mcsafe_inject_src(NULL); +- mcsafe_inject_dst(NULL); ++ copy_mc_inject_src(NULL); ++ copy_mc_inject_dst(NULL); + } + + static __init int nfit_test_init(void) +@@ -3161,7 +3162,7 @@ static __init int nfit_test_init(void) + libnvdimm_test(); + acpi_nfit_test(); + device_dax_test(); +- mcsafe_test(); ++ copy_mc_test(); + dax_pmem_test(); + dax_pmem_core_test(); + #ifdef CONFIG_DEV_DAX_PMEM_COMPAT +diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore +index ddaf140b82553..994b11af765ce 100644 +--- a/tools/testing/selftests/powerpc/copyloops/.gitignore ++++ b/tools/testing/selftests/powerpc/copyloops/.gitignore +@@ -12,4 +12,4 @@ memcpy_p7_t1 + copyuser_64_exc_t0 + copyuser_64_exc_t1 + copyuser_64_exc_t2 +-memcpy_mcsafe_64 ++copy_mc_64 +diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile +index 0917983a1c781..3095b1f1c02b3 100644 +--- a/tools/testing/selftests/powerpc/copyloops/Makefile ++++ b/tools/testing/selftests/powerpc/copyloops/Makefile +@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4 + TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ + copyuser_p7_t0 copyuser_p7_t1 \ + memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ +- memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \ ++ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ + copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 + + EXTRA_SOURCES := validate.c ../harness.c stubs.S +@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES) + -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \ + -o $@ $^ + +-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES) ++$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES) + $(CC) $(CPPFLAGS) $(CFLAGS) \ +- -D COPY_LOOP=test_memcpy_mcsafe \ ++ -D COPY_LOOP=test_copy_mc_generic \ + -o $@ $^ + + $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \ +diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S +new file mode 100644 +index 0000000000000..88d46c471493b +--- /dev/null ++++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S +@@ -0,0 +1,242 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (C) IBM Corporation, 2011 ++ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com> ++ * Author - Balbir Singh <bsingharora@gmail.com> ++ */ ++#include <asm/ppc_asm.h> ++#include <asm/errno.h> ++#include <asm/export.h> ++ ++ .macro err1 ++100: ++ EX_TABLE(100b,.Ldo_err1) ++ .endm ++ ++ .macro err2 ++200: ++ EX_TABLE(200b,.Ldo_err2) ++ .endm ++ ++ .macro err3 ++300: EX_TABLE(300b,.Ldone) ++ .endm ++ ++.Ldo_err2: ++ ld r22,STK_REG(R22)(r1) ++ ld r21,STK_REG(R21)(r1) ++ ld r20,STK_REG(R20)(r1) ++ ld r19,STK_REG(R19)(r1) ++ ld r18,STK_REG(R18)(r1) ++ ld r17,STK_REG(R17)(r1) ++ ld r16,STK_REG(R16)(r1) ++ ld r15,STK_REG(R15)(r1) ++ ld r14,STK_REG(R14)(r1) ++ addi r1,r1,STACKFRAMESIZE ++.Ldo_err1: ++ /* Do a byte by byte copy to get the exact remaining size */ ++ mtctr r7 ++46: ++err3; lbz r0,0(r4) ++ addi r4,r4,1 ++err3; stb r0,0(r3) ++ addi r3,r3,1 ++ bdnz 46b ++ li r3,0 ++ blr ++ ++.Ldone: ++ mfctr r3 ++ blr ++ ++ ++_GLOBAL(copy_mc_generic) ++ mr r7,r5 ++ cmpldi r5,16 ++ blt .Lshort_copy ++ ++.Lcopy: ++ /* Get the source 8B aligned */ ++ neg r6,r4 ++ mtocrf 0x01,r6 ++ clrldi r6,r6,(64-3) ++ ++ bf cr7*4+3,1f ++err1; lbz r0,0(r4) ++ addi r4,r4,1 ++err1; stb r0,0(r3) ++ addi r3,r3,1 ++ subi r7,r7,1 ++ 
++1: bf cr7*4+2,2f ++err1; lhz r0,0(r4) ++ addi r4,r4,2 ++err1; sth r0,0(r3) ++ addi r3,r3,2 ++ subi r7,r7,2 ++ ++2: bf cr7*4+1,3f ++err1; lwz r0,0(r4) ++ addi r4,r4,4 ++err1; stw r0,0(r3) ++ addi r3,r3,4 ++ subi r7,r7,4 ++ ++3: sub r5,r5,r6 ++ cmpldi r5,128 ++ ++ mflr r0 ++ stdu r1,-STACKFRAMESIZE(r1) ++ std r14,STK_REG(R14)(r1) ++ std r15,STK_REG(R15)(r1) ++ std r16,STK_REG(R16)(r1) ++ std r17,STK_REG(R17)(r1) ++ std r18,STK_REG(R18)(r1) ++ std r19,STK_REG(R19)(r1) ++ std r20,STK_REG(R20)(r1) ++ std r21,STK_REG(R21)(r1) ++ std r22,STK_REG(R22)(r1) ++ std r0,STACKFRAMESIZE+16(r1) ++ ++ blt 5f ++ srdi r6,r5,7 ++ mtctr r6 ++ ++ /* Now do cacheline (128B) sized loads and stores. */ ++ .align 5 ++4: ++err2; ld r0,0(r4) ++err2; ld r6,8(r4) ++err2; ld r8,16(r4) ++err2; ld r9,24(r4) ++err2; ld r10,32(r4) ++err2; ld r11,40(r4) ++err2; ld r12,48(r4) ++err2; ld r14,56(r4) ++err2; ld r15,64(r4) ++err2; ld r16,72(r4) ++err2; ld r17,80(r4) ++err2; ld r18,88(r4) ++err2; ld r19,96(r4) ++err2; ld r20,104(r4) ++err2; ld r21,112(r4) ++err2; ld r22,120(r4) ++ addi r4,r4,128 ++err2; std r0,0(r3) ++err2; std r6,8(r3) ++err2; std r8,16(r3) ++err2; std r9,24(r3) ++err2; std r10,32(r3) ++err2; std r11,40(r3) ++err2; std r12,48(r3) ++err2; std r14,56(r3) ++err2; std r15,64(r3) ++err2; std r16,72(r3) ++err2; std r17,80(r3) ++err2; std r18,88(r3) ++err2; std r19,96(r3) ++err2; std r20,104(r3) ++err2; std r21,112(r3) ++err2; std r22,120(r3) ++ addi r3,r3,128 ++ subi r7,r7,128 ++ bdnz 4b ++ ++ clrldi r5,r5,(64-7) ++ ++ /* Up to 127B to go */ ++5: srdi r6,r5,4 ++ mtocrf 0x01,r6 ++ ++6: bf cr7*4+1,7f ++err2; ld r0,0(r4) ++err2; ld r6,8(r4) ++err2; ld r8,16(r4) ++err2; ld r9,24(r4) ++err2; ld r10,32(r4) ++err2; ld r11,40(r4) ++err2; ld r12,48(r4) ++err2; ld r14,56(r4) ++ addi r4,r4,64 ++err2; std r0,0(r3) ++err2; std r6,8(r3) ++err2; std r8,16(r3) ++err2; std r9,24(r3) ++err2; std r10,32(r3) ++err2; std r11,40(r3) ++err2; std r12,48(r3) ++err2; std r14,56(r3) ++ addi r3,r3,64 ++ subi r7,r7,64 ++ ++7: ld r14,STK_REG(R14)(r1) ++ ld r15,STK_REG(R15)(r1) ++ ld r16,STK_REG(R16)(r1) ++ ld r17,STK_REG(R17)(r1) ++ ld r18,STK_REG(R18)(r1) ++ ld r19,STK_REG(R19)(r1) ++ ld r20,STK_REG(R20)(r1) ++ ld r21,STK_REG(R21)(r1) ++ ld r22,STK_REG(R22)(r1) ++ addi r1,r1,STACKFRAMESIZE ++ ++ /* Up to 63B to go */ ++ bf cr7*4+2,8f ++err1; ld r0,0(r4) ++err1; ld r6,8(r4) ++err1; ld r8,16(r4) ++err1; ld r9,24(r4) ++ addi r4,r4,32 ++err1; std r0,0(r3) ++err1; std r6,8(r3) ++err1; std r8,16(r3) ++err1; std r9,24(r3) ++ addi r3,r3,32 ++ subi r7,r7,32 ++ ++ /* Up to 31B to go */ ++8: bf cr7*4+3,9f ++err1; ld r0,0(r4) ++err1; ld r6,8(r4) ++ addi r4,r4,16 ++err1; std r0,0(r3) ++err1; std r6,8(r3) ++ addi r3,r3,16 ++ subi r7,r7,16 ++ ++9: clrldi r5,r5,(64-4) ++ ++ /* Up to 15B to go */ ++.Lshort_copy: ++ mtocrf 0x01,r5 ++ bf cr7*4+0,12f ++err1; lwz r0,0(r4) /* Less chance of a reject with word ops */ ++err1; lwz r6,4(r4) ++ addi r4,r4,8 ++err1; stw r0,0(r3) ++err1; stw r6,4(r3) ++ addi r3,r3,8 ++ subi r7,r7,8 ++ ++12: bf cr7*4+1,13f ++err1; lwz r0,0(r4) ++ addi r4,r4,4 ++err1; stw r0,0(r3) ++ addi r3,r3,4 ++ subi r7,r7,4 ++ ++13: bf cr7*4+2,14f ++err1; lhz r0,0(r4) ++ addi r4,r4,2 ++err1; sth r0,0(r3) ++ addi r3,r3,2 ++ subi r7,r7,2 ++ ++14: bf cr7*4+3,15f ++err1; lbz r0,0(r4) ++err1; stb r0,0(r3) ++ ++15: li r3,0 ++ blr ++ ++EXPORT_SYMBOL_GPL(copy_mc_generic); |
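
For context on the MCSAFE -> COPY_MC rename carried by the hunks above: the patch adds a generic copy_mc_to_kernel() in include/linux/uaccess.h that returns the number of bytes NOT copied (the arch-specific versions abort on a machine check during the source read, the generic fallback is a plain memcpy() that always returns 0). The following caller-side sketch is illustrative only and is not part of the patch; the helper name read_from_pmem() and its parameters are hypothetical, chosen to mirror how lib/iov_iter.c in this patch consumes the residue value.

	/* Hypothetical sketch: treating copy_mc_to_kernel()'s residue return. */
	#include <linux/errno.h>
	#include <linux/types.h>
	#include <linux/uaccess.h>	/* copy_mc_to_kernel() */

	static ssize_t read_from_pmem(void *dst, const void *pmem_src, size_t len)
	{
		unsigned long rem;

		/*
		 * Aborts early if reading the source raises #MC (or the arch
		 * equivalent); rem is the count of bytes left uncopied.
		 */
		rem = copy_mc_to_kernel(dst, pmem_src, len);
		if (rem)
			return rem == len ? -EIO : len - rem; /* no progress vs. short copy */

		return len;	/* whole range transferred */
	}

As in the _copy_mc_to_iter() hunk above, the caller is expected to report the bytes successfully copied before the poisoned region rather than retrying byte-by-byte, since re-touching the faulting source would just re-trigger the machine check.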