diff options
Diffstat (limited to '1136_linux-4.14.137.patch')
-rw-r--r-- | 1136_linux-4.14.137.patch | 1988 |
1 files changed, 1988 insertions, 0 deletions
diff --git a/1136_linux-4.14.137.patch b/1136_linux-4.14.137.patch new file mode 100644 index 00000000..eea0bc4c --- /dev/null +++ b/1136_linux-4.14.137.patch @@ -0,0 +1,1988 @@ +diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst +index 25f3b2532198..e05e581af5cf 100644 +--- a/Documentation/admin-guide/hw-vuln/spectre.rst ++++ b/Documentation/admin-guide/hw-vuln/spectre.rst +@@ -41,10 +41,11 @@ Related CVEs + + The following CVE entries describe Spectre variants: + +- ============= ======================= ================= ++ ============= ======================= ========================== + CVE-2017-5753 Bounds check bypass Spectre variant 1 + CVE-2017-5715 Branch target injection Spectre variant 2 +- ============= ======================= ================= ++ CVE-2019-1125 Spectre v1 swapgs Spectre variant 1 (swapgs) ++ ============= ======================= ========================== + + Problem + ------- +@@ -78,6 +79,13 @@ There are some extensions of Spectre variant 1 attacks for reading data + over the network, see :ref:`[12] <spec_ref12>`. However such attacks + are difficult, low bandwidth, fragile, and are considered low risk. + ++Note that, despite "Bounds Check Bypass" name, Spectre variant 1 is not ++only about user-controlled array bounds checks. It can affect any ++conditional checks. The kernel entry code interrupt, exception, and NMI ++handlers all have conditional swapgs checks. Those may be problematic ++in the context of Spectre v1, as kernel code can speculatively run with ++a user GS. ++ + Spectre variant 2 (Branch Target Injection) + ------------------------------------------- + +@@ -132,6 +140,9 @@ not cover all possible attack vectors. + 1. A user process attacking the kernel + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ++Spectre variant 1 ++~~~~~~~~~~~~~~~~~ ++ + The attacker passes a parameter to the kernel via a register or + via a known address in memory during a syscall. 
Such parameter may + be used later by the kernel as an index to an array or to derive +@@ -144,7 +155,40 @@ not cover all possible attack vectors. + potentially be influenced for Spectre attacks, new "nospec" accessor + macros are used to prevent speculative loading of data. + +- Spectre variant 2 attacker can :ref:`poison <poison_btb>` the branch ++Spectre variant 1 (swapgs) ++~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++ An attacker can train the branch predictor to speculatively skip the ++ swapgs path for an interrupt or exception. If they initialize ++ the GS register to a user-space value, if the swapgs is speculatively ++ skipped, subsequent GS-related percpu accesses in the speculation ++ window will be done with the attacker-controlled GS value. This ++ could cause privileged memory to be accessed and leaked. ++ ++ For example: ++ ++ :: ++ ++ if (coming from user space) ++ swapgs ++ mov %gs:<percpu_offset>, %reg ++ mov (%reg), %reg1 ++ ++ When coming from user space, the CPU can speculatively skip the ++ swapgs, and then do a speculative percpu load using the user GS ++ value. So the user can speculatively force a read of any kernel ++ value. If a gadget exists which uses the percpu value as an address ++ in another load/store, then the contents of the kernel value may ++ become visible via an L1 side channel attack. ++ ++ A similar attack exists when coming from kernel space. The CPU can ++ speculatively do the swapgs, causing the user GS to get used for the ++ rest of the speculative window. ++ ++Spectre variant 2 ++~~~~~~~~~~~~~~~~~ ++ ++ A spectre variant 2 attacker can :ref:`poison <poison_btb>` the branch + target buffer (BTB) before issuing syscall to launch an attack. 
+ After entering the kernel, the kernel could use the poisoned branch + target buffer on indirect jump and jump to gadget code in speculative +@@ -280,11 +324,18 @@ The sysfs file showing Spectre variant 1 mitigation status is: + + The possible values in this file are: + +- ======================================= ================================= +- 'Mitigation: __user pointer sanitation' Protection in kernel on a case by +- case base with explicit pointer +- sanitation. +- ======================================= ================================= ++ .. list-table:: ++ ++ * - 'Not affected' ++ - The processor is not vulnerable. ++ * - 'Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers' ++ - The swapgs protections are disabled; otherwise it has ++ protection in the kernel on a case by case basis with explicit ++ pointer sanitization and usercopy LFENCE barriers. ++ * - 'Mitigation: usercopy/swapgs barriers and __user pointer sanitization' ++ - Protection in the kernel on a case by case basis with explicit ++ pointer sanitization, usercopy LFENCE barriers, and swapgs LFENCE ++ barriers. + + However, the protections are put in place on a case by case basis, + and there is no guarantee that all possible attack vectors for Spectre +@@ -366,12 +417,27 @@ Turning on mitigation for Spectre variant 1 and Spectre variant 2 + 1. Kernel mitigation + ^^^^^^^^^^^^^^^^^^^^ + ++Spectre variant 1 ++~~~~~~~~~~~~~~~~~ ++ + For the Spectre variant 1, vulnerable kernel code (as determined + by code audit or scanning tools) is annotated on a case by case + basis to use nospec accessor macros for bounds clipping :ref:`[2] + <spec_ref2>` to avoid any usable disclosure gadgets. However, it may + not cover all attack vectors for Spectre variant 1. + ++ Copy-from-user code has an LFENCE barrier to prevent the access_ok() ++ check from being mis-speculated. The barrier is done by the ++ barrier_nospec() macro.
++ ++ For the swapgs variant of Spectre variant 1, LFENCE barriers are ++ added to interrupt, exception and NMI entry where needed. These ++ barriers are done by the FENCE_SWAPGS_KERNEL_ENTRY and ++ FENCE_SWAPGS_USER_ENTRY macros. ++ ++Spectre variant 2 ++~~~~~~~~~~~~~~~~~ ++ + For Spectre variant 2 mitigation, the compiler turns indirect calls or + jumps in the kernel into equivalent return trampolines (retpolines) + :ref:`[3] <spec_ref3>` :ref:`[9] <spec_ref9>` to go to the target +@@ -473,6 +539,12 @@ Mitigation control on the kernel command line + Spectre variant 2 mitigation can be disabled or force enabled at the + kernel command line. + ++ nospectre_v1 ++ ++ [X86,PPC] Disable mitigations for Spectre Variant 1 ++ (bounds check bypass). With this option data leaks are ++ possible in the system. ++ + nospectre_v2 + + [X86] Disable all mitigations for the Spectre variant 2 +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 9240b2caa0b1..13d80111bc1f 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -2401,6 +2401,7 @@ + Equivalent to: nopti [X86,PPC] + nospectre_v1 [PPC] + nobp=0 [S390] ++ nospectre_v1 [X86] + nospectre_v2 [X86,PPC,S390] + spectre_v2_user=off [X86] + spec_store_bypass_disable=off [X86,PPC] +@@ -2740,9 +2741,9 @@ + nosmt=force: Force disable SMT, cannot be undone + via the sysfs control file. + +- nospectre_v1 [PPC] Disable mitigations for Spectre Variant 1 (bounds +- check bypass). With this option data leaks are possible +- in the system. ++ nospectre_v1 [X86, PPC] Disable mitigations for Spectre Variant 1 ++ (bounds check bypass). With this option data leaks ++ are possible in the system. + + nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2 + (indirect branch prediction) vulnerability.
System may +diff --git a/Makefile b/Makefile +index a798f4777ae2..ff604059b6a8 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 4 + PATCHLEVEL = 14 +-SUBLEVEL = 136 ++SUBLEVEL = 137 + EXTRAVERSION = + NAME = Petit Gorille + +@@ -427,6 +427,7 @@ KBUILD_AFLAGS_MODULE := -DMODULE + KBUILD_CFLAGS_MODULE := -DMODULE + KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds + GCC_PLUGINS_CFLAGS := ++CLANG_FLAGS := + + export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC + export CPP AR NM STRIP OBJCOPY OBJDUMP HOSTLDFLAGS HOST_LOADLIBES +@@ -479,7 +480,7 @@ endif + + ifeq ($(cc-name),clang) + ifneq ($(CROSS_COMPILE),) +-CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) ++CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) + GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit)) + CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR) + GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) +diff --git a/arch/arm/boot/dts/rk3288-veyron-mickey.dts b/arch/arm/boot/dts/rk3288-veyron-mickey.dts +index f0994f0e5774..d6ca67866bc0 100644 +--- a/arch/arm/boot/dts/rk3288-veyron-mickey.dts ++++ b/arch/arm/boot/dts/rk3288-veyron-mickey.dts +@@ -161,10 +161,6 @@ + }; + }; + +-&emmc { +- /delete-property/mmc-hs200-1_8v; +-}; +- + &i2c2 { + status = "disabled"; + }; +diff --git a/arch/arm/boot/dts/rk3288-veyron-minnie.dts b/arch/arm/boot/dts/rk3288-veyron-minnie.dts +index 544de6027aaa..6000dca1cf05 100644 +--- a/arch/arm/boot/dts/rk3288-veyron-minnie.dts ++++ b/arch/arm/boot/dts/rk3288-veyron-minnie.dts +@@ -125,10 +125,6 @@ + power-supply = <&backlight_regulator>; + }; + +-&emmc { +- /delete-property/mmc-hs200-1_8v; +-}; +- + &gpio_keys { + pinctrl-0 = <&pwr_key_l &ap_lid_int_l &volum_down_l &volum_up_l>; + +diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi +index 5a7888581eea..23907d9ce89a 100644 +--- a/arch/arm/boot/dts/rk3288.dtsi ++++ 
b/arch/arm/boot/dts/rk3288.dtsi +@@ -213,6 +213,7 @@ + <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>, + <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>; + clock-frequency = <24000000>; ++ arm,no-tick-in-suspend; + }; + + timer: timer@ff810000 { +diff --git a/arch/arm/mach-rpc/dma.c b/arch/arm/mach-rpc/dma.c +index fb48f3141fb4..c4c96661eb89 100644 +--- a/arch/arm/mach-rpc/dma.c ++++ b/arch/arm/mach-rpc/dma.c +@@ -131,7 +131,7 @@ static irqreturn_t iomd_dma_handle(int irq, void *dev_id) + } while (1); + + idma->state = ~DMA_ST_AB; +- disable_irq(irq); ++ disable_irq_nosync(irq); + + return IRQ_HANDLED; + } +@@ -174,6 +174,9 @@ static void iomd_enable_dma(unsigned int chan, dma_t *dma) + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } + ++ idma->dma_addr = idma->dma.sg->dma_address; ++ idma->dma_len = idma->dma.sg->length; ++ + iomd_writeb(DMA_CR_C, dma_base + CR); + idma->state = DMA_ST_AB; + } +diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c +index c4ef1c31e0c4..37caeadb2964 100644 +--- a/arch/mips/lantiq/irq.c ++++ b/arch/mips/lantiq/irq.c +@@ -156,8 +156,9 @@ static int ltq_eiu_settype(struct irq_data *d, unsigned int type) + if (edge) + irq_set_handler(d->hwirq, handle_edge_irq); + +- ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_C) | +- (val << (i * 4)), LTQ_EIU_EXIN_C); ++ ltq_eiu_w32((ltq_eiu_r32(LTQ_EIU_EXIN_C) & ++ (~(7 << (i * 4)))) | (val << (i * 4)), ++ LTQ_EIU_EXIN_C); + } + } + +diff --git a/arch/parisc/boot/compressed/vmlinux.lds.S b/arch/parisc/boot/compressed/vmlinux.lds.S +index a4ce3314e78e..b658f77d6369 100644 +--- a/arch/parisc/boot/compressed/vmlinux.lds.S ++++ b/arch/parisc/boot/compressed/vmlinux.lds.S +@@ -40,8 +40,8 @@ SECTIONS + #endif + _startcode_end = .; + +- /* bootloader code and data starts behind area of extracted kernel */ +- . = (SZ_end - SZparisc_kernel_start + KERNEL_BINARY_TEXT_START); ++ /* bootloader code and data starts at least behind area of extracted kernel */ ++ . 
= MAX(ABSOLUTE(.), (SZ_end - SZparisc_kernel_start + KERNEL_BINARY_TEXT_START)); + + /* align on next page boundary */ + . = ALIGN(4096); +diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c +index 252fee320816..fb07cfa3f2f9 100644 +--- a/arch/x86/boot/compressed/misc.c ++++ b/arch/x86/boot/compressed/misc.c +@@ -16,6 +16,7 @@ + #include "error.h" + #include "../string.h" + #include "../voffset.h" ++#include <asm/bootparam_utils.h> + + /* + * WARNING!! +diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h +index 32d4ec2e0243..5380d45b1c6e 100644 +--- a/arch/x86/boot/compressed/misc.h ++++ b/arch/x86/boot/compressed/misc.h +@@ -19,7 +19,6 @@ + #include <asm/page.h> + #include <asm/boot.h> + #include <asm/bootparam.h> +-#include <asm/bootparam_utils.h> + + #define BOOT_BOOT_H + #include "../ctype.h" +diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h +index 5d10b7a85cad..557c1bdda311 100644 +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -332,6 +332,23 @@ For 32-bit we have the following conventions - kernel is built with + + #endif + ++/* ++ * Mitigate Spectre v1 for conditional swapgs code paths. ++ * ++ * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to ++ * prevent a speculative swapgs when coming from kernel space. ++ * ++ * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path, ++ * to prevent the swapgs from getting speculatively skipped when coming from ++ * user space. 
++ */ ++.macro FENCE_SWAPGS_USER_ENTRY ++ ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER ++.endm ++.macro FENCE_SWAPGS_KERNEL_ENTRY ++ ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL ++.endm ++ + #endif /* CONFIG_X86_64 */ + + /* +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index e09ba4bc8b98..5ec66fafde4e 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -531,9 +531,12 @@ END(irq_entries_start) + testb $3, CS-ORIG_RAX(%rsp) + jz 1f + SWAPGS ++ FENCE_SWAPGS_USER_ENTRY + call switch_to_thread_stack ++ jmp 2f + 1: +- ++ FENCE_SWAPGS_KERNEL_ENTRY ++2: + PUSH_AND_CLEAR_REGS + ENCODE_FRAME_POINTER + +@@ -1113,7 +1116,6 @@ idtentry stack_segment do_stack_segment has_error_code=1 + #ifdef CONFIG_XEN + idtentry xennmi do_nmi has_error_code=0 + idtentry xendebug do_debug has_error_code=0 +-idtentry xenint3 do_int3 has_error_code=0 + #endif + + idtentry general_protection do_general_protection has_error_code=1 +@@ -1147,6 +1149,12 @@ ENTRY(paranoid_entry) + + 1: + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 ++ /* ++ * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an ++ * unconditional CR3 write, even in the PTI case. So do an lfence ++ * to prevent GS speculation, regardless of whether PTI is enabled. ++ */ ++ FENCE_SWAPGS_KERNEL_ENTRY + + ret + END(paranoid_entry) +@@ -1196,6 +1204,7 @@ ENTRY(error_entry) + * from user mode due to an IRET fault. + */ + SWAPGS ++ FENCE_SWAPGS_USER_ENTRY + /* We have user CR3. Change to kernel CR3. 
*/ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + +@@ -1217,6 +1226,8 @@ ENTRY(error_entry) + CALL_enter_from_user_mode + ret + ++.Lerror_entry_done_lfence: ++ FENCE_SWAPGS_KERNEL_ENTRY + .Lerror_entry_done: + TRACE_IRQS_OFF + ret +@@ -1235,7 +1246,7 @@ ENTRY(error_entry) + cmpq %rax, RIP+8(%rsp) + je .Lbstep_iret + cmpq $.Lgs_change, RIP+8(%rsp) +- jne .Lerror_entry_done ++ jne .Lerror_entry_done_lfence + + /* + * hack: .Lgs_change can fail with user gsbase. If this happens, fix up +@@ -1243,6 +1254,7 @@ ENTRY(error_entry) + * .Lgs_change's error handler with kernel gsbase. + */ + SWAPGS ++ FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + jmp .Lerror_entry_done + +@@ -1257,6 +1269,7 @@ ENTRY(error_entry) + * gsbase and CR3. Switch to kernel gsbase and CR3: + */ + SWAPGS ++ FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + + /* +@@ -1348,6 +1361,7 @@ ENTRY(nmi) + + swapgs + cld ++ FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp +diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c +index 9c35dc0a9d64..9f4b1081dee0 100644 +--- a/arch/x86/entry/vdso/vclock_gettime.c ++++ b/arch/x86/entry/vdso/vclock_gettime.c +@@ -29,12 +29,12 @@ extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); + extern time_t __vdso_time(time_t *t); + + #ifdef CONFIG_PARAVIRT_CLOCK +-extern u8 pvclock_page ++extern u8 pvclock_page[PAGE_SIZE] + __attribute__((visibility("hidden"))); + #endif + + #ifdef CONFIG_HYPERV_TSCPAGE +-extern u8 hvclock_page ++extern u8 hvclock_page[PAGE_SIZE] + __attribute__((visibility("hidden"))); + #endif + +@@ -191,13 +191,24 @@ notrace static inline u64 vgetsns(int *mode) + + if (gtod->vclock_mode == VCLOCK_TSC) + cycles = vread_tsc(); ++ ++ /* ++ * For any memory-mapped vclock type, we need to make sure that gcc ++ * doesn't cleverly hoist a load before the mode check. 
Otherwise we ++ * might end up touching the memory-mapped page even if the vclock in ++ * question isn't enabled, which will segfault. Hence the barriers. ++ */ + #ifdef CONFIG_PARAVIRT_CLOCK +- else if (gtod->vclock_mode == VCLOCK_PVCLOCK) ++ else if (gtod->vclock_mode == VCLOCK_PVCLOCK) { ++ barrier(); + cycles = vread_pvclock(mode); ++ } + #endif + #ifdef CONFIG_HYPERV_TSCPAGE +- else if (gtod->vclock_mode == VCLOCK_HVCLOCK) ++ else if (gtod->vclock_mode == VCLOCK_HVCLOCK) { ++ barrier(); + cycles = vread_hvclock(mode); ++ } + #endif + else + return 0; +diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h +index a1ed92aae12a..25a5a5c6ae90 100644 +--- a/arch/x86/include/asm/apic.h ++++ b/arch/x86/include/asm/apic.h +@@ -48,7 +48,7 @@ static inline void generic_apic_probe(void) + + #ifdef CONFIG_X86_LOCAL_APIC + +-extern unsigned int apic_verbosity; ++extern int apic_verbosity; + extern int local_apic_timer_c2_ok; + + extern int disable_apic; +diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h +index 70eddb3922ff..4e2d03135854 100644 +--- a/arch/x86/include/asm/cpufeature.h ++++ b/arch/x86/include/asm/cpufeature.h +@@ -22,8 +22,8 @@ enum cpuid_leafs + CPUID_LNX_3, + CPUID_7_0_EBX, + CPUID_D_1_EAX, +- CPUID_F_0_EDX, +- CPUID_F_1_EDX, ++ CPUID_LNX_4, ++ CPUID_DUMMY, + CPUID_8000_0008_EBX, + CPUID_6_EAX, + CPUID_8000_000A_EDX, +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 4cb8315c521f..14357354cd28 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -271,13 +271,18 @@ + #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ + #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ + +-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ +-#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ +- +-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), 
word 12 */ +-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ +-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ +-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ ++/* ++ * Extended auxiliary flags: Linux defined - for features scattered in various ++ * CPUID levels like 0xf, etc. ++ * ++ * Reuse free bits when adding new feature flags! ++ */ ++#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ ++#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ ++#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ ++#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ ++#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ ++#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ + + /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ + #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ +@@ -382,5 +387,6 @@ + #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ + #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ + #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ ++#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index f9a4b85d7309..9f3eb334c818 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1353,25 +1353,29 @@ enum { + #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0) + #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) + ++asmlinkage void __noreturn kvm_spurious_fault(void); ++ + /* + * Hardware virtualization extension instructions may fault if a + * reboot turns off virtualization while processes are running. +- * Trap the fault and ignore the instruction if that happens. ++ * Usually after catching the fault we just panic; during reboot ++ * instead the instruction is ignored. + */ +-asmlinkage void kvm_spurious_fault(void); +- +-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ +- "666: " insn "\n\t" \ +- "668: \n\t" \ +- ".pushsection .fixup, \"ax\" \n" \ +- "667: \n\t" \ +- cleanup_insn "\n\t" \ +- "cmpb $0, kvm_rebooting \n\t" \ +- "jne 668b \n\t" \ +- __ASM_SIZE(push) " $666b \n\t" \ +- "jmp kvm_spurious_fault \n\t" \ +- ".popsection \n\t" \ +- _ASM_EXTABLE(666b, 667b) ++#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ ++ "666: \n\t" \ ++ insn "\n\t" \ ++ "jmp 668f \n\t" \ ++ "667: \n\t" \ ++ "call kvm_spurious_fault \n\t" \ ++ "668: \n\t" \ ++ ".pushsection .fixup, \"ax\" \n\t" \ ++ "700: \n\t" \ ++ cleanup_insn "\n\t" \ ++ "cmpb $0, kvm_rebooting\n\t" \ ++ "je 667b \n\t" \ ++ "jmp 668b \n\t" \ ++ ".popsection \n\t" \ ++ _ASM_EXTABLE(666b, 700b) + + #define __kvm_handle_fault_on_reboot(insn) \ + ____kvm_handle_fault_on_reboot(insn, "") +diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h +index c83a2f418cea..4471f0da6ed7 100644 +--- a/arch/x86/include/asm/paravirt.h ++++ b/arch/x86/include/asm/paravirt.h +@@ -758,6 +758,7 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) + PV_RESTORE_ALL_CALLER_REGS \ + FRAME_END \ + "ret;" \ ++ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \ + ".popsection") + + /* Get a reference to a callee-save function */ +diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h +index afbc87206886..b771bb3d159b 100644 +--- a/arch/x86/include/asm/traps.h ++++ 
b/arch/x86/include/asm/traps.h +@@ -40,7 +40,7 @@ asmlinkage void simd_coprocessor_error(void); + asmlinkage void xen_divide_error(void); + asmlinkage void xen_xennmi(void); + asmlinkage void xen_xendebug(void); +-asmlinkage void xen_xenint3(void); ++asmlinkage void xen_int3(void); + asmlinkage void xen_overflow(void); + asmlinkage void xen_bounds(void); + asmlinkage void xen_invalid_op(void); +diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c +index 2e64178f284d..ae410f7585f1 100644 +--- a/arch/x86/kernel/apic/apic.c ++++ b/arch/x86/kernel/apic/apic.c +@@ -182,7 +182,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); + /* + * Debug level, exported for io_apic.c + */ +-unsigned int apic_verbosity; ++int apic_verbosity; + + int pic_mode; + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 64b6180ce162..8bf21bc7a190 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -32,6 +32,7 @@ + #include <asm/intel-family.h> + #include <asm/e820/api.h> + ++static void __init spectre_v1_select_mitigation(void); + static void __init spectre_v2_select_mitigation(void); + static void __init ssb_select_mitigation(void); + static void __init l1tf_select_mitigation(void); +@@ -96,17 +97,11 @@ void __init check_bugs(void) + if (boot_cpu_has(X86_FEATURE_STIBP)) + x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; + +- /* Select the proper spectre mitigation before patching alternatives */ ++ /* Select the proper CPU mitigations before patching alternatives: */ ++ spectre_v1_select_mitigation(); + spectre_v2_select_mitigation(); +- +- /* +- * Select proper mitigation for any exposure to the Speculative Store +- * Bypass vulnerability. 
+- */ + ssb_select_mitigation(); +- + l1tf_select_mitigation(); +- + mds_select_mitigation(); + + arch_smt_update(); +@@ -271,6 +266,98 @@ static int __init mds_cmdline(char *str) + } + early_param("mds", mds_cmdline); + ++#undef pr_fmt ++#define pr_fmt(fmt) "Spectre V1 : " fmt ++ ++enum spectre_v1_mitigation { ++ SPECTRE_V1_MITIGATION_NONE, ++ SPECTRE_V1_MITIGATION_AUTO, ++}; ++ ++static enum spectre_v1_mitigation spectre_v1_mitigation __ro_after_init = ++ SPECTRE_V1_MITIGATION_AUTO; ++ ++static const char * const spectre_v1_strings[] = { ++ [SPECTRE_V1_MITIGATION_NONE] = "Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers", ++ [SPECTRE_V1_MITIGATION_AUTO] = "Mitigation: usercopy/swapgs barriers and __user pointer sanitization", ++}; ++ ++/* ++ * Does SMAP provide full mitigation against speculative kernel access to ++ * userspace? ++ */ ++static bool smap_works_speculatively(void) ++{ ++ if (!boot_cpu_has(X86_FEATURE_SMAP)) ++ return false; ++ ++ /* ++ * On CPUs which are vulnerable to Meltdown, SMAP does not ++ * prevent speculative access to user data in the L1 cache. ++ * Consider SMAP to be non-functional as a mitigation on these ++ * CPUs. ++ */ ++ if (boot_cpu_has(X86_BUG_CPU_MELTDOWN)) ++ return false; ++ ++ return true; ++} ++ ++static void __init spectre_v1_select_mitigation(void) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) { ++ spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE; ++ return; ++ } ++ ++ if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) { ++ /* ++ * With Spectre v1, a user can speculatively control either ++ * path of a conditional swapgs with a user-controlled GS ++ * value. The mitigation is to add lfences to both code paths. ++ * ++ * If FSGSBASE is enabled, the user can put a kernel address in ++ * GS, in which case SMAP provides no protection. ++ * ++ * [ NOTE: Don't check for X86_FEATURE_FSGSBASE until the ++ * FSGSBASE enablement patches have been merged. 
] ++ * ++ * If FSGSBASE is disabled, the user can only put a user space ++ * address in GS. That makes an attack harder, but still ++ * possible if there's no SMAP protection. ++ */ ++ if (!smap_works_speculatively()) { ++ /* ++ * Mitigation can be provided from SWAPGS itself or ++ * PTI as the CR3 write in the Meltdown mitigation ++ * is serializing. ++ * ++ * If neither is there, mitigate with an LFENCE to ++ * stop speculation through swapgs. ++ */ ++ if (boot_cpu_has_bug(X86_BUG_SWAPGS) && ++ !boot_cpu_has(X86_FEATURE_PTI)) ++ setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_USER); ++ ++ /* ++ * Enable lfences in the kernel entry (non-swapgs) ++ * paths, to prevent user entry from speculatively ++ * skipping swapgs. ++ */ ++ setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_KERNEL); ++ } ++ } ++ ++ pr_info("%s\n", spectre_v1_strings[spectre_v1_mitigation]); ++} ++ ++static int __init nospectre_v1_cmdline(char *str) ++{ ++ spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE; ++ return 0; ++} ++early_param("nospectre_v1", nospectre_v1_cmdline); ++ + #undef pr_fmt + #define pr_fmt(fmt) "Spectre V2 : " fmt + +@@ -1255,7 +1342,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr + break; + + case X86_BUG_SPECTRE_V1: +- return sprintf(buf, "Mitigation: __user pointer sanitization\n"); ++ return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); + + case X86_BUG_SPECTRE_V2: + return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index ebe547b1ffce..551c6bed7c8c 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -768,6 +768,30 @@ static void init_speculation_control(struct cpuinfo_x86 *c) + } + } + ++static void init_cqm(struct cpuinfo_x86 *c) ++{ ++ if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { ++ c->x86_cache_max_rmid = -1; ++ c->x86_cache_occ_scale = -1; ++ return; ++ } ++ ++ /* will be overridden if 
occupancy monitoring exists */ ++ c->x86_cache_max_rmid = cpuid_ebx(0xf); ++ ++ if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || ++ cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || ++ cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { ++ u32 eax, ebx, ecx, edx; ++ ++ /* QoS sub-leaf, EAX=0Fh, ECX=1 */ ++ cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); ++ ++ c->x86_cache_max_rmid = ecx; ++ c->x86_cache_occ_scale = ebx; ++ } ++} ++ + void get_cpu_cap(struct cpuinfo_x86 *c) + { + u32 eax, ebx, ecx, edx; +@@ -799,33 +823,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) + c->x86_capability[CPUID_D_1_EAX] = eax; + } + +- /* Additional Intel-defined flags: level 0x0000000F */ +- if (c->cpuid_level >= 0x0000000F) { +- +- /* QoS sub-leaf, EAX=0Fh, ECX=0 */ +- cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx); +- c->x86_capability[CPUID_F_0_EDX] = edx; +- +- if (cpu_has(c, X86_FEATURE_CQM_LLC)) { +- /* will be overridden if occupancy monitoring exists */ +- c->x86_cache_max_rmid = ebx; +- +- /* QoS sub-leaf, EAX=0Fh, ECX=1 */ +- cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); +- c->x86_capability[CPUID_F_1_EDX] = edx; +- +- if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) || +- ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) || +- (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) { +- c->x86_cache_max_rmid = ecx; +- c->x86_cache_occ_scale = ebx; +- } +- } else { +- c->x86_cache_max_rmid = -1; +- c->x86_cache_occ_scale = -1; +- } +- } +- + /* AMD-defined flags: level 0x80000001 */ + eax = cpuid_eax(0x80000000); + c->extended_cpuid_level = eax; +@@ -863,6 +860,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) + + init_scattered_cpuid_features(c); + init_speculation_control(c); ++ init_cqm(c); + + /* + * Clear/Set all flags overridden by options, after probe. 
+@@ -905,6 +903,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) + #define NO_L1TF BIT(3) + #define NO_MDS BIT(4) + #define MSBDS_ONLY BIT(5) ++#define NO_SWAPGS BIT(6) + + #define VULNWL(_vendor, _family, _model, _whitelist) \ + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } +@@ -928,29 +927,37 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), + +- VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), +- VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY), +- VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY), +- VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), +- VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY), +- VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY), ++ VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), ++ VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), ++ VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), ++ VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), ++ VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), ++ VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + + VULNWL_INTEL(CORE_YONAH, NO_SSB), + +- VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY), ++ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + +- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), +- VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), +- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF), ++ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS), ++ VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS), ++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS), ++ ++ /* ++ * Technically, swapgs isn't serializing on AMD (despite it previously ++ * being documented as such in the APM). 
But according to AMD, %gs is ++ * updated non-speculatively, and the issuing of %gs-relative memory ++ * operands will be blocked until the %gs update completes, which is ++ * good enough for our purposes. ++ */ + + /* AMD Family 0xf - 0x12 */ +- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), +- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), +- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), +- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), ++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), ++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), ++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), ++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ +- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), ++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), + {} + }; + +@@ -987,6 +994,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); + } + ++ if (!cpu_matches(NO_SWAPGS)) ++ setup_force_cpu_bug(X86_BUG_SWAPGS); ++ + if (cpu_matches(NO_MELTDOWN)) + return; + +diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c +index 904b0a3c4e53..4c9fc6a4d1ea 100644 +--- a/arch/x86/kernel/cpu/cpuid-deps.c ++++ b/arch/x86/kernel/cpu/cpuid-deps.c +@@ -59,6 +59,9 @@ const static struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, ++ { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC }, ++ { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, ++ { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, + {} + }; + +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c +index df11f5d604be..ed7ce5184a77 100644 +--- 
a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -21,6 +21,10 @@ struct cpuid_bit { + static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, ++ { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, ++ { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, ++ { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, ++ { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 }, + { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, + { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, + { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, +diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c +index 652bdd867782..5853eb50138e 100644 +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -631,6 +631,7 @@ asm( + "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" + "setne %al;" + "ret;" ++".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" + ".popsection"); + + #endif +diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h +index 9a327d5b6d1f..d78a61408243 100644 +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -47,8 +47,6 @@ static const struct cpuid_reg reverse_cpuid[] = { + [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, + [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, + [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, +- [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, +- [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX}, + [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX}, + [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, + [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index f97b533bc6e6..87a0601b1c20 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -4313,11 +4313,11 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, + */ + + /* Faults from writes to non-writable pages */ +- u8 wf = (pfec & 
PFERR_WRITE_MASK) ? ~w : 0; ++ u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0; + /* Faults from user mode accesses to supervisor pages */ +- u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0; ++ u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0; + /* Faults from fetches of non-executable pages*/ +- u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0; ++ u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0; + /* Faults from kernel mode fetches of user pages */ + u8 smepf = 0; + /* Faults from kernel mode accesses of user pages */ +diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h +index a5a41ec58072..0c122226ca56 100644 +--- a/arch/x86/math-emu/fpu_emu.h ++++ b/arch/x86/math-emu/fpu_emu.h +@@ -177,7 +177,7 @@ static inline void reg_copy(FPU_REG const *x, FPU_REG *y) + #define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \ + ((y) + EXTENDED_Ebias) & 0x7fff; } + #define exponent16(x) (*(short *)&((x)->exp)) +-#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); } ++#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (u16)(y); } + #define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); } + #define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; } + +diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c +index 8dc9095bab22..742619e94bdf 100644 +--- a/arch/x86/math-emu/reg_constant.c ++++ b/arch/x86/math-emu/reg_constant.c +@@ -18,7 +18,7 @@ + #include "control_w.h" + + #define MAKE_REG(s, e, l, h) { l, h, \ +- ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } ++ (u16)((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } + + FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); + #if 0 +diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c +index 481d7920ea24..f79a0cdc6b4e 100644 +--- a/arch/x86/xen/enlighten_pv.c ++++ b/arch/x86/xen/enlighten_pv.c +@@ -598,12 +598,12 @@ struct trap_array_entry { + + static struct trap_array_entry trap_array[] = { + { debug, xen_xendebug, true }, +- { int3, 
xen_xenint3, true }, + { double_fault, xen_double_fault, true }, + #ifdef CONFIG_X86_MCE + { machine_check, xen_machine_check, true }, + #endif + { nmi, xen_xennmi, true }, ++ { int3, xen_int3, false }, + { overflow, xen_overflow, false }, + #ifdef CONFIG_IA32_EMULATION + { entry_INT80_compat, xen_entry_INT80_compat, false }, +diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S +index 417b339e5c8e..3a6feed76dfc 100644 +--- a/arch/x86/xen/xen-asm_64.S ++++ b/arch/x86/xen/xen-asm_64.S +@@ -30,7 +30,6 @@ xen_pv_trap divide_error + xen_pv_trap debug + xen_pv_trap xendebug + xen_pv_trap int3 +-xen_pv_trap xenint3 + xen_pv_trap xennmi + xen_pv_trap overflow + xen_pv_trap bounds +diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c +index 995c4d8922b1..761f0c19a451 100644 +--- a/drivers/acpi/blacklist.c ++++ b/drivers/acpi/blacklist.c +@@ -30,7 +30,9 @@ + + #include "internal.h" + ++#ifdef CONFIG_DMI + static const struct dmi_system_id acpi_rev_dmi_table[] __initconst; ++#endif + + /* + * POLICY: If *anything* doesn't work, put it on the blacklist. 
+@@ -74,7 +76,9 @@ int __init acpi_blacklisted(void) + } + + (void)early_acpi_osi_init(); ++#ifdef CONFIG_DMI + dmi_check_system(acpi_rev_dmi_table); ++#endif + + return blacklisted; + } +diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c +index d32cd943dff2..b77e9281c039 100644 +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -1207,7 +1207,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd, + struct block_device *bdev) + { + sock_shutdown(nbd); +- kill_bdev(bdev); ++ __invalidate_device(bdev, true); + nbd_bdev_reset(bdev); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) +diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c +index b92867814e2d..cb2be154db3b 100644 +--- a/drivers/clk/tegra/clk-tegra210.c ++++ b/drivers/clk/tegra/clk-tegra210.c +@@ -2057,9 +2057,9 @@ static struct div_nmp pllu_nmp = { + }; + + static struct tegra_clk_pll_freq_table pll_u_freq_table[] = { +- { 12000000, 480000000, 40, 1, 0, 0 }, +- { 13000000, 480000000, 36, 1, 0, 0 }, /* actual: 468.0 MHz */ +- { 38400000, 480000000, 25, 2, 0, 0 }, ++ { 12000000, 480000000, 40, 1, 1, 0 }, ++ { 13000000, 480000000, 36, 1, 1, 0 }, /* actual: 468.0 MHz */ ++ { 38400000, 480000000, 25, 2, 1, 0 }, + { 0, 0, 0, 0, 0, 0 }, + }; + +@@ -2983,6 +2983,7 @@ static struct tegra_clk_init_table init_table[] __initdata = { + { TEGRA210_CLK_DFLL_REF, TEGRA210_CLK_PLL_P, 51000000, 1 }, + { TEGRA210_CLK_SBC4, TEGRA210_CLK_PLL_P, 12000000, 1 }, + { TEGRA210_CLK_PLL_RE_VCO, TEGRA210_CLK_CLK_MAX, 672000000, 1 }, ++ { TEGRA210_CLK_PLL_U_OUT1, TEGRA210_CLK_CLK_MAX, 48000000, 1 }, + { TEGRA210_CLK_XUSB_GATE, TEGRA210_CLK_CLK_MAX, 0, 1 }, + { TEGRA210_CLK_XUSB_SS_SRC, TEGRA210_CLK_PLL_U_480M, 120000000, 0 }, + { TEGRA210_CLK_XUSB_FS_SRC, TEGRA210_CLK_PLL_U_48M, 48000000, 0 }, +@@ -3008,7 +3009,6 @@ static struct tegra_clk_init_table init_table[] __initdata = { + { TEGRA210_CLK_PLL_DP, TEGRA210_CLK_CLK_MAX, 270000000, 0 }, + { 
TEGRA210_CLK_SOC_THERM, TEGRA210_CLK_PLL_P, 51000000, 0 }, + { TEGRA210_CLK_CCLK_G, TEGRA210_CLK_CLK_MAX, 0, 1 }, +- { TEGRA210_CLK_PLL_U_OUT1, TEGRA210_CLK_CLK_MAX, 48000000, 1 }, + { TEGRA210_CLK_PLL_U_OUT2, TEGRA210_CLK_CLK_MAX, 60000000, 1 }, + /* This MUST be the last entry. */ + { TEGRA210_CLK_CLK_MAX, TEGRA210_CLK_CLK_MAX, 0, 0 }, +diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c +index 77b126525dac..19c7433e8309 100644 +--- a/drivers/dma/sh/rcar-dmac.c ++++ b/drivers/dma/sh/rcar-dmac.c +@@ -1129,7 +1129,7 @@ rcar_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan); + + /* Someone calling slave DMA on a generic channel? */ +- if (rchan->mid_rid < 0 || !sg_len) { ++ if (rchan->mid_rid < 0 || !sg_len || !sg_dma_len(sgl)) { + dev_warn(chan->device->dev, + "%s: bad parameter: len=%d, id=%d\n", + __func__, sg_len, rchan->mid_rid); +diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c +index 3db0a9b0d259..25351b6b1e34 100644 +--- a/drivers/gpio/gpiolib.c ++++ b/drivers/gpio/gpiolib.c +@@ -835,9 +835,11 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) + } + + if (eflags & GPIOEVENT_REQUEST_RISING_EDGE) +- irqflags |= IRQF_TRIGGER_RISING; ++ irqflags |= test_bit(FLAG_ACTIVE_LOW, &desc->flags) ? ++ IRQF_TRIGGER_FALLING : IRQF_TRIGGER_RISING; + if (eflags & GPIOEVENT_REQUEST_FALLING_EDGE) +- irqflags |= IRQF_TRIGGER_FALLING; ++ irqflags |= test_bit(FLAG_ACTIVE_LOW, &desc->flags) ? 
++ IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING; + irqflags |= IRQF_ONESHOT; + irqflags |= IRQF_SHARED; + +diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c +index 2c6d19683688..4a7d50a96d36 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_connector.c ++++ b/drivers/gpu/drm/nouveau/nouveau_connector.c +@@ -251,7 +251,7 @@ nouveau_conn_reset(struct drm_connector *connector) + return; + + if (connector->state) +- __drm_atomic_helper_connector_destroy_state(connector->state); ++ nouveau_conn_atomic_destroy_state(connector, connector->state); + __drm_atomic_helper_connector_reset(connector, &asyc->state); + asyc->dither.mode = DITHERING_MODE_AUTO; + asyc->dither.depth = DITHERING_DEPTH_AUTO; +diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c +index 69a79fdfa23e..9dcdc0a8685e 100644 +--- a/drivers/infiniband/hw/hfi1/chip.c ++++ b/drivers/infiniband/hw/hfi1/chip.c +@@ -14566,7 +14566,7 @@ void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd) + clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); + } + +-static void init_rxe(struct hfi1_devdata *dd) ++static int init_rxe(struct hfi1_devdata *dd) + { + struct rsm_map_table *rmt; + u64 val; +@@ -14575,6 +14575,9 @@ static void init_rxe(struct hfi1_devdata *dd) + write_csr(dd, RCV_ERR_MASK, ~0ull); + + rmt = alloc_rsm_map_table(dd); ++ if (!rmt) ++ return -ENOMEM; ++ + /* set up QOS, including the QPN map table */ + init_qos(dd, rmt); + init_user_fecn_handling(dd, rmt); +@@ -14599,6 +14602,7 @@ static void init_rxe(struct hfi1_devdata *dd) + val = read_csr(dd, RCV_BYPASS); + val |= (4ull << 16); + write_csr(dd, RCV_BYPASS, val); ++ return 0; + } + + static void init_other(struct hfi1_devdata *dd) +@@ -15154,7 +15158,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, + goto bail_cleanup; + + /* set initial RXE CSRs */ +- init_rxe(dd); ++ ret = init_rxe(dd); ++ if (ret) ++ goto bail_cleanup; ++ + /* set initial TXE CSRs */ + init_txe(dd); + 
/* set initial non-RXE, non-TXE CSRs */ +diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c +index f4372afa0e81..ad78b471c112 100644 +--- a/drivers/infiniband/hw/hfi1/verbs.c ++++ b/drivers/infiniband/hw/hfi1/verbs.c +@@ -54,6 +54,7 @@ + #include <linux/mm.h> + #include <linux/vmalloc.h> + #include <rdma/opa_addr.h> ++#include <linux/nospec.h> + + #include "hfi.h" + #include "common.h" +@@ -1587,6 +1588,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) + sl = rdma_ah_get_sl(ah_attr); + if (sl >= ARRAY_SIZE(ibp->sl_to_sc)) + return -EINVAL; ++ sl = array_index_nospec(sl, ARRAY_SIZE(ibp->sl_to_sc)); + + sc5 = ibp->sl_to_sc[sl]; + if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf) +diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h +index 754103372faa..89c7e391a834 100644 +--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h ++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h +@@ -427,6 +427,7 @@ struct mlx5_umr_wr { + u64 length; + int access_flags; + u32 mkey; ++ u8 ignore_free_state:1; + }; + + static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr) +diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c +index e88bb71056cd..cfddca850cb4 100644 +--- a/drivers/infiniband/hw/mlx5/mr.c ++++ b/drivers/infiniband/hw/mlx5/mr.c +@@ -538,14 +538,17 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) + int c; + + c = order2idx(dev, mr->order); +- if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { +- mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); ++ WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES); ++ ++ if (unreg_umr(dev, mr)) { ++ mr->allocated_from_cache = false; ++ destroy_mkey(dev, mr); ++ ent = &cache->ent[c]; ++ if (ent->cur < ent->limit) ++ queue_work(cache->wq, &ent->work); + return; + } + +- if (unreg_umr(dev, mr)) +- return; +- + ent = &cache->ent[c]; + spin_lock_irq(&ent->lock); + list_add_tail(&mr->list, 
&ent->head); +@@ -1303,9 +1306,11 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) + return 0; + + umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | +- MLX5_IB_SEND_UMR_FAIL_IF_FREE; ++ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; + umrwr.wr.opcode = MLX5_IB_WR_UMR; ++ umrwr.pd = dev->umrc.pd; + umrwr.mkey = mr->mmkey.key; ++ umrwr.ignore_free_state = 1; + + return mlx5_ib_post_send_wait(dev, &umrwr); + } +diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c +index 964c3a0bbf16..5a7dcb5afe6e 100644 +--- a/drivers/infiniband/hw/mlx5/qp.c ++++ b/drivers/infiniband/hw/mlx5/qp.c +@@ -1425,7 +1425,6 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + } + + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); +- MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + memcpy(rss_key, ucmd.rx_hash_key, len); + break; + } +@@ -3265,10 +3264,14 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + + memset(umr, 0, sizeof(*umr)); + +- if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) +- umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */ +- else +- umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */ ++ if (!umrwr->ignore_free_state) { ++ if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) ++ /* fail if free */ ++ umr->flags = MLX5_UMR_CHECK_FREE; ++ else ++ /* fail if not free */ ++ umr->flags = MLX5_UMR_CHECK_NOT_FREE; ++ } + + umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size)); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) { +diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c +index 6b58ee2e2a25..f8cfcd063649 100644 +--- a/drivers/misc/eeprom/at24.c ++++ b/drivers/misc/eeprom/at24.c +@@ -834,7 +834,7 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) + at24->nvmem_config.name = dev_name(&client->dev); + at24->nvmem_config.dev = &client->dev; + at24->nvmem_config.read_only = !writable; +- 
at24->nvmem_config.root_only = true; ++ at24->nvmem_config.root_only = !(chip.flags & AT24_FLAG_IRUGO); + at24->nvmem_config.owner = THIS_MODULE; + at24->nvmem_config.compat = true; + at24->nvmem_config.base_dev = &client->dev; +diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c +index 5252885e5cda..32001d43e453 100644 +--- a/drivers/mmc/host/dw_mmc.c ++++ b/drivers/mmc/host/dw_mmc.c +@@ -2046,8 +2046,7 @@ static void dw_mci_tasklet_func(unsigned long priv) + * delayed. Allowing the transfer to take place + * avoids races and keeps things simple. + */ +- if ((err != -ETIMEDOUT) && +- (cmd->opcode == MMC_SEND_TUNING_BLOCK)) { ++ if (err != -ETIMEDOUT) { + state = STATE_SENDING_DATA; + continue; + } +diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c +index 39f399741647..cabeb1790db7 100644 +--- a/drivers/net/ethernet/emulex/benet/be_main.c ++++ b/drivers/net/ethernet/emulex/benet/be_main.c +@@ -4600,8 +4600,12 @@ int be_update_queues(struct be_adapter *adapter) + struct net_device *netdev = adapter->netdev; + int status; + +- if (netif_running(netdev)) ++ if (netif_running(netdev)) { ++ /* device cannot transmit now, avoid dev_watchdog timeouts */ ++ netif_carrier_off(netdev); ++ + be_close(netdev); ++ } + + be_cancel_worker(adapter); + +diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c +index 5e06917b4cef..22e365fa440c 100644 +--- a/drivers/perf/arm_pmu.c ++++ b/drivers/perf/arm_pmu.c +@@ -751,8 +751,8 @@ static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd, + cpu_pm_pmu_setup(armpmu, cmd); + break; + case CPU_PM_EXIT: +- cpu_pm_pmu_setup(armpmu, cmd); + case CPU_PM_ENTER_FAILED: ++ cpu_pm_pmu_setup(armpmu, cmd); + armpmu->start(armpmu); + break; + default: +diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c +index 76afe1449cab..ecd71efe8ea0 100644 +--- a/drivers/rapidio/devices/rio_mport_cdev.c ++++ 
b/drivers/rapidio/devices/rio_mport_cdev.c +@@ -1742,6 +1742,7 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, + + if (copy_from_user(&dev_info, arg, sizeof(dev_info))) + return -EFAULT; ++ dev_info.name[sizeof(dev_info.name) - 1] = '\0'; + + rmcd_debug(RDEV, "name:%s ct:0x%x did:0x%x hc:0x%x", dev_info.name, + dev_info.comptag, dev_info.destid, dev_info.hopcount); +@@ -1873,6 +1874,7 @@ static int rio_mport_del_riodev(struct mport_cdev_priv *priv, void __user *arg) + + if (copy_from_user(&dev_info, arg, sizeof(dev_info))) + return -EFAULT; ++ dev_info.name[sizeof(dev_info.name) - 1] = '\0'; + + mport = priv->md->mport; + +diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c +index 5e963fe0e38d..fd1dff2bed21 100644 +--- a/drivers/s390/block/dasd_alias.c ++++ b/drivers/s390/block/dasd_alias.c +@@ -383,6 +383,20 @@ suborder_not_supported(struct dasd_ccw_req *cqr) + char msg_format; + char msg_no; + ++ /* ++ * intrc values ENODEV, ENOLINK and EPERM ++ * will be obtained from sleep_on to indicate that no ++ * IO operation can be started ++ */ ++ if (cqr->intrc == -ENODEV) ++ return 1; ++ ++ if (cqr->intrc == -ENOLINK) ++ return 1; ++ ++ if (cqr->intrc == -EPERM) ++ return 1; ++ + sense = dasd_get_sense(&cqr->irb); + if (!sense) + return 0; +@@ -447,12 +461,8 @@ static int read_unit_address_configuration(struct dasd_device *device, + lcu->flags &= ~NEED_UAC_UPDATE; + spin_unlock_irqrestore(&lcu->lock, flags); + +- do { +- rc = dasd_sleep_on(cqr); +- if (rc && suborder_not_supported(cqr)) +- return -EOPNOTSUPP; +- } while (rc && (cqr->retries > 0)); +- if (rc) { ++ rc = dasd_sleep_on(cqr); ++ if (rc && !suborder_not_supported(cqr)) { + spin_lock_irqsave(&lcu->lock, flags); + lcu->flags |= NEED_UAC_UPDATE; + spin_unlock_irqrestore(&lcu->lock, flags); +diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c +index 6d5065f679ac..64d70de98cdb 100644 +--- a/drivers/s390/scsi/zfcp_erp.c ++++
b/drivers/s390/scsi/zfcp_erp.c +@@ -11,6 +11,7 @@ + #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + + #include <linux/kthread.h> ++#include <linux/bug.h> + #include "zfcp_ext.h" + #include "zfcp_reqlist.h" + +@@ -245,6 +246,12 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, + struct zfcp_erp_action *erp_action; + struct zfcp_scsi_dev *zfcp_sdev; + ++ if (WARN_ON_ONCE(need != ZFCP_ERP_ACTION_REOPEN_LUN && ++ need != ZFCP_ERP_ACTION_REOPEN_PORT && ++ need != ZFCP_ERP_ACTION_REOPEN_PORT_FORCED && ++ need != ZFCP_ERP_ACTION_REOPEN_ADAPTER)) ++ return NULL; ++ + switch (need) { + case ZFCP_ERP_ACTION_REOPEN_LUN: + zfcp_sdev = sdev_to_zfcp(sdev); +diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c +index 95dbee89b758..021b5e7f4b7a 100644 +--- a/drivers/xen/swiotlb-xen.c ++++ b/drivers/xen/swiotlb-xen.c +@@ -371,8 +371,8 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, + /* Convert the size to actually allocated. */ + size = 1UL << (order + XEN_PAGE_SHIFT); + +- if (((dev_addr + size - 1 <= dma_mask)) || +- range_straddles_page_boundary(phys, size)) ++ if (!WARN_ON((dev_addr + size - 1 > dma_mask) || ++ range_straddles_page_boundary(phys, size))) + xen_destroy_contiguous_region(phys, order); + + xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs); +diff --git a/fs/adfs/super.c b/fs/adfs/super.c +index c9fdfb112933..e42c30001509 100644 +--- a/fs/adfs/super.c ++++ b/fs/adfs/super.c +@@ -368,6 +368,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) + struct buffer_head *bh; + struct object_info root_obj; + unsigned char *b_data; ++ unsigned int blocksize; + struct adfs_sb_info *asb; + struct inode *root; + int ret = -EINVAL; +@@ -419,8 +420,10 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) + goto error_free_bh; + } + ++ blocksize = 1 << dr->log2secsize; + brelse(bh); +- if (sb_set_blocksize(sb, 1 << dr->log2secsize)) { ++ ++ if 
(sb_set_blocksize(sb, blocksize)) { + bh = sb_bread(sb, ADFS_DISCRECORD / sb->s_blocksize); + if (!bh) { + adfs_error(sb, "couldn't read superblock on " +diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c +index 14c4062a6e58..a5905f97b3db 100644 +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -6130,68 +6130,21 @@ static int changed_extent(struct send_ctx *sctx, + { + int ret = 0; + +- if (sctx->cur_ino != sctx->cmp_key->objectid) { +- +- if (result == BTRFS_COMPARE_TREE_CHANGED) { +- struct extent_buffer *leaf_l; +- struct extent_buffer *leaf_r; +- struct btrfs_file_extent_item *ei_l; +- struct btrfs_file_extent_item *ei_r; +- +- leaf_l = sctx->left_path->nodes[0]; +- leaf_r = sctx->right_path->nodes[0]; +- ei_l = btrfs_item_ptr(leaf_l, +- sctx->left_path->slots[0], +- struct btrfs_file_extent_item); +- ei_r = btrfs_item_ptr(leaf_r, +- sctx->right_path->slots[0], +- struct btrfs_file_extent_item); +- +- /* +- * We may have found an extent item that has changed +- * only its disk_bytenr field and the corresponding +- * inode item was not updated. This case happens due to +- * very specific timings during relocation when a leaf +- * that contains file extent items is COWed while +- * relocation is ongoing and its in the stage where it +- * updates data pointers. So when this happens we can +- * safely ignore it since we know it's the same extent, +- * but just at different logical and physical locations +- * (when an extent is fully replaced with a new one, we +- * know the generation number must have changed too, +- * since snapshot creation implies committing the current +- * transaction, and the inode item must have been updated +- * as well). +- * This replacement of the disk_bytenr happens at +- * relocation.c:replace_file_extents() through +- * relocation.c:btrfs_reloc_cow_block(). 
+- */ +- if (btrfs_file_extent_generation(leaf_l, ei_l) == +- btrfs_file_extent_generation(leaf_r, ei_r) && +- btrfs_file_extent_ram_bytes(leaf_l, ei_l) == +- btrfs_file_extent_ram_bytes(leaf_r, ei_r) && +- btrfs_file_extent_compression(leaf_l, ei_l) == +- btrfs_file_extent_compression(leaf_r, ei_r) && +- btrfs_file_extent_encryption(leaf_l, ei_l) == +- btrfs_file_extent_encryption(leaf_r, ei_r) && +- btrfs_file_extent_other_encoding(leaf_l, ei_l) == +- btrfs_file_extent_other_encoding(leaf_r, ei_r) && +- btrfs_file_extent_type(leaf_l, ei_l) == +- btrfs_file_extent_type(leaf_r, ei_r) && +- btrfs_file_extent_disk_bytenr(leaf_l, ei_l) != +- btrfs_file_extent_disk_bytenr(leaf_r, ei_r) && +- btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) == +- btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) && +- btrfs_file_extent_offset(leaf_l, ei_l) == +- btrfs_file_extent_offset(leaf_r, ei_r) && +- btrfs_file_extent_num_bytes(leaf_l, ei_l) == +- btrfs_file_extent_num_bytes(leaf_r, ei_r)) +- return 0; +- } +- +- inconsistent_snapshot_error(sctx, result, "extent"); +- return -EIO; +- } ++ /* ++ * We have found an extent item that changed without the inode item ++ * having changed. This can happen either after relocation (where the ++ * disk_bytenr of an extent item is replaced at ++ * relocation.c:replace_file_extents()) or after deduplication into a ++ * file in both the parent and send snapshots (where an extent item can ++ * get modified or replaced with a new one). Note that deduplication ++ * updates the inode item, but it only changes the iversion (sequence ++ * field in the inode item) of the inode, so if a file is deduplicated ++ * the same number of times in both the parent and send snapshots, its ++ * iversion becomes the same in both snapshots, whence the inode item is ++ * the same on both snapshots. 
++ */ ++ if (sctx->cur_ino != sctx->cmp_key->objectid) ++ return 0; + + if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { + if (result != BTRFS_COMPARE_TREE_DELETED) +diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c +index 73c1fbca0c35..fa8f56e6f665 100644 +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -2052,6 +2052,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) + } + } else { + spin_unlock(&fs_info->trans_lock); ++ /* ++ * The previous transaction was aborted and was already removed ++ * from the list of transactions at fs_info->trans_list. So we ++ * abort to prevent writing a new superblock that reflects a ++ * corrupt state (pointing to trees with unwritten nodes/leafs). ++ */ ++ if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) { ++ ret = -EROFS; ++ goto cleanup_transaction; ++ } + } + + extwriter_counter_dec(cur_trans, trans->type); +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index 85294fef1051..358e930df4ac 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -5019,8 +5019,7 @@ static inline int btrfs_chunk_max_errors(struct map_lookup *map) + + if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10 | +- BTRFS_BLOCK_GROUP_RAID5 | +- BTRFS_BLOCK_GROUP_DUP)) { ++ BTRFS_BLOCK_GROUP_RAID5)) { + max_errors = 1; + } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) { + max_errors = 2; +diff --git a/fs/ceph/super.h b/fs/ceph/super.h +index 3e27a28aa44a..60b70f0985f6 100644 +--- a/fs/ceph/super.h ++++ b/fs/ceph/super.h +@@ -517,7 +517,12 @@ static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, + long long release_count, + long long ordered_count) + { +- smp_mb__before_atomic(); ++ /* ++ * Makes sure operations that setup readdir cache (update page ++ * cache and i_size) are strongly ordered w.r.t. the following ++ * atomic64_set() operations. 
++ */ ++ smp_mb(); + atomic64_set(&ci->i_complete_seq[0], release_count); + atomic64_set(&ci->i_complete_seq[1], ordered_count); + } +diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c +index e1c4e0b12b4c..0376db8a74f8 100644 +--- a/fs/ceph/xattr.c ++++ b/fs/ceph/xattr.c +@@ -75,7 +75,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, + const char *ns_field = " pool_namespace="; + char buf[128]; + size_t len, total_len = 0; +- int ret; ++ ssize_t ret; + + pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); + +@@ -99,11 +99,8 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, + if (pool_ns) + total_len += strlen(ns_field) + pool_ns->len; + +- if (!size) { +- ret = total_len; +- } else if (total_len > size) { +- ret = -ERANGE; +- } else { ++ ret = total_len; ++ if (size >= total_len) { + memcpy(val, buf, len); + ret = len; + if (pool_name) { +@@ -761,8 +758,11 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, + if (err) + return err; + err = -ENODATA; +- if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) ++ if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) { + err = vxattr->getxattr_cb(ci, value, size); ++ if (size && size < err) ++ err = -ERANGE; ++ } + return err; + } + +diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c +index 33cd844579ae..57c62ff4e8d6 100644 +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -554,10 +554,10 @@ static bool + server_unresponsive(struct TCP_Server_Info *server) + { + /* +- * We need to wait 2 echo intervals to make sure we handle such ++ * We need to wait 3 echo intervals to make sure we handle such + * situations right: + * 1s client sends a normal SMB request +- * 2s client gets a response ++ * 3s client gets a response + * 30s echo workqueue job pops, and decides we got a response recently + * and don't need to send another + * ... 
+@@ -566,9 +566,9 @@ server_unresponsive(struct TCP_Server_Info *server) + */ + if ((server->tcpStatus == CifsGood || + server->tcpStatus == CifsNeedNegotiate) && +- time_after(jiffies, server->lstrp + 2 * server->echo_interval)) { ++ time_after(jiffies, server->lstrp + 3 * server->echo_interval)) { + cifs_dbg(VFS, "Server %s has not responded in %lu seconds. Reconnecting...\n", +- server->hostname, (2 * server->echo_interval) / HZ); ++ server->hostname, (3 * server->echo_interval) / HZ); + cifs_reconnect(server); + wake_up(&server->response_q); + return true; +diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c +index f40e3953e7fe..a6d9e841a375 100644 +--- a/fs/coda/psdev.c ++++ b/fs/coda/psdev.c +@@ -187,8 +187,11 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, + if (req->uc_opcode == CODA_OPEN_BY_FD) { + struct coda_open_by_fd_out *outp = + (struct coda_open_by_fd_out *)req->uc_data; +- if (!outp->oh.result) ++ if (!outp->oh.result) { + outp->fh = fget(outp->fd); ++ if (!outp->fh) ++ return -EBADF; ++ } + } + + wake_up(&req->uc_sleep); +diff --git a/include/linux/acpi.h b/include/linux/acpi.h +index 13c105121a18..d7a9700b9333 100644 +--- a/include/linux/acpi.h ++++ b/include/linux/acpi.h +@@ -324,7 +324,10 @@ void acpi_set_irq_model(enum acpi_irq_model_id model, + #ifdef CONFIG_X86_IO_APIC + extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); + #else +-#define acpi_get_override_irq(gsi, trigger, polarity) (-1) ++static inline int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) ++{ ++ return -1; ++} + #endif + /* + * This function undoes the effect of one call to acpi_register_gsi(). +diff --git a/include/linux/coda.h b/include/linux/coda.h +index d30209b9cef8..0ca0c83fdb1c 100644 +--- a/include/linux/coda.h ++++ b/include/linux/coda.h +@@ -58,8 +58,7 @@ Mellon the rights to redistribute these changes without encumbrance. 
+ #ifndef _CODA_HEADER_ + #define _CODA_HEADER_ + +-#if defined(__linux__) + typedef unsigned long long u_quad_t; +-#endif ++ + #include <uapi/linux/coda.h> + #endif +diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h +index 15170954aa2b..57d2b2faf6a3 100644 +--- a/include/linux/coda_psdev.h ++++ b/include/linux/coda_psdev.h +@@ -19,6 +19,17 @@ struct venus_comm { + struct mutex vc_mutex; + }; + ++/* messages between coda filesystem in kernel and Venus */ ++struct upc_req { ++ struct list_head uc_chain; ++ caddr_t uc_data; ++ u_short uc_flags; ++ u_short uc_inSize; /* Size is at most 5000 bytes */ ++ u_short uc_outSize; ++ u_short uc_opcode; /* copied from data to save lookup */ ++ int uc_unique; ++ wait_queue_head_t uc_sleep; /* process' wait queue */ ++}; + + static inline struct venus_comm *coda_vcp(struct super_block *sb) + { +diff --git a/include/uapi/linux/coda_psdev.h b/include/uapi/linux/coda_psdev.h +index aa6623efd2dd..d50d51a57fe4 100644 +--- a/include/uapi/linux/coda_psdev.h ++++ b/include/uapi/linux/coda_psdev.h +@@ -7,19 +7,6 @@ + #define CODA_PSDEV_MAJOR 67 + #define MAX_CODADEVS 5 /* how many do we allow */ + +- +-/* messages between coda filesystem in kernel and Venus */ +-struct upc_req { +- struct list_head uc_chain; +- caddr_t uc_data; +- u_short uc_flags; +- u_short uc_inSize; /* Size is at most 5000 bytes */ +- u_short uc_outSize; +- u_short uc_opcode; /* copied from data to save lookup */ +- int uc_unique; +- wait_queue_head_t uc_sleep; /* process' wait queue */ +-}; +- + #define CODA_REQ_ASYNC 0x1 + #define CODA_REQ_READ 0x2 + #define CODA_REQ_WRITE 0x4 +diff --git a/ipc/mqueue.c b/ipc/mqueue.c +index 5c0ae912f2f2..dccd4ecb786a 100644 +--- a/ipc/mqueue.c ++++ b/ipc/mqueue.c +@@ -372,7 +372,6 @@ static void mqueue_evict_inode(struct inode *inode) + { + struct mqueue_inode_info *info; + struct user_struct *user; +- unsigned long mq_bytes, mq_treesize; + struct ipc_namespace *ipc_ns; + struct msg_msg *msg, *nmsg; + 
LIST_HEAD(tmp_msg); +@@ -395,16 +394,18 @@ static void mqueue_evict_inode(struct inode *inode) + free_msg(msg); + } + +- /* Total amount of bytes accounted for the mqueue */ +- mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + +- min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * +- sizeof(struct posix_msg_tree_node); +- +- mq_bytes = mq_treesize + (info->attr.mq_maxmsg * +- info->attr.mq_msgsize); +- + user = info->user; + if (user) { ++ unsigned long mq_bytes, mq_treesize; ++ ++ /* Total amount of bytes accounted for the mqueue */ ++ mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + ++ min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * ++ sizeof(struct posix_msg_tree_node); ++ ++ mq_bytes = mq_treesize + (info->attr.mq_maxmsg * ++ info->attr.mq_msgsize); ++ + spin_lock(&mq_lock); + user->mq_bytes -= mq_bytes; + /* +diff --git a/kernel/module.c b/kernel/module.c +index 94528b891027..4b372c14d9a1 100644 +--- a/kernel/module.c ++++ b/kernel/module.c +@@ -3391,8 +3391,7 @@ static bool finished_loading(const char *name) + sched_annotate_sleep(); + mutex_lock(&module_mutex); + mod = find_module_all(name, strlen(name), true); +- ret = !mod || mod->state == MODULE_STATE_LIVE +- || mod->state == MODULE_STATE_GOING; ++ ret = !mod || mod->state == MODULE_STATE_LIVE; + mutex_unlock(&module_mutex); + + return ret; +@@ -3560,8 +3559,7 @@ again: + mutex_lock(&module_mutex); + old = find_module_all(mod->name, strlen(mod->name), true); + if (old != NULL) { +- if (old->state == MODULE_STATE_COMING +- || old->state == MODULE_STATE_UNFORMED) { ++ if (old->state != MODULE_STATE_LIVE) { + /* Wait in case it fails to load. 
*/ + mutex_unlock(&module_mutex); + err = wait_event_interruptible(module_wq, +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index c4a0ad18c859..7420f5f36094 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -1712,6 +1712,11 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) + return keep_regs; + } + ++static struct ftrace_ops * ++ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); ++static struct ftrace_ops * ++ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); ++ + static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, + int filter_hash, + bool inc) +@@ -1840,15 +1845,17 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, + } + + /* +- * If the rec had TRAMP enabled, then it needs to +- * be cleared. As TRAMP can only be enabled iff +- * there is only a single ops attached to it. +- * In otherwords, always disable it on decrementing. +- * In the future, we may set it if rec count is +- * decremented to one, and the ops that is left +- * has a trampoline. ++ * The TRAMP needs to be set only if rec count ++ * is decremented to one, and the ops that is ++ * left has a trampoline. As TRAMP can only be ++ * enabled if there is only a single ops attached ++ * to it. + */ +- rec->flags &= ~FTRACE_FL_TRAMP; ++ if (ftrace_rec_count(rec) == 1 && ++ ftrace_find_tramp_ops_any(rec)) ++ rec->flags |= FTRACE_FL_TRAMP; ++ else ++ rec->flags &= ~FTRACE_FL_TRAMP; + + /* + * flags will be cleared in ftrace_check_record() +@@ -2041,11 +2048,6 @@ static void print_ip_ins(const char *fmt, const unsigned char *p) + printk(KERN_CONT "%s%02x", i ? 
":" : "", p[i]); + } + +-static struct ftrace_ops * +-ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); +-static struct ftrace_ops * +-ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); +- + enum ftrace_bug_type ftrace_bug_type; + const void *ftrace_expected; + +diff --git a/mm/cma.c b/mm/cma.c +index 56761e40d191..c4a34c813d47 100644 +--- a/mm/cma.c ++++ b/mm/cma.c +@@ -277,6 +277,12 @@ int __init cma_declare_contiguous(phys_addr_t base, + */ + alignment = max(alignment, (phys_addr_t)PAGE_SIZE << + max_t(unsigned long, MAX_ORDER - 1, pageblock_order)); ++ if (fixed && base & (alignment - 1)) { ++ ret = -EINVAL; ++ pr_err("Region at %pa must be aligned to %pa bytes\n", ++ &base, &alignment); ++ goto err; ++ } + base = ALIGN(base, alignment); + size = ALIGN(size, alignment); + limit &= ~(alignment - 1); +@@ -307,6 +313,13 @@ int __init cma_declare_contiguous(phys_addr_t base, + if (limit == 0 || limit > memblock_end) + limit = memblock_end; + ++ if (base + size > limit) { ++ ret = -EINVAL; ++ pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n", ++ &size, &base, &limit); ++ goto err; ++ } ++ + /* Reserve memory */ + if (fixed) { + if (memblock_is_region_reserved(base, size) || +diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c +index 524068d71bc1..9d9f6bb1e56e 100644 +--- a/security/selinux/ss/policydb.c ++++ b/security/selinux/ss/policydb.c +@@ -275,6 +275,8 @@ static int rangetr_cmp(struct hashtab *h, const void *k1, const void *k2) + return v; + } + ++static int (*destroy_f[SYM_NUM]) (void *key, void *datum, void *datap); ++ + /* + * Initialize a policy database structure. 
+ */ +@@ -322,8 +324,10 @@ static int policydb_init(struct policydb *p) + out: + hashtab_destroy(p->filename_trans); + hashtab_destroy(p->range_tr); +- for (i = 0; i < SYM_NUM; i++) ++ for (i = 0; i < SYM_NUM; i++) { ++ hashtab_map(p->symtab[i].table, destroy_f[i], NULL); + hashtab_destroy(p->symtab[i].table); ++ } + return rc; + } + +diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c +index e6cef5a160e7..d089c711355a 100644 +--- a/tools/objtool/elf.c ++++ b/tools/objtool/elf.c +@@ -305,7 +305,7 @@ static int read_symbols(struct elf *elf) + if (sym->type != STT_FUNC) + continue; + sym->pfunc = sym->cfunc = sym; +- coldstr = strstr(sym->name, ".cold."); ++ coldstr = strstr(sym->name, ".cold"); + if (!coldstr) + continue; + |