From df00160fd19ac3ffbd1887ee9bdc64384f4f8421 Mon Sep 17 00:00:00 2001 From: Mike Pagano Date: Tue, 16 Aug 2016 19:53:09 -0400 Subject: Linux patch 4.6.7 --- 0000_README | 4 + 1006_linux-4.6.7.patch | 2184 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2188 insertions(+) create mode 100644 1006_linux-4.6.7.patch diff --git a/0000_README b/0000_README index a0547abb..90202fee 100644 --- a/0000_README +++ b/0000_README @@ -67,6 +67,10 @@ Patch: 1005_linux-4.6.6.patch From: http://www.kernel.org Desc: Linux 4.6.6 +Patch: 1006_linux-4.6.7.patch +From: http://www.kernel.org +Desc: Linux 4.6.7 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1006_linux-4.6.7.patch b/1006_linux-4.6.7.patch new file mode 100644 index 00000000..0bd4bcea --- /dev/null +++ b/1006_linux-4.6.7.patch @@ -0,0 +1,2184 @@ +diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt +index 0a94224ad296..9e3c3b33514c 100644 +--- a/Documentation/cpu-freq/pcc-cpufreq.txt ++++ b/Documentation/cpu-freq/pcc-cpufreq.txt +@@ -159,8 +159,8 @@ to be strictly associated with a P-state. + + 2.2 cpuinfo_transition_latency: + ------------------------------- +-The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification +-does not include a field to expose this value currently. ++The cpuinfo_transition_latency field is 0. The PCC specification does ++not include a field to expose this value currently. + + 2.3 cpuinfo_cur_freq: + --------------------- +diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt +index 54944c71b819..2a4ee6302122 100644 +--- a/Documentation/x86/pat.txt ++++ b/Documentation/x86/pat.txt +@@ -196,3 +196,35 @@ Another, more verbose way of getting PAT related debug messages is with + "debugpat" boot parameter. With this parameter, various debug messages are + printed to dmesg log. + ++PAT Initialization ++------------------ ++ ++The following table describes how PAT is initialized under various ++configurations. The PAT MSR must be updated by Linux in order to support WC ++and WT attributes. Otherwise, the PAT MSR has the value programmed in it ++by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests. 
++ ++ MTRR PAT Call Sequence PAT State PAT MSR ++ ========================================================= ++ E E MTRR -> PAT init Enabled OS ++ E D MTRR -> PAT init Disabled - ++ D E MTRR -> PAT disable Disabled BIOS ++ D D MTRR -> PAT disable Disabled - ++ - np/E PAT -> PAT disable Disabled BIOS ++ - np/D PAT -> PAT disable Disabled - ++ E !P/E MTRR -> PAT init Disabled BIOS ++ D !P/E MTRR -> PAT disable Disabled BIOS ++ !M !P/E MTRR stub -> PAT disable Disabled BIOS ++ ++ Legend ++ ------------------------------------------------ ++ E Feature enabled in CPU ++ D Feature disabled/unsupported in CPU ++ np "nopat" boot option specified ++ !P CONFIG_X86_PAT option unset ++ !M CONFIG_MTRR option unset ++ Enabled PAT state set to enabled ++ Disabled PAT state set to disabled ++ OS PAT initializes PAT MSR with OS setting ++ BIOS PAT keeps PAT MSR with BIOS setting ++ +diff --git a/Makefile b/Makefile +index bee1a1692fed..5fe9a7a9cd65 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 4 + PATCHLEVEL = 6 +-SUBLEVEL = 6 ++SUBLEVEL = 7 + EXTRAVERSION = + NAME = Charred Weasel + +diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c +index 087acb569b63..5f221acd21ae 100644 +--- a/arch/arm/kernel/sys_oabi-compat.c ++++ b/arch/arm/kernel/sys_oabi-compat.c +@@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd, + mm_segment_t fs; + long ret, err, i; + +- if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event))) ++ if (maxevents <= 0 || ++ maxevents > (INT_MAX/sizeof(*kbuf)) || ++ maxevents > (INT_MAX/sizeof(*events))) + return -EINVAL; ++ if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents)) ++ return -EFAULT; + kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; +@@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid, + + if (nsops < 1 || nsops > SEMOPM) + return -EINVAL; ++ if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops)) ++ return -EFAULT; + sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL); + if (!sops) + return -ENOMEM; +diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S +index 71f99d5f7a06..6021318bfbb0 100644 +--- a/arch/mips/kernel/scall64-n32.S ++++ b/arch/mips/kernel/scall64-n32.S +@@ -344,7 +344,7 @@ EXPORT(sysn32_call_table) + PTR sys_ni_syscall /* available, was setaltroot */ + PTR sys_add_key + PTR sys_request_key +- PTR sys_keyctl /* 6245 */ ++ PTR compat_sys_keyctl /* 6245 */ + PTR sys_set_thread_area + PTR sys_inotify_init + PTR sys_inotify_add_watch +diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S +index 91b43eea2d5a..71fe3259a5e3 100644 +--- a/arch/mips/kernel/scall64-o32.S ++++ b/arch/mips/kernel/scall64-o32.S +@@ -500,7 +500,7 @@ EXPORT(sys32_call_table) + PTR sys_ni_syscall /* available, was setaltroot */ + PTR sys_add_key /* 4280 */ + PTR sys_request_key +- PTR sys_keyctl ++ PTR compat_sys_keyctl + PTR sys_set_thread_area + PTR sys_inotify_init + PTR sys_inotify_add_watch /* 4285 */ +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 7cd32c038286..4e1b060b8481 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -126,7 +126,7 @@ config PPC + select IRQ_FORCED_THREADING + select HAVE_RCU_TABLE_FREE if SMP + select HAVE_SYSCALL_TRACEPOINTS +- select HAVE_BPF_JIT ++ select HAVE_BPF_JIT if CPU_BIG_ENDIAN + select HAVE_ARCH_JUMP_LABEL + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_HAS_GCOV_PROFILE_ALL +diff --git a/arch/powerpc/kernel/eeh_driver.c 
b/arch/powerpc/kernel/eeh_driver.c +index 31e4c7e1a4b4..c42627645b54 100644 +--- a/arch/powerpc/kernel/eeh_driver.c ++++ b/arch/powerpc/kernel/eeh_driver.c +@@ -648,7 +648,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + } else { +- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(bus); + pci_unlock_rescan_remove(); +@@ -698,10 +697,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, + */ + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); + eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); +- if (pe->type & EEH_PE_VF) ++ if (pe->type & EEH_PE_VF) { + eeh_add_virt_device(edev, NULL); +- else ++ } else { ++ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + pcibios_add_pci_devices(bus); ++ } + } else if (frozen_bus && rmv_data->removed) { + pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); + ssleep(5); +diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl +index 4cddd17153fb..f848572169ea 100644 +--- a/arch/x86/entry/syscalls/syscall_32.tbl ++++ b/arch/x86/entry/syscalls/syscall_32.tbl +@@ -294,7 +294,7 @@ + # 285 sys_setaltroot + 286 i386 add_key sys_add_key + 287 i386 request_key sys_request_key +-288 i386 keyctl sys_keyctl ++288 i386 keyctl sys_keyctl compat_sys_keyctl + 289 i386 ioprio_set sys_ioprio_set + 290 i386 ioprio_get sys_ioprio_get + 291 i386 inotify_init sys_inotify_init +diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h +index 9d3a96c4da78..01c2d14ec05f 100644 +--- a/arch/x86/include/asm/microcode.h ++++ b/arch/x86/include/asm/microcode.h +@@ -133,13 +133,11 @@ static inline unsigned int x86_cpuid_family(void) + #ifdef CONFIG_MICROCODE + extern void __init load_ucode_bsp(void); + extern void load_ucode_ap(void); +-extern int __init save_microcode_in_initrd(void); + void reload_early_microcode(void); + extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); + #else + static inline void __init load_ucode_bsp(void) { } + static inline void load_ucode_ap(void) { } +-static inline int __init save_microcode_in_initrd(void) { return 0; } + static inline void reload_early_microcode(void) { } + static inline bool + get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } +diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h +index b94f6f64e23d..dbff1456d215 100644 +--- a/arch/x86/include/asm/mtrr.h ++++ b/arch/x86/include/asm/mtrr.h +@@ -24,6 +24,7 @@ + #define _ASM_X86_MTRR_H + + #include ++#include + + + /* +@@ -83,9 +84,12 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) + static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) + { + } ++static inline void mtrr_bp_init(void) ++{ ++ pat_disable("MTRRs disabled, skipping PAT initialization too."); ++} + + #define mtrr_ap_init() do {} while (0) +-#define mtrr_bp_init() do {} while (0) + #define set_mtrr_aps_delayed_init() do {} while (0) + #define mtrr_aps_init() do {} while (0) + #define mtrr_bp_restore() do {} while (0) +diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h +index ca6c228d5e62..0b1ff4c1c14e 100644 +--- a/arch/x86/include/asm/pat.h ++++ b/arch/x86/include/asm/pat.h +@@ -5,8 +5,8 @@ + #include + + bool pat_enabled(void); ++void pat_disable(const char *reason); + extern void pat_init(void); +-void pat_init_cache_modes(u64); + + extern int reserve_memtype(u64 start, u64 end, 
+ enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); +diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c +index ac360bfbbdb6..12823b6ebd6d 100644 +--- a/arch/x86/kernel/cpu/microcode/core.c ++++ b/arch/x86/kernel/cpu/microcode/core.c +@@ -175,7 +175,7 @@ void load_ucode_ap(void) + } + } + +-int __init save_microcode_in_initrd(void) ++static int __init save_microcode_in_initrd(void) + { + struct cpuinfo_x86 *c = &boot_cpu_data; + +@@ -691,4 +691,5 @@ int __init microcode_init(void) + return error; + + } ++fs_initcall(save_microcode_in_initrd); + late_initcall(microcode_init); +diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c +index 19f57360dfd2..8d7a29ed9377 100644 +--- a/arch/x86/kernel/cpu/mtrr/generic.c ++++ b/arch/x86/kernel/cpu/mtrr/generic.c +@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void) + pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); + } + ++/* PAT setup for BP. We need to go through sync steps here */ ++void __init mtrr_bp_pat_init(void) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ prepare_set(); ++ ++ pat_init(); ++ ++ post_set(); ++ local_irq_restore(flags); ++} ++ + /* Grab all of the MTRR state for this CPU into *state */ + bool __init get_mtrr_state(void) + { + struct mtrr_var_range *vrs; +- unsigned long flags; + unsigned lo, dummy; + unsigned int i; + +@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void) + + mtrr_state_set = 1; + +- /* PAT setup for BP. We need to go through sync steps here */ +- local_irq_save(flags); +- prepare_set(); +- +- pat_init(); +- +- post_set(); +- local_irq_restore(flags); +- + return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED); + } + +diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c +index 10f8d4796240..7d393ecdeee6 100644 +--- a/arch/x86/kernel/cpu/mtrr/main.c ++++ b/arch/x86/kernel/cpu/mtrr/main.c +@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void) + /* BIOS may override */ + __mtrr_enabled = get_mtrr_state(); + ++ if (mtrr_enabled()) ++ mtrr_bp_pat_init(); ++ + if (mtrr_cleanup(phys_addr)) { + changed_by_mtrr_cleanup = 1; + mtrr_if->set_all(); +@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void) + } + } + +- if (!mtrr_enabled()) ++ if (!mtrr_enabled()) { + pr_info("MTRR: Disabled\n"); ++ ++ /* ++ * PAT initialization relies on MTRR's rendezvous handler. ++ * Skip PAT init until the handler can initialize both ++ * features independently. ++ */ ++ pat_disable("MTRRs disabled, skipping PAT initialization too."); ++ } + } + + void mtrr_ap_init(void) +diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h +index 951884dcc433..6c7ced07d16d 100644 +--- a/arch/x86/kernel/cpu/mtrr/mtrr.h ++++ b/arch/x86/kernel/cpu/mtrr/mtrr.h +@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); + void fill_mtrr_var_range(unsigned int index, + u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); + bool get_mtrr_state(void); ++void mtrr_bp_pat_init(void); + + extern void set_mtrr_ops(const struct mtrr_ops *ops); + +diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c +index 9d56f271d519..6df291c2987c 100644 +--- a/arch/x86/mm/init.c ++++ b/arch/x86/mm/init.c +@@ -696,13 +696,6 @@ void free_initmem(void) + void __init free_initrd_mem(unsigned long start, unsigned long end) + { + /* +- * Remember, initrd memory may contain microcode or other useful things. 
+- * Before we lose initrd mem, we need to find a place to hold them +- * now that normal virtual memory is enabled. +- */ +- save_microcode_in_initrd(); +- +- /* + * end could be not aligned, and We can not align that, + * decompresser could be confused by aligned initrd_end + * We already reserve the end partial page before in +diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c +index faec01e7a17d..fb0604f11eec 100644 +--- a/arch/x86/mm/pat.c ++++ b/arch/x86/mm/pat.c +@@ -40,11 +40,22 @@ + static bool boot_cpu_done; + + static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT); ++static void init_cache_modes(void); + +-static inline void pat_disable(const char *reason) ++void pat_disable(const char *reason) + { ++ if (!__pat_enabled) ++ return; ++ ++ if (boot_cpu_done) { ++ WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n"); ++ return; ++ } ++ + __pat_enabled = 0; + pr_info("x86/PAT: %s\n", reason); ++ ++ init_cache_modes(); + } + + static int __init nopat(char *str) +@@ -181,7 +192,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg) + * configuration. + * Using lower indices is preferred, so we start with highest index. + */ +-void pat_init_cache_modes(u64 pat) ++static void __init_cache_modes(u64 pat) + { + enum page_cache_mode cache; + char pat_msg[33]; +@@ -202,14 +213,11 @@ static void pat_bsp_init(u64 pat) + { + u64 tmp_pat; + +- if (!cpu_has_pat) { ++ if (!boot_cpu_has(X86_FEATURE_PAT)) { + pat_disable("PAT not supported by CPU."); + return; + } + +- if (!pat_enabled()) +- goto done; +- + rdmsrl(MSR_IA32_CR_PAT, tmp_pat); + if (!tmp_pat) { + pat_disable("PAT MSR is 0, disabled."); +@@ -218,16 +226,12 @@ static void pat_bsp_init(u64 pat) + + wrmsrl(MSR_IA32_CR_PAT, pat); + +-done: +- pat_init_cache_modes(pat); ++ __init_cache_modes(pat); + } + + static void pat_ap_init(u64 pat) + { +- if (!pat_enabled()) +- return; +- +- if (!cpu_has_pat) { ++ if (!boot_cpu_has(X86_FEATURE_PAT)) { + /* + * If this happens we are on a secondary CPU, but switched to + * PAT on the boot CPU. We have no way to undo PAT. +@@ -238,18 +242,32 @@ static void pat_ap_init(u64 pat) + wrmsrl(MSR_IA32_CR_PAT, pat); + } + +-void pat_init(void) ++static void init_cache_modes(void) + { +- u64 pat; +- struct cpuinfo_x86 *c = &boot_cpu_data; ++ u64 pat = 0; ++ static int init_cm_done; + +- if (!pat_enabled()) { ++ if (init_cm_done) ++ return; ++ ++ if (boot_cpu_has(X86_FEATURE_PAT)) { ++ /* ++ * CPU supports PAT. Set PAT table to be consistent with ++ * PAT MSR. This case supports "nopat" boot option, and ++ * virtual machine environments which support PAT without ++ * MTRRs. In specific, Xen has unique setup to PAT MSR. ++ * ++ * If PAT MSR returns 0, it is considered invalid and emulates ++ * as No PAT. ++ */ ++ rdmsrl(MSR_IA32_CR_PAT, pat); ++ } ++ ++ if (!pat) { + /* + * No PAT. Emulate the PAT table that corresponds to the two +- * cache bits, PWT (Write Through) and PCD (Cache Disable). This +- * setup is the same as the BIOS default setup when the system +- * has PAT but the "nopat" boot option has been specified. This +- * emulated PAT table is used when MSR_IA32_CR_PAT returns 0. ++ * cache bits, PWT (Write Through) and PCD (Cache Disable). ++ * This setup is also the same as the BIOS default setup. 
+ * + * PTE encoding: + * +@@ -266,10 +284,36 @@ void pat_init(void) + */ + pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) | + PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC); ++ } ++ ++ __init_cache_modes(pat); ++ ++ init_cm_done = 1; ++} ++ ++/** ++ * pat_init - Initialize PAT MSR and PAT table ++ * ++ * This function initializes PAT MSR and PAT table with an OS-defined value ++ * to enable additional cache attributes, WC and WT. ++ * ++ * This function must be called on all CPUs using the specific sequence of ++ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this ++ * procedure for PAT. ++ */ ++void pat_init(void) ++{ ++ u64 pat; ++ struct cpuinfo_x86 *c = &boot_cpu_data; ++ ++ if (!pat_enabled()) { ++ init_cache_modes(); ++ return; ++ } + +- } else if ((c->x86_vendor == X86_VENDOR_INTEL) && +- (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || +- ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { ++ if ((c->x86_vendor == X86_VENDOR_INTEL) && ++ (((c->x86 == 0x6) && (c->x86_model <= 0xd)) || ++ ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) { + /* + * PAT support with the lower four entries. Intel Pentium 2, + * 3, M, and 4 are affected by PAT errata, which makes the +@@ -734,25 +778,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, + if (file->f_flags & O_DSYNC) + pcm = _PAGE_CACHE_MODE_UC_MINUS; + +-#ifdef CONFIG_X86_32 +- /* +- * On the PPro and successors, the MTRRs are used to set +- * memory types for physical addresses outside main memory, +- * so blindly setting UC or PWT on those pages is wrong. +- * For Pentiums and earlier, the surround logic should disable +- * caching for the high addresses through the KEN pin, but +- * we maintain the tradition of paranoia in this code. +- */ +- if (!pat_enabled() && +- !(boot_cpu_has(X86_FEATURE_MTRR) || +- boot_cpu_has(X86_FEATURE_K6_MTRR) || +- boot_cpu_has(X86_FEATURE_CYRIX_ARR) || +- boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) && +- (pfn << PAGE_SHIFT) >= __pa(high_memory)) { +- pcm = _PAGE_CACHE_MODE_UC; +- } +-#endif +- + *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | + cachemode2protval(pcm)); + return 1; +diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S +index 4400a43b9e28..f2a990285a5c 100644 +--- a/arch/x86/power/hibernate_asm_64.S ++++ b/arch/x86/power/hibernate_asm_64.S +@@ -24,7 +24,6 @@ + #include + + ENTRY(swsusp_arch_suspend) +- FRAME_BEGIN + movq $saved_context, %rax + movq %rsp, pt_regs_sp(%rax) + movq %rbp, pt_regs_bp(%rax) +@@ -51,6 +50,7 @@ ENTRY(swsusp_arch_suspend) + movq %cr3, %rax + movq %rax, restore_cr3(%rip) + ++ FRAME_BEGIN + call swsusp_save + FRAME_END + ret +@@ -111,7 +111,6 @@ ENTRY(core_restore_code) + */ + + ENTRY(restore_registers) +- FRAME_BEGIN + /* go back to the original page tables */ + movq %rbx, %cr3 + +@@ -152,6 +151,5 @@ ENTRY(restore_registers) + /* tell the hibernation core that we've just restored the memory */ + movq %rax, in_suspend(%rip) + +- FRAME_END + ret + ENDPROC(restore_registers) +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index 880862c7d9dd..d8cca75e3b3e 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -75,7 +75,6 @@ + #include + #include + #include +-#include + #include + + #ifdef CONFIG_ACPI +@@ -1511,7 +1510,6 @@ asmlinkage __visible void __init xen_start_kernel(void) + { + struct physdev_set_iopl set_iopl; + unsigned long initrd_start = 0; +- u64 pat; + int rc; + + if (!xen_start_info) +@@ -1618,13 +1616,6 @@ asmlinkage 
__visible void __init xen_start_kernel(void) + xen_start_info->nr_pages); + xen_reserve_special_pages(); + +- /* +- * Modify the cache mode translation tables to match Xen's PAT +- * configuration. +- */ +- rdmsrl(MSR_IA32_CR_PAT, pat); +- pat_init_cache_modes(pat); +- + /* keep using Xen gdt for now; no urgent need to change it */ + + #ifdef CONFIG_X86_32 +diff --git a/block/genhd.c b/block/genhd.c +index 9f42526b4d62..3eebd256b765 100644 +--- a/block/genhd.c ++++ b/block/genhd.c +@@ -856,6 +856,7 @@ static void disk_seqf_stop(struct seq_file *seqf, void *v) + if (iter) { + class_dev_iter_exit(iter); + kfree(iter); ++ seqf->private = NULL; + } + } + +diff --git a/crypto/gcm.c b/crypto/gcm.c +index bec329b3de8d..d9ea5f9c0574 100644 +--- a/crypto/gcm.c ++++ b/crypto/gcm.c +@@ -639,7 +639,9 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, + + ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type, + CRYPTO_ALG_TYPE_HASH, +- CRYPTO_ALG_TYPE_AHASH_MASK); ++ CRYPTO_ALG_TYPE_AHASH_MASK | ++ crypto_requires_sync(algt->type, ++ algt->mask)); + if (IS_ERR(ghash_alg)) + return PTR_ERR(ghash_alg); + +diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c +index ea5815c5e128..bc769c448d4a 100644 +--- a/crypto/scatterwalk.c ++++ b/crypto/scatterwalk.c +@@ -72,7 +72,8 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out, + + void scatterwalk_done(struct scatter_walk *walk, int out, int more) + { +- if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more) ++ if (!more || walk->offset >= walk->sg->offset + walk->sg->length || ++ !(walk->offset & (PAGE_SIZE - 1))) + scatterwalk_pagedone(walk, out, more); + } + EXPORT_SYMBOL_GPL(scatterwalk_done); +diff --git a/drivers/char/random.c b/drivers/char/random.c +index b583e5336630..e511f34be177 100644 +--- a/drivers/char/random.c ++++ b/drivers/char/random.c +@@ -722,15 +722,18 @@ retry: + } + } + +-static void credit_entropy_bits_safe(struct entropy_store *r, int nbits) ++static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) + { + const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1)); + ++ if (nbits < 0) ++ return -EINVAL; ++ + /* Cap the value to avoid overflows */ + nbits = min(nbits, nbits_max); +- nbits = max(nbits, -nbits_max); + + credit_entropy_bits(r, nbits); ++ return 0; + } + + /********************************************************************* +@@ -1542,8 +1545,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) + return -EPERM; + if (get_user(ent_count, p)) + return -EFAULT; +- credit_entropy_bits_safe(&input_pool, ent_count); +- return 0; ++ return credit_entropy_bits_safe(&input_pool, ent_count); + case RNDADDENTROPY: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; +@@ -1557,8 +1559,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) + size); + if (retval < 0) + return retval; +- credit_entropy_bits_safe(&input_pool, ent_count); +- return 0; ++ return credit_entropy_bits_safe(&input_pool, ent_count); + case RNDZAPENTCNT: + case RNDCLEARPOOL: + /* +diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c +index 808a320e9d5d..2a0d58959acf 100644 +--- a/drivers/cpufreq/pcc-cpufreq.c ++++ b/drivers/cpufreq/pcc-cpufreq.c +@@ -555,8 +555,6 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) + policy->min = policy->cpuinfo.min_freq = + ioread32(&pcch_hdr->minimum_frequency) * 1000; + +- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; +- + pr_debug("init: policy->max is %d, policy->min 
is %d\n", + policy->max, policy->min); + out: +diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c +index 585a3b7915bd..f1b0eafecd0d 100644 +--- a/drivers/i2c/busses/i2c-i801.c ++++ b/drivers/i2c/busses/i2c-i801.c +@@ -244,6 +244,13 @@ struct i801_priv { + struct platform_device *mux_pdev; + #endif + struct platform_device *tco_pdev; ++ ++ /* ++ * If set to true the host controller registers are reserved for ++ * ACPI AML use. Protected by acpi_lock. ++ */ ++ bool acpi_reserved; ++ struct mutex acpi_lock; + }; + + #define FEATURE_SMBUS_PEC (1 << 0) +@@ -714,9 +721,15 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, + { + int hwpec; + int block = 0; +- int ret, xact = 0; ++ int ret = 0, xact = 0; + struct i801_priv *priv = i2c_get_adapdata(adap); + ++ mutex_lock(&priv->acpi_lock); ++ if (priv->acpi_reserved) { ++ mutex_unlock(&priv->acpi_lock); ++ return -EBUSY; ++ } ++ + hwpec = (priv->features & FEATURE_SMBUS_PEC) && (flags & I2C_CLIENT_PEC) + && size != I2C_SMBUS_QUICK + && size != I2C_SMBUS_I2C_BLOCK_DATA; +@@ -773,7 +786,8 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, + default: + dev_err(&priv->pci_dev->dev, "Unsupported transaction %d\n", + size); +- return -EOPNOTSUPP; ++ ret = -EOPNOTSUPP; ++ goto out; + } + + if (hwpec) /* enable/disable hardware PEC */ +@@ -796,11 +810,11 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, + ~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv)); + + if (block) +- return ret; ++ goto out; + if (ret) +- return ret; ++ goto out; + if ((read_write == I2C_SMBUS_WRITE) || (xact == I801_QUICK)) +- return 0; ++ goto out; + + switch (xact & 0x7f) { + case I801_BYTE: /* Result put in SMBHSTDAT0 */ +@@ -812,7 +826,10 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, + (inb_p(SMBHSTDAT1(priv)) << 8); + break; + } +- return 0; ++ ++out: ++ mutex_unlock(&priv->acpi_lock); ++ return ret; + } + + +@@ -1249,6 +1266,72 @@ static void i801_add_tco(struct i801_priv *priv) + priv->tco_pdev = pdev; + } + ++#ifdef CONFIG_ACPI ++static acpi_status ++i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits, ++ u64 *value, void *handler_context, void *region_context) ++{ ++ struct i801_priv *priv = handler_context; ++ struct pci_dev *pdev = priv->pci_dev; ++ acpi_status status; ++ ++ /* ++ * Once BIOS AML code touches the OpRegion we warn and inhibit any ++ * further access from the driver itself. This device is now owned ++ * by the system firmware. 
++ */ ++ mutex_lock(&priv->acpi_lock); ++ ++ if (!priv->acpi_reserved) { ++ priv->acpi_reserved = true; ++ ++ dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n"); ++ dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n"); ++ } ++ ++ if ((function & ACPI_IO_MASK) == ACPI_READ) ++ status = acpi_os_read_port(address, (u32 *)value, bits); ++ else ++ status = acpi_os_write_port(address, (u32)*value, bits); ++ ++ mutex_unlock(&priv->acpi_lock); ++ ++ return status; ++} ++ ++static int i801_acpi_probe(struct i801_priv *priv) ++{ ++ struct acpi_device *adev; ++ acpi_status status; ++ ++ adev = ACPI_COMPANION(&priv->pci_dev->dev); ++ if (adev) { ++ status = acpi_install_address_space_handler(adev->handle, ++ ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler, ++ NULL, priv); ++ if (ACPI_SUCCESS(status)) ++ return 0; ++ } ++ ++ return acpi_check_resource_conflict(&priv->pci_dev->resource[SMBBAR]); ++} ++ ++static void i801_acpi_remove(struct i801_priv *priv) ++{ ++ struct acpi_device *adev; ++ ++ adev = ACPI_COMPANION(&priv->pci_dev->dev); ++ if (!adev) ++ return; ++ ++ acpi_remove_address_space_handler(adev->handle, ++ ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler); ++} ++#else ++static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; } ++static inline void i801_acpi_remove(struct i801_priv *priv) { } ++#endif ++ + static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) + { + unsigned char temp; +@@ -1266,6 +1349,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) + priv->adapter.dev.parent = &dev->dev; + ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&dev->dev)); + priv->adapter.retries = 3; ++ mutex_init(&priv->acpi_lock); + + priv->pci_dev = dev; + switch (dev->device) { +@@ -1328,10 +1412,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) + return -ENODEV; + } + +- err = acpi_check_resource_conflict(&dev->resource[SMBBAR]); +- if (err) { ++ if (i801_acpi_probe(priv)) + return -ENODEV; +- } + + err = pcim_iomap_regions(dev, 1 << SMBBAR, + dev_driver_string(&dev->dev)); +@@ -1340,6 +1422,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) + "Failed to request SMBus region 0x%lx-0x%Lx\n", + priv->smba, + (unsigned long long)pci_resource_end(dev, SMBBAR)); ++ i801_acpi_remove(priv); + return err; + } + +@@ -1404,6 +1487,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) + err = i2c_add_adapter(&priv->adapter); + if (err) { + dev_err(&dev->dev, "Failed to add SMBus adapter\n"); ++ i801_acpi_remove(priv); + return err; + } + +@@ -1422,6 +1506,7 @@ static void i801_remove(struct pci_dev *dev) + + i801_del_mux(priv); + i2c_del_adapter(&priv->adapter); ++ i801_acpi_remove(priv); + pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg); + + platform_device_unregister(priv->tco_pdev); +diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c +index db760e84119f..b8df0f5e8c25 100644 +--- a/drivers/net/bonding/bond_netlink.c ++++ b/drivers/net/bonding/bond_netlink.c +@@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_net, struct net_device *bond_dev, + if (err < 0) + return err; + +- return register_netdevice(bond_dev); ++ err = register_netdevice(bond_dev); ++ ++ netif_carrier_off(bond_dev); ++ ++ return err; + } + + static size_t bond_get_size(const struct net_device *bond_dev) +diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c +index 
38db2e4d7d54..832401b41b98 100644 +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -231,7 +231,7 @@ err_dma: + dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb), + DMA_TO_DEVICE); + +- while (i > 0) { ++ while (i-- > 0) { + int index = (ring->end + i) % BGMAC_TX_RING_SLOTS; + struct bgmac_slot_info *slot = &ring->slots[index]; + u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1); +diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c +index 89469d5aae25..40e6f6c11f20 100644 +--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c +@@ -791,13 +791,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, + * in a bitmap and increasing the chain consumer only + * for the first successive completed entries. + */ +- bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE); ++ __set_bit(pos, p_spq->p_comp_bitmap); + + while (test_bit(p_spq->comp_bitmap_idx, + p_spq->p_comp_bitmap)) { +- bitmap_clear(p_spq->p_comp_bitmap, +- p_spq->comp_bitmap_idx, +- SPQ_RING_SIZE); ++ __clear_bit(p_spq->comp_bitmap_idx, ++ p_spq->p_comp_bitmap); + p_spq->comp_bitmap_idx++; + qed_chain_return_produced(&p_spq->chain); + } +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 8f3c55d03d5d..f58858b7972c 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -914,7 +914,6 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, + } + + macsec_skb_cb(skb)->req = req; +- macsec_skb_cb(skb)->rx_sa = rx_sa; + skb->dev = dev; + aead_request_set_callback(req, 0, macsec_decrypt_done, skb); + +@@ -1141,6 +1140,8 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) + } + } + ++ macsec_skb_cb(skb)->rx_sa = rx_sa; ++ + /* Disabled && !changed text => skip validation */ + if (hdr->tci_an & MACSEC_TCI_C || + secy->validate_frames != MACSEC_VALIDATE_DISABLED) +diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c +index f572b31a2b20..9ab88e1ed394 100644 +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -2404,8 +2404,6 @@ ppp_unregister_channel(struct ppp_channel *chan) + spin_lock_bh(&pn->all_channels_lock); + list_del(&pch->list); + spin_unlock_bh(&pn->all_channels_lock); +- put_net(pch->chan_net); +- pch->chan_net = NULL; + + pch->file.dead = 1; + wake_up_interruptible(&pch->file.rwait); +@@ -2999,6 +2997,9 @@ ppp_disconnect_channel(struct channel *pch) + */ + static void ppp_destroy_channel(struct channel *pch) + { ++ put_net(pch->chan_net); ++ pch->chan_net = NULL; ++ + atomic_dec(&channel_count); + + if (!pch->file.dead) { +diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h +index 8e343a3ca873..9d2704c83fa7 100644 +--- a/drivers/nvdimm/pfn.h ++++ b/drivers/nvdimm/pfn.h +@@ -33,7 +33,9 @@ struct nd_pfn_sb { + /* minor-version-1 additions for section alignment */ + __le32 start_pad; + __le32 end_trunc; +- u8 padding[4004]; ++ /* minor-version-2 record the base alignment of the mapping */ ++ __le32 align; ++ u8 padding[4000]; + __le64 checksum; + }; + +diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c +index e071e214feba..84f2372dd0bb 100644 +--- a/drivers/nvdimm/pfn_devs.c ++++ b/drivers/nvdimm/pfn_devs.c +@@ -329,6 +329,8 @@ struct device *nd_pfn_create(struct nd_region *nd_region) + int nd_pfn_validate(struct nd_pfn *nd_pfn) + { + u64 checksum, offset; ++ unsigned long align; ++ enum nd_pfn_mode mode; + struct nd_namespace_io *nsio; + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 
+ struct nd_namespace_common *ndns = nd_pfn->ndns; +@@ -360,6 +362,9 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) + pfn_sb->end_trunc = 0; + } + ++ if (__le16_to_cpu(pfn_sb->version_minor) < 2) ++ pfn_sb->align = 0; ++ + switch (le32_to_cpu(pfn_sb->mode)) { + case PFN_MODE_RAM: + case PFN_MODE_PMEM: +@@ -368,20 +373,50 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) + return -ENXIO; + } + ++ align = le32_to_cpu(pfn_sb->align); ++ offset = le64_to_cpu(pfn_sb->dataoff); ++ if (align == 0) ++ align = 1UL << ilog2(offset); ++ mode = le32_to_cpu(pfn_sb->mode); ++ + if (!nd_pfn->uuid) { +- /* from probe we allocate */ ++ /* ++ * When probing a namepace via nd_pfn_probe() the uuid ++ * is NULL (see: nd_pfn_devinit()) we init settings from ++ * pfn_sb ++ */ + nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); + if (!nd_pfn->uuid) + return -ENOMEM; ++ nd_pfn->align = align; ++ nd_pfn->mode = mode; + } else { +- /* from init we validate */ ++ /* ++ * When probing a pfn / dax instance we validate the ++ * live settings against the pfn_sb ++ */ + if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) + return -ENODEV; ++ ++ /* ++ * If the uuid validates, but other settings mismatch ++ * return EINVAL because userspace has managed to change ++ * the configuration without specifying new ++ * identification. ++ */ ++ if (nd_pfn->align != align || nd_pfn->mode != mode) { ++ dev_err(&nd_pfn->dev, ++ "init failed, settings mismatch\n"); ++ dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n", ++ nd_pfn->align, align, nd_pfn->mode, ++ mode); ++ return -EINVAL; ++ } + } + +- if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) { ++ if (align > nvdimm_namespace_capacity(ndns)) { + dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", +- nd_pfn->align, nvdimm_namespace_capacity(ndns)); ++ align, nvdimm_namespace_capacity(ndns)); + return -EINVAL; + } + +@@ -391,7 +426,6 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) + * namespace has changed since the pfn superblock was + * established. 
+ */ +- offset = le64_to_cpu(pfn_sb->dataoff); + nsio = to_nd_namespace_io(&ndns->dev); + if (offset >= resource_size(&nsio->res)) { + dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", +@@ -399,10 +433,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) + return -EBUSY; + } + +- nd_pfn->align = 1UL << ilog2(offset); +- if (!is_power_of_2(offset) || offset < PAGE_SIZE) { +- dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n", +- offset); ++ if ((align && !IS_ALIGNED(offset, align)) ++ || !IS_ALIGNED(offset, PAGE_SIZE)) { ++ dev_err(&nd_pfn->dev, ++ "bad offset: %#llx dax disabled align: %#lx\n", ++ offset, align); + return -ENXIO; + } + +diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c +index 92f536596b24..368efac7a950 100644 +--- a/drivers/nvdimm/pmem.c ++++ b/drivers/nvdimm/pmem.c +@@ -426,9 +426,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) + memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); + memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); + pfn_sb->version_major = cpu_to_le16(1); +- pfn_sb->version_minor = cpu_to_le16(1); ++ pfn_sb->version_minor = cpu_to_le16(2); + pfn_sb->start_pad = cpu_to_le32(start_pad); + pfn_sb->end_trunc = cpu_to_le32(end_trunc); ++ pfn_sb->align = cpu_to_le32(nd_pfn->align); + checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); + pfn_sb->checksum = cpu_to_le64(checksum); + +@@ -501,7 +502,6 @@ static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) + pmem = dev_get_drvdata(dev); + pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); + pmem->pfn_pad = start_pad + end_trunc; +- nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); + if (nd_pfn->mode == PFN_MODE_RAM) { + if (pmem->data_offset < SZ_8K) + return -EINVAL; +diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c +index 6c7fe4778793..7e156a0b65dd 100644 +--- a/drivers/regulator/qcom_smd-regulator.c ++++ b/drivers/regulator/qcom_smd-regulator.c +@@ -140,7 +140,6 @@ static const struct regulator_ops rpm_smps_ldo_ops = { + .enable = rpm_reg_enable, + .disable = rpm_reg_disable, + .is_enabled = rpm_reg_is_enabled, +- .list_voltage = regulator_list_voltage_linear_range, + + .get_voltage = rpm_reg_get_voltage, + .set_voltage = rpm_reg_set_voltage, +diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c +index ae8a70f703eb..23e60feb81bb 100644 +--- a/drivers/staging/rdma/hfi1/ud.c ++++ b/drivers/staging/rdma/hfi1/ud.c +@@ -678,8 +678,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) + u32 tlen = packet->tlen; + struct rvt_qp *qp = packet->qp; + bool has_grh = rcv_flags & HFI1_HAS_GRH; +- bool sc4_bit = has_sc4_bit(packet); +- u8 sc; ++ u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); + u32 bth1; + int is_mcast; + struct ib_grh *grh = NULL; +@@ -697,10 +696,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) + */ + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; +- u8 sl, sc5; ++ u8 sl; + +- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; +- sc5 |= sc4_bit; + sl = ibp->sc_to_sl[sc5]; + + process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD); +@@ -717,10 +714,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) + + if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) { + u16 slid = be16_to_cpu(hdr->lrh[3]); +- u8 sc5; +- +- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; +- sc5 |= sc4_bit; + + return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh); + } +@@ -745,10 +738,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) + if 
(qp->ibqp.qp_num > 1) { + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u16 slid; +- u8 sc5; +- +- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; +- sc5 |= sc4_bit; + + slid = be16_to_cpu(hdr->lrh[3]); + if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) { +@@ -790,10 +779,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) + /* Received on QP0, and so by definition, this is an SMP */ + struct opa_smp *smp = (struct opa_smp *)data; + u16 slid = be16_to_cpu(hdr->lrh[3]); +- u8 sc5; +- +- sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; +- sc5 |= sc4_bit; + + if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp)) + goto drop; +@@ -890,9 +875,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) + } + + wc.slid = be16_to_cpu(hdr->lrh[3]); +- sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; +- sc |= sc4_bit; +- wc.sl = ibp->sc_to_sl[sc]; ++ wc.sl = ibp->sc_to_sl[sc5]; + + /* + * Save the LMC lower bits if the destination LID is a unicast LID. +diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/staging/rdma/hfi1/verbs_txreq.c +index bc95c4112c61..d8fb056526f8 100644 +--- a/drivers/staging/rdma/hfi1/verbs_txreq.c ++++ b/drivers/staging/rdma/hfi1/verbs_txreq.c +@@ -92,11 +92,10 @@ void hfi1_put_txreq(struct verbs_txreq *tx) + + struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, + struct rvt_qp *qp) ++ __must_hold(&qp->s_lock) + { + struct verbs_txreq *tx = ERR_PTR(-EBUSY); +- unsigned long flags; + +- spin_lock_irqsave(&qp->s_lock, flags); + write_seqlock(&dev->iowait_lock); + if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { + struct hfi1_qp_priv *priv; +@@ -116,7 +115,6 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, + } + out: + write_sequnlock(&dev->iowait_lock); +- spin_unlock_irqrestore(&qp->s_lock, flags); + return tx; + } + +diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h +index 1cf69b2fe4a5..a1d6e0807f97 100644 +--- a/drivers/staging/rdma/hfi1/verbs_txreq.h ++++ b/drivers/staging/rdma/hfi1/verbs_txreq.h +@@ -73,6 +73,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, + + static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, + struct rvt_qp *qp) ++ __must_hold(&qp->slock) + { + struct verbs_txreq *tx; + struct hfi1_qp_priv *priv = qp->priv; +diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c +index 0ff27818bb87..25b9f178c8d3 100644 +--- a/drivers/tty/serial/mvebu-uart.c ++++ b/drivers/tty/serial/mvebu-uart.c +@@ -299,6 +299,8 @@ static int mvebu_uart_startup(struct uart_port *port) + static void mvebu_uart_shutdown(struct uart_port *port) + { + writel(0, port->membase + UART_CTRL); ++ ++ free_irq(port->irq, port); + } + + static void mvebu_uart_set_termios(struct uart_port *port, +diff --git a/fs/dcache.c b/fs/dcache.c +index 44008e3fafc4..5612631b7b46 100644 +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -578,7 +578,6 @@ static struct dentry *dentry_kill(struct dentry *dentry) + + failed: + spin_unlock(&dentry->d_lock); +- cpu_relax(); + return dentry; /* try again with same dentry */ + } + +@@ -752,6 +751,8 @@ void dput(struct dentry *dentry) + return; + + repeat: ++ might_sleep(); ++ + rcu_read_lock(); + if (likely(fast_dput(dentry))) { + rcu_read_unlock(); +@@ -783,8 +784,10 @@ repeat: + + kill_it: + dentry = dentry_kill(dentry); +- if (dentry) ++ if (dentry) { ++ cond_resched(); + goto repeat; ++ } + } + EXPORT_SYMBOL(dput); + +diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c +index fe1f50fe764f..f97110461c19 100644 +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ 
-208,6 +208,9 @@ static int ext4_init_block_bitmap(struct super_block *sb, + memset(bh->b_data, 0, sb->s_blocksize); + + bit_max = ext4_num_base_meta_clusters(sb, block_group); ++ if ((bit_max >> 3) >= bh->b_size) ++ return -EFSCORRUPTED; ++ + for (bit = 0; bit < bit_max; bit++) + ext4_set_bit(bit, bh->b_data); + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index de692b91c166..8211698600c2 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -376,9 +376,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) + ext4_fsblk_t block = ext4_ext_pblock(ext); + int len = ext4_ext_get_actual_len(ext); + ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); +- ext4_lblk_t last = lblock + len - 1; + +- if (len == 0 || lblock > last) ++ /* ++ * We allow neither: ++ * - zero length ++ * - overflow/wrap-around ++ */ ++ if (lblock + len <= lblock) + return 0; + return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); + } +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 250c2df04a92..58197a7c3c2c 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -205,9 +205,9 @@ void ext4_evict_inode(struct inode *inode) + * Note that directories do not have this problem because they + * don't use page cache. + */ +- if (ext4_should_journal_data(inode) && +- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && +- inode->i_ino != EXT4_JOURNAL_INO) { ++ if (inode->i_ino != EXT4_JOURNAL_INO && ++ ext4_should_journal_data(inode) && ++ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; + +@@ -2741,13 +2741,36 @@ retry: + done = true; + } + } +- ext4_journal_stop(handle); ++ /* ++ * Caution: If the handle is synchronous, ++ * ext4_journal_stop() can wait for transaction commit ++ * to finish which may depend on writeback of pages to ++ * complete or on page lock to be released. In that ++ * case, we have to wait until after after we have ++ * submitted all the IO, released page locks we hold, ++ * and dropped io_end reference (for extent conversion ++ * to be able to complete) before stopping the handle. ++ */ ++ if (!ext4_handle_valid(handle) || handle->h_sync == 0) { ++ ext4_journal_stop(handle); ++ handle = NULL; ++ } + /* Submit prepared bio */ + ext4_io_submit(&mpd.io_submit); + /* Unlock pages we didn't use */ + mpage_release_unused_pages(&mpd, give_up_on_write); +- /* Drop our io_end reference we got from init */ +- ext4_put_io_end(mpd.io_submit.io_end); ++ /* ++ * Drop our io_end reference we got from init. We have ++ * to be careful and use deferred io_end finishing if ++ * we are still holding the transaction as we can ++ * release the last reference to io_end which may end ++ * up doing unwritten extent conversion. ++ */ ++ if (handle) { ++ ext4_put_io_end_defer(mpd.io_submit.io_end); ++ ext4_journal_stop(handle); ++ } else ++ ext4_put_io_end(mpd.io_submit.io_end); + + if (ret == -ENOSPC && sbi->s_journal) { + /* +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 9d26fa2188f6..5f7ae0898ef7 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -2939,7 +2939,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, + ext4_error(sb, "Allocating blocks %llu-%llu which overlap " + "fs metadata", block, block+len); + /* File system mounted not to panic on error +- * Fix the bitmap and repeat the block allocation ++ * Fix the bitmap and return EFSCORRUPTED + * We leak some of the blocks here. 
+ */ + ext4_lock_group(sb, ac->ac_b_ex.fe_group); +@@ -2948,7 +2948,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, + ext4_unlock_group(sb, ac->ac_b_ex.fe_group); + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); + if (!err) +- err = -EAGAIN; ++ err = -EFSCORRUPTED; + goto out_err; + } + +@@ -4513,18 +4513,7 @@ repeat: + } + if (likely(ac->ac_status == AC_STATUS_FOUND)) { + *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); +- if (*errp == -EAGAIN) { +- /* +- * drop the reference that we took +- * in ext4_mb_use_best_found +- */ +- ext4_mb_release_context(ac); +- ac->ac_b_ex.fe_group = 0; +- ac->ac_b_ex.fe_start = 0; +- ac->ac_b_ex.fe_len = 0; +- ac->ac_status = AC_STATUS_CONTINUE; +- goto repeat; +- } else if (*errp) { ++ if (*errp) { + ext4_discard_allocated_blocks(ac); + goto errout; + } else { +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 304c712dbe12..7fca76b6cd61 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -2277,6 +2277,16 @@ static void ext4_orphan_cleanup(struct super_block *sb, + while (es->s_last_orphan) { + struct inode *inode; + ++ /* ++ * We may have encountered an error during cleanup; if ++ * so, skip the rest. ++ */ ++ if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { ++ jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); ++ es->s_last_orphan = 0; ++ break; ++ } ++ + inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); + if (IS_ERR(inode)) { + es->s_last_orphan = 0; +@@ -3415,6 +3425,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + goto failed_mount; + } + ++ if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { ++ ext4_msg(sb, KERN_ERR, ++ "Number of reserved GDT blocks insanely large: %d", ++ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks)); ++ goto failed_mount; ++ } ++ + if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { + if (blocksize != PAGE_SIZE) { + ext4_msg(sb, KERN_ERR, +diff --git a/fs/fuse/file.c b/fs/fuse/file.c +index dcad5e210525..3c7675fa664f 100644 +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -417,6 +417,15 @@ static int fuse_flush(struct file *file, fl_owner_t id) + fuse_sync_writes(inode); + inode_unlock(inode); + ++ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) && ++ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags)) ++ err = -ENOSPC; ++ if (test_bit(AS_EIO, &file->f_mapping->flags) && ++ test_and_clear_bit(AS_EIO, &file->f_mapping->flags)) ++ err = -EIO; ++ if (err) ++ return err; ++ + req = fuse_get_req_nofail_nopages(fc, file); + memset(&inarg, 0, sizeof(inarg)); + inarg.fh = ff->fh; +@@ -462,6 +471,21 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, + goto out; + + fuse_sync_writes(inode); ++ ++ /* ++ * Due to implementation of fuse writeback ++ * filemap_write_and_wait_range() does not catch errors. 
++ * We have to do this directly after fuse_sync_writes() ++ */ ++ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) && ++ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags)) ++ err = -ENOSPC; ++ if (test_bit(AS_EIO, &file->f_mapping->flags) && ++ test_and_clear_bit(AS_EIO, &file->f_mapping->flags)) ++ err = -EIO; ++ if (err) ++ goto out; ++ + err = sync_inode_metadata(inode, 1); + if (err) + goto out; +diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c +index 1ce67668a8e1..d302a5fff9ba 100644 +--- a/fs/fuse/inode.c ++++ b/fs/fuse/inode.c +@@ -926,7 +926,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) + arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | + FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | +- FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | ++ FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | + FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | + FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT; + req->in.h.opcode = FUSE_INIT; +diff --git a/fs/inode.c b/fs/inode.c +index 721fa18ead59..fd832eba4596 100644 +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -344,7 +344,7 @@ EXPORT_SYMBOL(inc_nlink); + void address_space_init_once(struct address_space *mapping) + { + memset(mapping, 0, sizeof(*mapping)); +- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); ++ INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT); + spin_lock_init(&mapping->tree_lock); + init_rwsem(&mapping->i_mmap_rwsem); + INIT_LIST_HEAD(&mapping->private_list); +diff --git a/fs/udf/super.c b/fs/udf/super.c +index 36661acaf33b..5e2c8c814e1b 100644 +--- a/fs/udf/super.c ++++ b/fs/udf/super.c +@@ -78,6 +78,15 @@ + #define VSD_FIRST_SECTOR_OFFSET 32768 + #define VSD_MAX_SECTOR_OFFSET 0x800000 + ++/* ++ * Maximum number of Terminating Descriptor / Logical Volume Integrity ++ * Descriptor redirections. The chosen numbers are arbitrary - just that we ++ * hopefully don't limit any real use of rewritten inode on write-once media ++ * but avoid looping for too long on corrupted media. ++ */ ++#define UDF_MAX_TD_NESTING 64 ++#define UDF_MAX_LVID_NESTING 1000 ++ + enum { UDF_MAX_LINKS = 0xffff }; + + /* These are the "meat" - everything else is stuffing */ +@@ -1541,42 +1550,52 @@ out_bh: + } + + /* +- * udf_load_logicalvolint +- * ++ * Find the prevailing Logical Volume Integrity Descriptor. 
+ */ + static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ad loc) + { +- struct buffer_head *bh = NULL; ++ struct buffer_head *bh, *final_bh; + uint16_t ident; + struct udf_sb_info *sbi = UDF_SB(sb); + struct logicalVolIntegrityDesc *lvid; ++ int indirections = 0; ++ ++ while (++indirections <= UDF_MAX_LVID_NESTING) { ++ final_bh = NULL; ++ while (loc.extLength > 0 && ++ (bh = udf_read_tagged(sb, loc.extLocation, ++ loc.extLocation, &ident))) { ++ if (ident != TAG_IDENT_LVID) { ++ brelse(bh); ++ break; ++ } ++ ++ brelse(final_bh); ++ final_bh = bh; + +- while (loc.extLength > 0 && +- (bh = udf_read_tagged(sb, loc.extLocation, +- loc.extLocation, &ident)) && +- ident == TAG_IDENT_LVID) { +- sbi->s_lvid_bh = bh; +- lvid = (struct logicalVolIntegrityDesc *)bh->b_data; ++ loc.extLength -= sb->s_blocksize; ++ loc.extLocation++; ++ } + +- if (lvid->nextIntegrityExt.extLength) +- udf_load_logicalvolint(sb, +- leea_to_cpu(lvid->nextIntegrityExt)); ++ if (!final_bh) ++ return; + +- if (sbi->s_lvid_bh != bh) +- brelse(bh); +- loc.extLength -= sb->s_blocksize; +- loc.extLocation++; ++ brelse(sbi->s_lvid_bh); ++ sbi->s_lvid_bh = final_bh; ++ ++ lvid = (struct logicalVolIntegrityDesc *)final_bh->b_data; ++ if (lvid->nextIntegrityExt.extLength == 0) ++ return; ++ ++ loc = leea_to_cpu(lvid->nextIntegrityExt); + } +- if (sbi->s_lvid_bh != bh) +- brelse(bh); ++ ++ udf_warn(sb, "Too many LVID indirections (max %u), ignoring.\n", ++ UDF_MAX_LVID_NESTING); ++ brelse(sbi->s_lvid_bh); ++ sbi->s_lvid_bh = NULL; + } + +-/* +- * Maximum number of Terminating Descriptor redirections. The chosen number is +- * arbitrary - just that we hopefully don't limit any real use of rewritten +- * inode on write-once media but avoid looping for too long on corrupted media. +- */ +-#define UDF_MAX_TD_NESTING 64 + + /* + * Process a main/reserve volume descriptor sequence. +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index 78181a88903b..54355a7e46de 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -4156,6 +4156,13 @@ static inline void netif_keep_dst(struct net_device *dev) + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM); + } + ++/* return true if dev can't cope with mtu frames that need vlan tag insertion */ ++static inline bool netif_reduces_vlan_mtu(struct net_device *dev) ++{ ++ /* TODO: reserve and use an additional IFF bit, if we get more users */ ++ return dev->priv_flags & IFF_MACSEC; ++} ++ + extern struct pernet_operations __net_initdata loopback_net_ops; + + /* Logging, debugging and troubleshooting/diagnostic helpers. */ +diff --git a/ipc/msg.c b/ipc/msg.c +index 1471db9a7e61..c6521c205cb4 100644 +--- a/ipc/msg.c ++++ b/ipc/msg.c +@@ -680,7 +680,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, + rcu_read_lock(); + ipc_lock_object(&msq->q_perm); + +- ipc_rcu_putref(msq, ipc_rcu_free); ++ ipc_rcu_putref(msq, msg_rcu_free); + /* raced with RMID? 
*/ + if (!ipc_valid_object(&msq->q_perm)) { + err = -EIDRM; +diff --git a/ipc/sem.c b/ipc/sem.c +index b3757ea0694b..5d2f875e8e2e 100644 +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -449,7 +449,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns + static inline void sem_lock_and_putref(struct sem_array *sma) + { + sem_lock(sma, NULL, -1); +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + } + + static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) +@@ -1392,7 +1392,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, + rcu_read_unlock(); + sem_io = ipc_alloc(sizeof(ushort)*nsems); + if (sem_io == NULL) { +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + return -ENOMEM; + } + +@@ -1426,20 +1426,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, + if (nsems > SEMMSL_FAST) { + sem_io = ipc_alloc(sizeof(ushort)*nsems); + if (sem_io == NULL) { +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + return -ENOMEM; + } + } + + if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + err = -EFAULT; + goto out_free; + } + + for (i = 0; i < nsems; i++) { + if (sem_io[i] > SEMVMX) { +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + err = -ERANGE; + goto out_free; + } +@@ -1731,7 +1731,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) + /* step 2: allocate new undo structure */ + new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); + if (!new) { +- ipc_rcu_putref(sma, ipc_rcu_free); ++ ipc_rcu_putref(sma, sem_rcu_free); + return ERR_PTR(-ENOMEM); + } + +diff --git a/lib/radix-tree.c b/lib/radix-tree.c +index 1624c4117961..9b9be3ffa1f6 100644 +--- a/lib/radix-tree.c ++++ b/lib/radix-tree.c +@@ -228,10 +228,11 @@ radix_tree_node_alloc(struct radix_tree_root *root) + + /* + * Even if the caller has preloaded, try to allocate from the +- * cache first for the new node to get accounted. ++ * cache first for the new node to get accounted to the memory ++ * cgroup. + */ + ret = kmem_cache_alloc(radix_tree_node_cachep, +- gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN); ++ gfp_mask | __GFP_NOWARN); + if (ret) + goto out; + +@@ -254,8 +255,7 @@ radix_tree_node_alloc(struct radix_tree_root *root) + kmemleak_update_trace(ret); + goto out; + } +- ret = kmem_cache_alloc(radix_tree_node_cachep, +- gfp_mask | __GFP_ACCOUNT); ++ ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); + out: + BUG_ON(radix_tree_is_indirect_ptr(ret)); + return ret; +@@ -302,6 +302,12 @@ static int __radix_tree_preload(gfp_t gfp_mask) + struct radix_tree_node *node; + int ret = -ENOMEM; + ++ /* ++ * Nodes preloaded by one cgroup can be be used by another cgroup, so ++ * they should never be accounted to any particular memory cgroup. 
++ */ ++ gfp_mask &= ~__GFP_ACCOUNT; ++ + preempt_disable(); + rtp = this_cpu_ptr(&radix_tree_preloads); + while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) { +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index bf860dbdd26e..eb2d761a83a2 100644 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -4064,14 +4064,32 @@ static struct cftype mem_cgroup_legacy_files[] = { + + static DEFINE_IDR(mem_cgroup_idr); + +-static void mem_cgroup_id_get(struct mem_cgroup *memcg) ++static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) + { +- atomic_inc(&memcg->id.ref); ++ atomic_add(n, &memcg->id.ref); + } + +-static void mem_cgroup_id_put(struct mem_cgroup *memcg) ++static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) + { +- if (atomic_dec_and_test(&memcg->id.ref)) { ++ while (!atomic_inc_not_zero(&memcg->id.ref)) { ++ /* ++ * The root cgroup cannot be destroyed, so it's refcount must ++ * always be >= 1. ++ */ ++ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) { ++ VM_BUG_ON(1); ++ break; ++ } ++ memcg = parent_mem_cgroup(memcg); ++ if (!memcg) ++ memcg = root_mem_cgroup; ++ } ++ return memcg; ++} ++ ++static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) ++{ ++ if (atomic_sub_and_test(n, &memcg->id.ref)) { + idr_remove(&mem_cgroup_idr, memcg->id.id); + memcg->id.id = 0; + +@@ -4080,6 +4098,16 @@ static void mem_cgroup_id_put(struct mem_cgroup *memcg) + } + } + ++static inline void mem_cgroup_id_get(struct mem_cgroup *memcg) ++{ ++ mem_cgroup_id_get_many(memcg, 1); ++} ++ ++static inline void mem_cgroup_id_put(struct mem_cgroup *memcg) ++{ ++ mem_cgroup_id_put_many(memcg, 1); ++} ++ + /** + * mem_cgroup_from_id - look up a memcg from a memcg id + * @id: the memcg id to look up +@@ -4716,6 +4744,8 @@ static void __mem_cgroup_clear_mc(void) + if (!mem_cgroup_is_root(mc.from)) + page_counter_uncharge(&mc.from->memsw, mc.moved_swap); + ++ mem_cgroup_id_put_many(mc.from, mc.moved_swap); ++ + /* + * we charged both to->memory and to->memsw, so we + * should uncharge to->memory. +@@ -4723,9 +4753,9 @@ static void __mem_cgroup_clear_mc(void) + if (!mem_cgroup_is_root(mc.to)) + page_counter_uncharge(&mc.to->memory, mc.moved_swap); + +- css_put_many(&mc.from->css, mc.moved_swap); ++ mem_cgroup_id_get_many(mc.to, mc.moved_swap); ++ css_put_many(&mc.to->css, mc.moved_swap); + +- /* we've already done css_get(mc.to) */ + mc.moved_swap = 0; + } + memcg_oom_recover(from); +@@ -5785,7 +5815,7 @@ subsys_initcall(mem_cgroup_init); + */ + void mem_cgroup_swapout(struct page *page, swp_entry_t entry) + { +- struct mem_cgroup *memcg; ++ struct mem_cgroup *memcg, *swap_memcg; + unsigned short oldid; + + VM_BUG_ON_PAGE(PageLRU(page), page); +@@ -5800,16 +5830,27 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) + if (!memcg) + return; + +- mem_cgroup_id_get(memcg); +- oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); ++ /* ++ * In case the memcg owning these pages has been offlined and doesn't ++ * have an ID allocated to it anymore, charge the closest online ++ * ancestor for the swap instead and transfer the memory+swap charge. 
++ */ ++ swap_memcg = mem_cgroup_id_get_online(memcg); ++ oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg)); + VM_BUG_ON_PAGE(oldid, page); +- mem_cgroup_swap_statistics(memcg, true); ++ mem_cgroup_swap_statistics(swap_memcg, true); + + page->mem_cgroup = NULL; + + if (!mem_cgroup_is_root(memcg)) + page_counter_uncharge(&memcg->memory, 1); + ++ if (memcg != swap_memcg) { ++ if (!mem_cgroup_is_root(swap_memcg)) ++ page_counter_charge(&swap_memcg->memsw, 1); ++ page_counter_uncharge(&memcg->memsw, 1); ++ } ++ + /* + * Interrupts should be disabled here because the caller holds the + * mapping->tree_lock lock which is taken with interrupts-off. It is +@@ -5848,11 +5889,14 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) + if (!memcg) + return 0; + ++ memcg = mem_cgroup_id_get_online(memcg); ++ + if (!mem_cgroup_is_root(memcg) && +- !page_counter_try_charge(&memcg->swap, 1, &counter)) ++ !page_counter_try_charge(&memcg->swap, 1, &counter)) { ++ mem_cgroup_id_put(memcg); + return -ENOMEM; ++ } + +- mem_cgroup_id_get(memcg); + oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); + VM_BUG_ON_PAGE(oldid, page); + mem_cgroup_swap_statistics(memcg, true); +diff --git a/mm/mempool.c b/mm/mempool.c +index 9b7a14a791cc..fd3a393b9eea 100644 +--- a/mm/mempool.c ++++ b/mm/mempool.c +@@ -310,7 +310,7 @@ EXPORT_SYMBOL(mempool_resize); + * returns NULL. Note that due to preallocation, this function + * *never* fails when called from process contexts. (it might + * fail if called from an IRQ context.) +- * Note: neither __GFP_NOMEMALLOC nor __GFP_ZERO are supported. ++ * Note: using __GFP_ZERO is not supported. + */ + void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) + { +@@ -319,27 +319,16 @@ void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) + wait_queue_t wait; + gfp_t gfp_temp; + +- /* If oom killed, memory reserves are essential to prevent livelock */ +- VM_WARN_ON_ONCE(gfp_mask & __GFP_NOMEMALLOC); +- /* No element size to zero on allocation */ + VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); +- + might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); + ++ gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ + gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ + gfp_mask |= __GFP_NOWARN; /* failures are OK */ + + gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO); + + repeat_alloc: +- if (likely(pool->curr_nr)) { +- /* +- * Don't allocate from emergency reserves if there are +- * elements available. This check is racy, but it will +- * be rechecked each loop. +- */ +- gfp_temp |= __GFP_NOMEMALLOC; +- } + + element = pool->alloc(gfp_temp, pool->pool_data); + if (likely(element != NULL)) +@@ -363,12 +352,11 @@ repeat_alloc: + * We use gfp mask w/o direct reclaim or IO for the first round. If + * alloc failed with that and @pool was empty, retry immediately. 
+ */ +- if ((gfp_temp & ~__GFP_NOMEMALLOC) != gfp_mask) { ++ if (gfp_temp != gfp_mask) { + spin_unlock_irqrestore(&pool->lock, flags); + gfp_temp = gfp_mask; + goto repeat_alloc; + } +- gfp_temp = gfp_mask; + + /* We must not sleep if !__GFP_DIRECT_RECLAIM */ + if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) { +diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c +index e7e62570bdb8..3a573a2dcee2 100644 +--- a/net/8021q/vlan_dev.c ++++ b/net/8021q/vlan_dev.c +@@ -146,10 +146,12 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, + + static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) + { +- /* TODO: gotta make sure the underlying layer can handle it, +- * maybe an IFF_VLAN_CAPABLE flag for devices? +- */ +- if (vlan_dev_priv(dev)->real_dev->mtu < new_mtu) ++ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; ++ unsigned int max_mtu = real_dev->mtu; ++ ++ if (netif_reduces_vlan_mtu(real_dev)) ++ max_mtu -= VLAN_HLEN; ++ if (max_mtu < new_mtu) + return -ERANGE; + + dev->mtu = new_mtu; +diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c +index c92b52f37d38..1270207f3d7c 100644 +--- a/net/8021q/vlan_netlink.c ++++ b/net/8021q/vlan_netlink.c +@@ -118,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, + { + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + struct net_device *real_dev; ++ unsigned int max_mtu; + __be16 proto; + int err; + +@@ -144,9 +145,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, + if (err < 0) + return err; + ++ max_mtu = netif_reduces_vlan_mtu(real_dev) ? real_dev->mtu - VLAN_HLEN : ++ real_dev->mtu; + if (!tb[IFLA_MTU]) +- dev->mtu = real_dev->mtu; +- else if (dev->mtu > real_dev->mtu) ++ dev->mtu = max_mtu; ++ else if (dev->mtu > max_mtu) + return -EINVAL; + + err = vlan_changelink(dev, tb, data); +diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c +index 160797722228..b32f5a4750bf 100644 +--- a/net/bridge/br_input.c ++++ b/net/bridge/br_input.c +@@ -213,6 +213,16 @@ drop: + } + EXPORT_SYMBOL_GPL(br_handle_frame_finish); + ++static void __br_handle_local_finish(struct sk_buff *skb) ++{ ++ struct net_bridge_port *p = br_port_get_rcu(skb->dev); ++ u16 vid = 0; ++ ++ /* check if vlan is allowed, to avoid spoofing */ ++ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) ++ br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); ++} ++ + /* note: already called with rcu_read_lock */ + static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) + { +@@ -279,6 +289,14 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) + case 0x01: /* IEEE MAC (Pause) */ + goto drop; + ++ case 0x0E: /* 802.1AB LLDP */ ++ fwd_mask |= p->br->group_fwd_mask; ++ if (fwd_mask & (1u << dest[5])) ++ goto forward; ++ *pskb = skb; ++ __br_handle_local_finish(skb); ++ return RX_HANDLER_PASS; ++ + default: + /* Allow selective forwarding for most other protocols */ + fwd_mask |= p->br->group_fwd_mask; +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index c124c3c12f7c..e2e78843301c 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -87,7 +87,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1; + EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); + + /* rfc5961 challenge ack rate limiting */ +-int sysctl_tcp_challenge_ack_limit = 100; ++int sysctl_tcp_challenge_ack_limit = 1000; + + int sysctl_tcp_stdurg __read_mostly; + int sysctl_tcp_rfc1337 __read_mostly; +@@ -3423,6 +3423,23 @@ static int tcp_ack_update_window(struct 
sock *sk, const struct sk_buff *skb, u32 + return flag; + } + ++static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, ++ u32 *last_oow_ack_time) ++{ ++ if (*last_oow_ack_time) { ++ s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); ++ ++ if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { ++ NET_INC_STATS_BH(net, mib_idx); ++ return true; /* rate-limited: don't send yet! */ ++ } ++ } ++ ++ *last_oow_ack_time = tcp_time_stamp; ++ ++ return false; /* not rate-limited: go ahead, send dupack now! */ ++} ++ + /* Return true if we're currently rate-limiting out-of-window ACKs and + * thus shouldn't send a dupack right now. We rate-limit dupacks in + * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS +@@ -3436,21 +3453,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, + /* Data packets without SYNs are not likely part of an ACK loop. */ + if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) && + !tcp_hdr(skb)->syn) +- goto not_rate_limited; +- +- if (*last_oow_ack_time) { +- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); +- +- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { +- NET_INC_STATS_BH(net, mib_idx); +- return true; /* rate-limited: don't send yet! */ +- } +- } +- +- *last_oow_ack_time = tcp_time_stamp; ++ return false; + +-not_rate_limited: +- return false; /* not rate-limited: go ahead, send dupack now! */ ++ return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time); + } + + /* RFC 5961 7 [ACK Throttling] */ +@@ -3460,21 +3465,26 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) + static u32 challenge_timestamp; + static unsigned int challenge_count; + struct tcp_sock *tp = tcp_sk(sk); +- u32 now; ++ u32 count, now; + + /* First check our per-socket dupack rate limit. */ +- if (tcp_oow_rate_limited(sock_net(sk), skb, +- LINUX_MIB_TCPACKSKIPPEDCHALLENGE, +- &tp->last_oow_ack_time)) ++ if (__tcp_oow_rate_limited(sock_net(sk), ++ LINUX_MIB_TCPACKSKIPPEDCHALLENGE, ++ &tp->last_oow_ack_time)) + return; + +- /* Then check the check host-wide RFC 5961 rate limit. */ ++ /* Then check host-wide RFC 5961 rate limit. 
*/ + now = jiffies / HZ; + if (now != challenge_timestamp) { ++ u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1; ++ + challenge_timestamp = now; +- challenge_count = 0; ++ WRITE_ONCE(challenge_count, half + ++ prandom_u32_max(sysctl_tcp_challenge_ack_limit)); + } +- if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { ++ count = READ_ONCE(challenge_count); ++ if (count > 0) { ++ WRITE_ONCE(challenge_count, count - 1); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + tcp_send_ack(sk); + } +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 79a03b87a771..7b8e903b2a97 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -236,7 +236,8 @@ void tcp_select_initial_window(int __space, __u32 mss, + /* Set window scaling on max possible window + * See RFC1323 for an explanation of the limit to 14 + */ +- space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); ++ space = max_t(u32, space, sysctl_tcp_rmem[2]); ++ space = max_t(u32, space, sysctl_rmem_max); + space = min_t(u32, space, *window_clamp); + while (space > 65535 && (*rcv_wscale) < 14) { + space >>= 1; +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index 8ec4b3089e20..e3fc0cdf82a9 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -3563,6 +3563,10 @@ restart: + if (state != INET6_IFADDR_STATE_DEAD) { + __ipv6_ifa_notify(RTM_DELADDR, ifa); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); ++ } else { ++ if (idev->cnf.forwarding) ++ addrconf_leave_anycast(ifa); ++ addrconf_leave_solict(ifa->idev, &ifa->addr); + } + + write_lock_bh(&idev->lock); +diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c +index 923abd6b3064..8d2f7c9b491d 100644 +--- a/net/irda/af_irda.c ++++ b/net/irda/af_irda.c +@@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, + } + + /* Check if we have opened a local TSAP */ +- if (!self->tsap) +- irda_open_tsap(self, LSAP_ANY, addr->sir_name); ++ if (!self->tsap) { ++ err = irda_open_tsap(self, LSAP_ANY, addr->sir_name); ++ if (err) ++ goto out; ++ } + + /* Move to connecting socket, start sending Connect Requests */ + sock->state = SS_CONNECTING; +diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c +index ad4fa49ad1db..9068369f8a1b 100644 +--- a/security/apparmor/apparmorfs.c ++++ b/security/apparmor/apparmorfs.c +@@ -331,6 +331,7 @@ static int aa_fs_seq_hash_show(struct seq_file *seq, void *v) + seq_printf(seq, "%.2x", profile->hash[i]); + seq_puts(seq, "\n"); + } ++ aa_put_profile(profile); + + return 0; + } -- cgit v1.2.3-65-gdbad
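
The most security-relevant hunk in this patch is the net/ipv4/tcp_input.c change that randomizes the host-wide challenge-ACK budget (and raises sysctl_tcp_challenge_ack_limit from 100 to 1000), the mitigation for the off-path ACK-counting attack tracked as CVE-2016-5696. Below is a minimal user-space sketch of that budgeting logic only, not kernel code: the helper names may_send_challenge_ack() and rand_below() are illustrative assumptions and do not appear in the patch, and the real kernel code additionally uses READ_ONCE()/WRITE_ONCE() because the counter is shared across CPUs without a lock.

/*
 * Sketch (user space, NOT kernel code) of the randomized per-second
 * challenge-ACK budget introduced by the tcp_input.c hunk above.
 * Identifiers here are assumptions chosen for illustration.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static unsigned int challenge_ack_limit = 1000; /* new default in 4.6.7 */
static unsigned long challenge_timestamp;       /* start of current 1s window */
static unsigned int challenge_count;            /* remaining budget in window */

/* Stand-in for the kernel's prandom_u32_max(): uniform value in [0, bound). */
static unsigned int rand_below(unsigned int bound)
{
        return (unsigned int)(rand() % bound);
}

static bool may_send_challenge_ack(void)
{
        unsigned long now = (unsigned long)time(NULL);

        if (now != challenge_timestamp) {
                /* New one-second window: budget is limit/2 plus a random
                 * amount, so an off-path peer can no longer infer the exact
                 * remaining count by probing. */
                unsigned int half = (challenge_ack_limit + 1) / 2;

                challenge_timestamp = now;
                challenge_count = half + rand_below(challenge_ack_limit);
        }

        if (challenge_count > 0) {
                challenge_count--;
                return true;    /* send the challenge ACK */
        }
        return false;           /* budget exhausted for this second */
}

int main(void)
{
        unsigned int sent = 0, tries = 5000;

        srand((unsigned int)time(NULL));
        for (unsigned int i = 0; i < tries; i++)
                if (may_send_challenge_ack())
                        sent++;

        printf("sent %u of %u challenge ACKs in one window\n", sent, tries);
        return 0;
}

Run repeatedly, the number of ACKs sent within a single one-second window varies between roughly limit/2 and 3*limit/2 instead of being a fixed, observable 100, which is what removes the predictable shared counter the attack relied on.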