Diffstat (limited to '0043-x86-vmx-Revert-VMX-use-a-single-global-APIC-access-p.patch')
-rw-r--r--  0043-x86-vmx-Revert-VMX-use-a-single-global-APIC-access-p.patch | 259
1 file changed, 259 insertions(+), 0 deletions(-)
diff --git a/0043-x86-vmx-Revert-VMX-use-a-single-global-APIC-access-p.patch b/0043-x86-vmx-Revert-VMX-use-a-single-global-APIC-access-p.patch
new file mode 100644
index 0000000..92b3bf1
--- /dev/null
+++ b/0043-x86-vmx-Revert-VMX-use-a-single-global-APIC-access-p.patch
@@ -0,0 +1,259 @@
+From 62e7fb702db4adaa9415ac87d95e0f461e32d9ca Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 24 Aug 2022 14:16:44 +0100
+Subject: [PATCH 43/87] x86/vmx: Revert "VMX: use a single, global APIC access
+ page"
+
+The claim "No accesses would ever go to this page." is false. A consequence
+of how Intel's APIC Acceleration works, and Xen's choice to have per-domain
+P2Ms (rather than per-vCPU P2Ms) means that the APIC page is fully read-write
+to any vCPU which is not in xAPIC mode.
+
+This reverts commit 58850b9074d3e7affdf3bc94c84e417ecfa4d165.
+
+This is XSA-412 / CVE-2022-42327.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 3b5beaf49033cddf4b2cc4e4d391b966f4203471)
+---
+ xen/arch/x86/hvm/vmx/vmx.c | 59 ++++++++++++++++++++++--------
+ xen/arch/x86/mm/shadow/set.c | 8 ----
+ xen/arch/x86/mm/shadow/types.h | 7 ----
+ xen/include/asm-x86/hvm/vmx/vmcs.h | 1 +
+ xen/include/asm-x86/mm.h | 20 +---------
+ 5 files changed, 46 insertions(+), 49 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index d429d76c18c9..3f4276531322 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -66,7 +66,8 @@ boolean_param("force-ept", opt_force_ept);
+ static void vmx_ctxt_switch_from(struct vcpu *v);
+ static void vmx_ctxt_switch_to(struct vcpu *v);
+
+-static int alloc_vlapic_mapping(void);
++static int vmx_alloc_vlapic_mapping(struct domain *d);
++static void vmx_free_vlapic_mapping(struct domain *d);
+ static void vmx_install_vlapic_mapping(struct vcpu *v);
+ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
+ unsigned int flags);
+@@ -77,8 +78,6 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content);
+ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
+ static void vmx_invlpg(struct vcpu *v, unsigned long linear);
+
+-static mfn_t __read_mostly apic_access_mfn = INVALID_MFN_INITIALIZER;
+-
+ /* Values for domain's ->arch.hvm_domain.pi_ops.flags. */
+ #define PI_CSW_FROM (1u << 0)
+ #define PI_CSW_TO (1u << 1)
+@@ -402,6 +401,7 @@ static int vmx_domain_initialise(struct domain *d)
+ .to = vmx_ctxt_switch_to,
+ .tail = vmx_do_resume,
+ };
++ int rc;
+
+ d->arch.ctxt_switch = &csw;
+
+@@ -411,15 +411,24 @@ static int vmx_domain_initialise(struct domain *d)
+ */
+ d->arch.hvm.vmx.exec_sp = is_hardware_domain(d) || opt_ept_exec_sp;
+
++ if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
++ return rc;
++
+ return 0;
+ }
+
++static void vmx_domain_relinquish_resources(struct domain *d)
++{
++ vmx_free_vlapic_mapping(d);
++}
++
+ static void domain_creation_finished(struct domain *d)
+ {
+ gfn_t gfn = gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE);
++ mfn_t apic_access_mfn = d->arch.hvm.vmx.apic_access_mfn;
+ bool ipat;
+
+- if ( !has_vlapic(d) || mfn_eq(apic_access_mfn, INVALID_MFN) )
++ if ( mfn_eq(apic_access_mfn, _mfn(0)) )
+ return;
+
+ ASSERT(epte_get_entry_emt(d, gfn, apic_access_mfn, 0, &ipat,
+@@ -2481,6 +2490,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
+ .cpu_up_prepare = vmx_cpu_up_prepare,
+ .cpu_dead = vmx_cpu_dead,
+ .domain_initialise = vmx_domain_initialise,
++ .domain_relinquish_resources = vmx_domain_relinquish_resources,
+ .domain_creation_finished = domain_creation_finished,
+ .vcpu_initialise = vmx_vcpu_initialise,
+ .vcpu_destroy = vmx_vcpu_destroy,
+@@ -2731,7 +2741,7 @@ const struct hvm_function_table * __init start_vmx(void)
+ {
+ set_in_cr4(X86_CR4_VMXE);
+
+- if ( vmx_vmcs_init() || alloc_vlapic_mapping() )
++ if ( vmx_vmcs_init() )
+ {
+ printk("VMX: failed to initialise.\n");
+ return NULL;
+@@ -3305,36 +3315,55 @@ gp_fault:
+ return X86EMUL_EXCEPTION;
+ }
+
+-static int __init alloc_vlapic_mapping(void)
++static int vmx_alloc_vlapic_mapping(struct domain *d)
+ {
+ struct page_info *pg;
+ mfn_t mfn;
+
+- if ( !cpu_has_vmx_virtualize_apic_accesses )
++ if ( !has_vlapic(d) || !cpu_has_vmx_virtualize_apic_accesses )
+ return 0;
+
+- pg = alloc_domheap_page(NULL, 0);
++ pg = alloc_domheap_page(d, MEMF_no_refcount);
+ if ( !pg )
+ return -ENOMEM;
+
+- /*
+- * Signal to shadow code that this page cannot be refcounted. This also
+- * makes epte_get_entry_emt() recognize this page as "special".
+- */
+- page_suppress_refcounting(pg);
++ if ( !get_page_and_type(pg, d, PGT_writable_page) )
++ {
++ /*
++ * The domain can't possibly know about this page yet, so failure
++ * here is a clear indication of something fishy going on.
++ */
++ domain_crash(d);
++ return -ENODATA;
++ }
+
+ mfn = page_to_mfn(pg);
+ clear_domain_page(mfn);
+- apic_access_mfn = mfn;
++ d->arch.hvm.vmx.apic_access_mfn = mfn;
+
+ return 0;
+ }
+
++static void vmx_free_vlapic_mapping(struct domain *d)
++{
++ mfn_t mfn = d->arch.hvm.vmx.apic_access_mfn;
++
++ d->arch.hvm.vmx.apic_access_mfn = _mfn(0);
++ if ( !mfn_eq(mfn, _mfn(0)) )
++ {
++ struct page_info *pg = mfn_to_page(mfn);
++
++ put_page_alloc_ref(pg);
++ put_page_and_type(pg);
++ }
++}
++
+ static void vmx_install_vlapic_mapping(struct vcpu *v)
+ {
++ mfn_t apic_access_mfn = v->domain->arch.hvm.vmx.apic_access_mfn;
+ paddr_t virt_page_ma, apic_page_ma;
+
+- if ( !has_vlapic(v->domain) || mfn_eq(apic_access_mfn, INVALID_MFN) )
++ if ( mfn_eq(apic_access_mfn, _mfn(0)) )
+ return;
+
+ ASSERT(cpu_has_vmx_virtualize_apic_accesses);
+diff --git a/xen/arch/x86/mm/shadow/set.c b/xen/arch/x86/mm/shadow/set.c
+index 87e9c6eeb219..bd6c68b547c9 100644
+--- a/xen/arch/x86/mm/shadow/set.c
++++ b/xen/arch/x86/mm/shadow/set.c
+@@ -101,14 +101,6 @@ shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d, p2m_type_t type)
+ owner = page_get_owner(pg);
+ }
+
+- /*
+- * Check whether refcounting is suppressed on this page. For example,
+- * VMX'es APIC access MFN is just a surrogate page. It doesn't actually
+- * get accessed, and hence there's no need to refcount it.
+- */
+- if ( pg && page_refcounting_suppressed(pg) )
+- return 0;
+-
+ if ( owner == dom_io )
+ owner = NULL;
+
+diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
+index 6970e7d6ea4a..814a4018535a 100644
+--- a/xen/arch/x86/mm/shadow/types.h
++++ b/xen/arch/x86/mm/shadow/types.h
+@@ -276,16 +276,9 @@ int shadow_set_l4e(struct domain *d, shadow_l4e_t *sl4e,
+ static void inline
+ shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
+ {
+- mfn_t mfn = shadow_l1e_get_mfn(sl1e);
+-
+ if ( !shadow_mode_refcounts(d) )
+ return;
+
+- if ( mfn_valid(mfn) &&
+- /* See the respective comment in shadow_get_page_from_l1e(). */
+- page_refcounting_suppressed(mfn_to_page(mfn)) )
+- return;
+-
+ put_page_from_l1e(sl1e, d);
+ }
+
+diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
+index 03c9ccf627ab..8073af323b96 100644
+--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
++++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
+@@ -58,6 +58,7 @@ struct ept_data {
+ #define _VMX_DOMAIN_PML_ENABLED 0
+ #define VMX_DOMAIN_PML_ENABLED (1ul << _VMX_DOMAIN_PML_ENABLED)
+ struct vmx_domain {
++ mfn_t apic_access_mfn;
+ /* VMX_DOMAIN_* */
+ unsigned int status;
+
+diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
+index 7bdf9c2290d8..e1bcea57a8f5 100644
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -83,7 +83,7 @@
+ #define PGC_state_offlined PG_mask(2, 6)
+ #define PGC_state_free PG_mask(3, 6)
+ #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
+-/* Page is not reference counted (see below for caveats) */
++/* Page is not reference counted */
+ #define _PGC_extra PG_shift(7)
+ #define PGC_extra PG_mask(1, 7)
+
+@@ -375,24 +375,6 @@ void zap_ro_mpt(mfn_t mfn);
+
+ bool is_iomem_page(mfn_t mfn);
+
+-/*
+- * Pages with no owner which may get passed to functions wanting to
+- * refcount them can be marked PGC_extra to bypass this refcounting (which
+- * would fail due to the lack of an owner).
+- *
+- * (For pages with owner PGC_extra has different meaning.)
+- */
+-static inline void page_suppress_refcounting(struct page_info *pg)
+-{
+- ASSERT(!page_get_owner(pg));
+- pg->count_info |= PGC_extra;
+-}
+-
+-static inline bool page_refcounting_suppressed(const struct page_info *pg)
+-{
+- return !page_get_owner(pg) && (pg->count_info & PGC_extra);
+-}
+-
+ struct platform_bad_page {
+ unsigned long mfn;
+ unsigned int order;
+--
+2.37.4
+
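
For orientation, the hunks above restore a per-domain lifecycle for the APIC
access page: vmx_alloc_vlapic_mapping() allocates the page and takes a
writable type reference during domain initialisation, and
vmx_free_vlapic_mapping() clears the per-domain field before dropping both
references at relinquish time. The standalone C sketch below mirrors that
control flow only; struct domain, struct page, and the helpers here are
simplified stand-ins for illustration, not Xen's real types or API.

    /*
     * Minimal sketch of the per-domain APIC access page lifecycle that the
     * patch above restores.  Mock types and refcounting stand in for Xen's
     * alloc_domheap_page()/get_page_and_type()/put_page_and_type().
     */
    #include <stdio.h>
    #include <stdlib.h>

    struct page { int refs; };

    struct domain {
        struct page *apic_access_page;   /* per-domain, no longer global */
    };

    /* Mirrors vmx_alloc_vlapic_mapping(): called from domain init. */
    static int alloc_vlapic_mapping(struct domain *d)
    {
        struct page *pg = calloc(1, sizeof(*pg));

        if ( !pg )
            return -1;                   /* -ENOMEM in the real code */

        pg->refs = 2;                    /* alloc ref + writable type ref */
        d->apic_access_page = pg;
        return 0;
    }

    /*
     * Mirrors vmx_free_vlapic_mapping(): clear the per-domain field first,
     * then drop both references, so no other path can pick up a stale page
     * mid-teardown.
     */
    static void free_vlapic_mapping(struct domain *d)
    {
        struct page *pg = d->apic_access_page;

        d->apic_access_page = NULL;
        if ( pg )
        {
            pg->refs -= 2;               /* put_page_alloc_ref() + put_page_and_type() */
            if ( pg->refs == 0 )
                free(pg);
        }
    }

    int main(void)
    {
        struct domain d = { 0 };

        if ( alloc_vlapic_mapping(&d) )
            return 1;
        printf("per-domain APIC access page allocated\n");
        free_vlapic_mapping(&d);
        printf("page released at relinquish time\n");
        return 0;
    }

Note the ordering in the free path: the real vmx_free_vlapic_mapping() also
zeroes d->arch.hvm.vmx.apic_access_mfn before putting the references, which
is what the sketch imitates.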