author     Florian Schmaus <flow@gentoo.org>  2022-10-19 10:38:35 +0200
committer  Florian Schmaus <flow@gentoo.org>  2022-10-19 10:38:35 +0200
commit     4a9cd4d8a4efd8f2e52483117b4009122393c6a6 (patch)
tree       8a467d1618431f5353fa676ca58b32bb18ba21aa
parent     Xen 4.15.4-pre-patchset-0.1 (diff)
download   xen-upstream-patches-4a9cd4d8a4efd8f2e52483117b4009122393c6a6.tar.gz
           xen-upstream-patches-4a9cd4d8a4efd8f2e52483117b4009122393c6a6.tar.bz2
           xen-upstream-patches-4a9cd4d8a4efd8f2e52483117b4009122393c6a6.zip
Xen 4.15.4-pre-patchset-1
Signed-off-by: Florian Schmaus <flow@gentoo.org>
-rw-r--r--  0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch | 4
-rw-r--r--  0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch | 4
-rw-r--r--  0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch | 4
-rw-r--r--  0004-tools-xenstored-Harden-corrupt.patch | 4
-rw-r--r--  0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch | 4
-rw-r--r--  0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch | 4
-rw-r--r--  0007-libxc-fix-compilation-error-with-gcc13.patch | 4
-rw-r--r--  0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch | 4
-rw-r--r--  0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch | 4
-rw-r--r--  0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch | 4
-rw-r--r--  0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch | 4
-rw-r--r--  0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch | 4
-rw-r--r--  0013-update-Xen-version-to-4.15.4-pre.patch | 4
-rw-r--r--  0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch | 4
-rw-r--r--  0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch | 4
-rw-r--r--  0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch | 4
-rw-r--r--  0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch | 4
-rw-r--r--  0018-x86-spec-ctrl-Support-IBPB-on-entry.patch | 4
-rw-r--r--  0019-x86-cpuid-Enumeration-for-BTC_NO.patch | 4
-rw-r--r--  0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch | 4
-rw-r--r--  0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch | 4
-rw-r--r--  0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch | 45
-rw-r--r--  0023-xl-relax-freemem-s-retry-calculation.patch | 80
-rw-r--r--  0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch | 59
-rw-r--r--  0025-xl-move-freemem-s-credit-expired-loop-exit.patch | 55
-rw-r--r--  0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch | 56
-rw-r--r--  0027-x86-deal-with-gcc12-release-build-issues.patch | 65
-rw-r--r--  0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch | 45
-rw-r--r--  0029-x86-also-suppress-use-of-MMX-insns.patch | 39
-rw-r--r--  0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch | 52
-rw-r--r--  0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch | 107
-rw-r--r--  0032-x86-msr-fix-X2APIC_LAST.patch | 66
-rw-r--r--  0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch | 54
-rw-r--r--  0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch | 68
-rw-r--r--  0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch | 123
-rw-r--r--  0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch | 38
-rw-r--r--  0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch | 67
-rw-r--r--  0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch | 33
-rw-r--r--  0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch | 38
-rw-r--r--  0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch | 153
-rw-r--r--  0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch | 62
-rw-r--r--  0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch | 167
-rw-r--r--  0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch | 138
-rw-r--r--  0044-x86-HAP-adjust-monitor-table-related-error-handling.patch | 77
-rw-r--r--  0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch | 76
-rw-r--r--  0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch | 279
-rw-r--r--  0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch | 100
-rw-r--r--  0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch | 115
-rw-r--r--  0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch | 181
-rw-r--r--  0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch | 197
-rw-r--r--  0051-libxl-docs-Use-arch-specific-default-paging-memory.patch | 147
-rw-r--r--  0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch | 189
-rw-r--r--  0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch | 108
-rw-r--r--  0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch | 289
-rw-r--r--  0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch | 66
-rw-r--r--  0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch | 112
-rw-r--r--  0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch | 44
-rw-r--r--  0058-xen-sched-introduce-cpupool_update_node_affinity.patch | 257
-rw-r--r--  0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch | 263
-rw-r--r--  0060-xen-sched-fix-cpu-hotplug.patch | 307
-rw-r--r--  0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch | 58
-rw-r--r--  0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch | 41
-rw-r--r--  0063-xen-gnttab-fix-gnttab_acquire_resource.patch | 69
-rw-r--r--  0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch | 59
-rw-r--r--  0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch | 97
-rw-r--r--  0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch | 31
-rw-r--r--  0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch | 42
-rw-r--r--  info.txt | 4
68 files changed, 4858 insertions, 44 deletions
diff --git a/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch b/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch
index 96eb282..32ff417 100644
--- a/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch
+++ b/0001-build-fix-exported-variable-name-CFLAGS_stack_bounda.patch
@@ -1,7 +1,7 @@
From f6e26ce7d9317abc41130ead6dc2443a7e2dde00 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Tue, 12 Jul 2022 11:20:46 +0200
-Subject: [PATCH 01/21] build: fix exported variable name CFLAGS_stack_boundary
+Subject: [PATCH 01/67] build: fix exported variable name CFLAGS_stack_boundary
Exporting a variable with a dash doesn't work reliably, they may be
striped from the environment when calling a sub-make or sub-shell.
@@ -63,5 +63,5 @@ index e857c0f2cc2c..a5b2041f9b96 100644
obj-y := stub.o
obj-$(XEN_BUILD_EFI) := $(filter-out %.init.o,$(EFIOBJ))
--
-2.35.1
+2.37.3
diff --git a/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch b/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch
index 45e4cfd..9f2f8e4 100644
--- a/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch
+++ b/0002-IOMMU-x86-work-around-bogus-gcc12-warning-in-hvm_gsi.patch
@@ -1,7 +1,7 @@
From b89b932cfe86556c5de4ad56702aed83142e22a3 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 12 Jul 2022 11:21:14 +0200
-Subject: [PATCH 02/21] IOMMU/x86: work around bogus gcc12 warning in
+Subject: [PATCH 02/67] IOMMU/x86: work around bogus gcc12 warning in
hvm_gsi_eoi()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
@@ -48,5 +48,5 @@ index 9544f3234e65..50865eec2c04 100644
/*
--
-2.35.1
+2.37.3
diff --git a/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch b/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch
index b79f4b3..777ef8a 100644
--- a/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch
+++ b/0003-ehci-dbgp-fix-selecting-n-th-ehci-controller.patch
@@ -2,7 +2,7 @@ From b53df5b4341fa97614ad064a7c8e781c88b6ed71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
<marmarek@invisiblethingslab.com>
Date: Tue, 12 Jul 2022 11:22:09 +0200
-Subject: [PATCH 03/21] ehci-dbgp: fix selecting n-th ehci controller
+Subject: [PATCH 03/67] ehci-dbgp: fix selecting n-th ehci controller
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -32,5 +32,5 @@ index c893d246defa..66b4811af24a 100644
dbgp->cap = find_dbgp(dbgp, num);
if ( !dbgp->cap )
--
-2.35.1
+2.37.3
diff --git a/0004-tools-xenstored-Harden-corrupt.patch b/0004-tools-xenstored-Harden-corrupt.patch
index 8b30166..62b7ec9 100644
--- a/0004-tools-xenstored-Harden-corrupt.patch
+++ b/0004-tools-xenstored-Harden-corrupt.patch
@@ -1,7 +1,7 @@
From 7fe638c28fa693d8bb8f9419de1220d4359a1b2d Mon Sep 17 00:00:00 2001
From: Julien Grall <jgrall@amazon.com>
Date: Tue, 12 Jul 2022 11:23:01 +0200
-Subject: [PATCH 04/21] tools/xenstored: Harden corrupt()
+Subject: [PATCH 04/67] tools/xenstored: Harden corrupt()
At the moment, corrupt() is neither checking for allocation failure
nor freeing the allocated memory.
@@ -40,5 +40,5 @@ index 8033c1e0eb28..9172dd767140 100644
check_store();
}
--
-2.35.1
+2.37.3
diff --git a/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch b/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch
index 158e2b0..7d79c2e 100644
--- a/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch
+++ b/0005-x86-spec-ctrl-Only-adjust-MSR_SPEC_CTRL-for-idle-wit.patch
@@ -1,7 +1,7 @@
From 799a8d49237a62ea0d33c3756a6a7f665b8389b2 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 12 Jul 2022 11:23:32 +0200
-Subject: [PATCH 05/21] x86/spec-ctrl: Only adjust MSR_SPEC_CTRL for idle with
+Subject: [PATCH 05/67] x86/spec-ctrl: Only adjust MSR_SPEC_CTRL for idle with
legacy IBRS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
@@ -89,5 +89,5 @@ index 68f6c46c470c..12283573cdd5 100644
* Disable shadowing before updating the MSR. There are no SMP issues
* here; only local processor ordering concerns.
--
-2.35.1
+2.37.3
diff --git a/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch b/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch
index 65670fd..965c965 100644
--- a/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch
+++ b/0006-x86-spec-ctrl-Knobs-for-STIBP-and-PSFD-and-follow-ha.patch
@@ -1,7 +1,7 @@
From cd5081e8c31651e623d86532306b4c56bbcb6e6d Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 12 Jul 2022 11:24:11 +0200
-Subject: [PATCH 06/21] x86/spec-ctrl: Knobs for STIBP and PSFD, and follow
+Subject: [PATCH 06/67] x86/spec-ctrl: Knobs for STIBP and PSFD, and follow
hardware STIBP hint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
@@ -230,5 +230,5 @@ index eb7fb70e86f9..8212227ee02a 100644
/*
* PV guests can poison the RSB to any virtual address from which
--
-2.35.1
+2.37.3
diff --git a/0007-libxc-fix-compilation-error-with-gcc13.patch b/0007-libxc-fix-compilation-error-with-gcc13.patch
index b46552f..9a1ca92 100644
--- a/0007-libxc-fix-compilation-error-with-gcc13.patch
+++ b/0007-libxc-fix-compilation-error-with-gcc13.patch
@@ -1,7 +1,7 @@
From 77deab4233b5d9ec5cf214fdc1652424fd4fc9d6 Mon Sep 17 00:00:00 2001
From: Charles Arnold <carnold@suse.com>
Date: Tue, 12 Jul 2022 11:24:39 +0200
-Subject: [PATCH 07/21] libxc: fix compilation error with gcc13
+Subject: [PATCH 07/67] libxc: fix compilation error with gcc13
xc_psr.c:161:5: error: conflicting types for 'xc_psr_cmt_get_data'
due to enum/integer mismatch;
@@ -29,5 +29,5 @@ index 318920166c5e..2013200b9eff 100644
int xc_psr_cmt_enabled(xc_interface *xch);
--
-2.35.1
+2.37.3
diff --git a/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch b/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch
index 94f729b..22a1ebe 100644
--- a/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch
+++ b/0008-x86-spec-ctrl-Honour-spec-ctrl-0-for-unpriv-mmio-sub.patch
@@ -1,7 +1,7 @@
From 5be1f46f435f8b05608b1eae029cb17d8bd3a560 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 12 Jul 2022 11:25:05 +0200
-Subject: [PATCH 08/21] x86/spec-ctrl: Honour spec-ctrl=0 for unpriv-mmio
+Subject: [PATCH 08/67] x86/spec-ctrl: Honour spec-ctrl=0 for unpriv-mmio
sub-option
This was an oversight from when unpriv-mmio was introduced.
@@ -28,5 +28,5 @@ index 8212227ee02a..06790897e496 100644
else if ( val > 0 )
rc = -EINVAL;
--
-2.35.1
+2.37.3
diff --git a/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch b/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch
index 1b8787f..53a8b70 100644
--- a/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch
+++ b/0009-xen-cmdline-Extend-parse_boolean-to-signal-a-name-ma.patch
@@ -1,7 +1,7 @@
From ae417706870333bb52ebcf33c527809cdd2d7265 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 12 Jul 2022 11:25:40 +0200
-Subject: [PATCH 09/21] xen/cmdline: Extend parse_boolean() to signal a name
+Subject: [PATCH 09/67] xen/cmdline: Extend parse_boolean() to signal a name
match
This will help parsing a sub-option which has boolean and non-boolean options
@@ -83,5 +83,5 @@ index 1198c7c0b207..be7498135170 100644
int parse_boolean(const char *name, const char *s, const char *e);
--
-2.35.1
+2.37.3
diff --git a/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch b/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch
index a808523..36577d6 100644
--- a/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch
+++ b/0010-x86-spec-ctrl-Add-fine-grained-cmdline-suboptions-fo.patch
@@ -1,7 +1,7 @@
From 08bfd4d01185e94fda1be9dd79a981d890a9085e Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 12 Jul 2022 11:26:14 +0200
-Subject: [PATCH 10/21] x86/spec-ctrl: Add fine-grained cmdline suboptions for
+Subject: [PATCH 10/67] x86/spec-ctrl: Add fine-grained cmdline suboptions for
primitives
Support controling the PV/HVM suboption of msr-sc/rsb/md-clear, which
@@ -133,5 +133,5 @@ index 06790897e496..225fe08259b3 100644
/* Xen's speculative sidechannel mitigation settings. */
--
-2.35.1
+2.37.3
diff --git a/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch b/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch
index b597673..dc468c8 100644
--- a/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch
+++ b/0011-tools-helpers-fix-build-of-xen-init-dom0-with-Werror.patch
@@ -1,7 +1,7 @@
From f241cc48dabeef6cb0b381db62f2562b0a3970eb Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Tue, 12 Jul 2022 11:26:47 +0200
-Subject: [PATCH 11/21] tools/helpers: fix build of xen-init-dom0 with -Werror
+Subject: [PATCH 11/67] tools/helpers: fix build of xen-init-dom0 with -Werror
Missing prototype of asprintf() without _GNU_SOURCE.
@@ -24,5 +24,5 @@ index c99224a4b607..b4861c9e8041 100644
#include <stdint.h>
#include <string.h>
--
-2.35.1
+2.37.3
diff --git a/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch b/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch
index 898889b..74fee03 100644
--- a/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch
+++ b/0012-libxl-check-return-value-of-libxl__xs_directory-in-n.patch
@@ -1,7 +1,7 @@
From d470a54087e0fbd813dae4d773ad0b830eeec4a1 Mon Sep 17 00:00:00 2001
From: Anthony PERARD <anthony.perard@citrix.com>
Date: Tue, 12 Jul 2022 11:26:58 +0200
-Subject: [PATCH 12/21] libxl: check return value of libxl__xs_directory in
+Subject: [PATCH 12/67] libxl: check return value of libxl__xs_directory in
name2bdf
libxl__xs_directory() can potentially return NULL without setting `n`.
@@ -34,5 +34,5 @@ index 92bf86b2bebd..a5f5cdf62b80 100644
for (i = 0; i < n; i++) {
--
-2.35.1
+2.37.3
diff --git a/0013-update-Xen-version-to-4.15.4-pre.patch b/0013-update-Xen-version-to-4.15.4-pre.patch
index 664e9df..8626fdd 100644
--- a/0013-update-Xen-version-to-4.15.4-pre.patch
+++ b/0013-update-Xen-version-to-4.15.4-pre.patch
@@ -1,7 +1,7 @@
From 505771bb1dffdf6f763fad18ee49a913b98abfea Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 12 Jul 2022 11:28:33 +0200
-Subject: [PATCH 13/21] update Xen version to 4.15.4-pre
+Subject: [PATCH 13/67] update Xen version to 4.15.4-pre
---
xen/Makefile | 2 +-
@@ -21,5 +21,5 @@ index e9a88325c467..cd66bb3b1c84 100644
-include xen-version
--
-2.35.1
+2.37.3
diff --git a/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch b/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch
index 681282e..a21b4d8 100644
--- a/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch
+++ b/0014-x86-spec-ctrl-Rework-spec_ctrl_flags-context-switchi.patch
@@ -1,7 +1,7 @@
From 156ab775769d39b2dfb048ccd34dee7e86ba83a2 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Fri, 1 Jul 2022 15:59:40 +0100
-Subject: [PATCH 14/21] x86/spec-ctrl: Rework spec_ctrl_flags context switching
+Subject: [PATCH 14/67] x86/spec-ctrl: Rework spec_ctrl_flags context switching
We are shortly going to need to context switch new bits in both the vcpu and
S3 paths. Introduce SCF_IST_MASK and SCF_DOM_MASK, and rework d->arch.verw
@@ -163,5 +163,5 @@ index 5a590bac44aa..66b00d511fc6 100644
.macro SPEC_CTRL_ENTRY_FROM_INTR_IST
/*
--
-2.35.1
+2.37.3
diff --git a/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch b/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch
index 553dbd2..49351ae 100644
--- a/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch
+++ b/0015-x86-spec-ctrl-Rename-SCF_ist_wrmsr-to-SCF_ist_sc_msr.patch
@@ -1,7 +1,7 @@
From 2cfbca32b9dc3a8d6520549ff468a7f550daf1b1 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 28 Jun 2022 14:36:56 +0100
-Subject: [PATCH 15/21] x86/spec-ctrl: Rename SCF_ist_wrmsr to SCF_ist_sc_msr
+Subject: [PATCH 15/67] x86/spec-ctrl: Rename SCF_ist_wrmsr to SCF_ist_sc_msr
We are about to introduce SCF_ist_ibpb, at which point SCF_ist_wrmsr becomes
ambiguous.
@@ -106,5 +106,5 @@ index 66b00d511fc6..0ff1b118f882 100644
DO_SPEC_CTRL_EXIT_TO_XEN
--
-2.35.1
+2.37.3
diff --git a/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch b/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch
index 9ed0093..f114f6d 100644
--- a/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch
+++ b/0016-x86-spec-ctrl-Rename-opt_ibpb-to-opt_ibpb_ctxt_switc.patch
@@ -1,7 +1,7 @@
From c707015bf118df2c43e3a48b3774916322fca50a Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Mon, 4 Jul 2022 21:32:17 +0100
-Subject: [PATCH 16/21] x86/spec-ctrl: Rename opt_ibpb to opt_ibpb_ctxt_switch
+Subject: [PATCH 16/67] x86/spec-ctrl: Rename opt_ibpb to opt_ibpb_ctxt_switch
We are about to introduce the use of IBPB at different points in Xen, making
opt_ibpb ambiguous. Rename it to opt_ibpb_ctxt_switch.
@@ -93,5 +93,5 @@ index 6f8b0e09348e..fd8162ca9ab9 100644
extern int8_t opt_eager_fpu;
extern int8_t opt_l1d_flush;
--
-2.35.1
+2.37.3
diff --git a/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch b/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch
index bae2818..e162148 100644
--- a/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch
+++ b/0017-x86-spec-ctrl-Rework-SPEC_CTRL_ENTRY_FROM_INTR_IST.patch
@@ -1,7 +1,7 @@
From d7f5fb1e2abd0d56cada9bfcf96ab530d214d9aa Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Fri, 1 Jul 2022 15:59:40 +0100
-Subject: [PATCH 17/21] x86/spec-ctrl: Rework SPEC_CTRL_ENTRY_FROM_INTR_IST
+Subject: [PATCH 17/67] x86/spec-ctrl: Rework SPEC_CTRL_ENTRY_FROM_INTR_IST
We are shortly going to add a conditional IBPB in this path.
@@ -102,5 +102,5 @@ index 0ff1b118f882..15e24cde00d1 100644
/* Opencoded UNLIKELY_START() with no condition. */
--
-2.35.1
+2.37.3
diff --git a/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch b/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch
index 06efb27..1de9d4c 100644
--- a/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch
+++ b/0018-x86-spec-ctrl-Support-IBPB-on-entry.patch
@@ -1,7 +1,7 @@
From f0d78e0c11d3984c74f34a7325f862dee93a5835 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 24 Feb 2022 13:44:33 +0000
-Subject: [PATCH 18/21] x86/spec-ctrl: Support IBPB-on-entry
+Subject: [PATCH 18/67] x86/spec-ctrl: Support IBPB-on-entry
We are going to need this to mitigate Branch Type Confusion on AMD/Hygon CPUs,
but as we've talked about using it in other cases too, arrange to support it
@@ -296,5 +296,5 @@ index 15e24cde00d1..9eb4ad9ab71d 100644
jz .L\@_skip_rsb
--
-2.35.1
+2.37.3
diff --git a/0019-x86-cpuid-Enumeration-for-BTC_NO.patch b/0019-x86-cpuid-Enumeration-for-BTC_NO.patch
index 91c38ee..a4444f4 100644
--- a/0019-x86-cpuid-Enumeration-for-BTC_NO.patch
+++ b/0019-x86-cpuid-Enumeration-for-BTC_NO.patch
@@ -1,7 +1,7 @@
From 2b29ac476fa0c91655906fac3512202e514ecbed Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Mon, 16 May 2022 15:48:24 +0100
-Subject: [PATCH 19/21] x86/cpuid: Enumeration for BTC_NO
+Subject: [PATCH 19/67] x86/cpuid: Enumeration for BTC_NO
BTC_NO indicates that hardware is not succeptable to Branch Type Confusion.
@@ -102,5 +102,5 @@ index 9686c82ed75c..1bbc7da4b53c 100644
/* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */
--
-2.35.1
+2.37.3
diff --git a/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch b/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch
index 9fd2fe0..4d12421 100644
--- a/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch
+++ b/0020-x86-spec-ctrl-Enable-Zen2-chickenbit.patch
@@ -1,7 +1,7 @@
From 409976bed91f61fb7b053d536d2fc87cf3ad7018 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Tue, 15 Mar 2022 18:30:25 +0000
-Subject: [PATCH 20/21] x86/spec-ctrl: Enable Zen2 chickenbit
+Subject: [PATCH 20/67] x86/spec-ctrl: Enable Zen2 chickenbit
... as instructed in the Branch Type Confusion whitepaper.
@@ -101,5 +101,5 @@ index 1e743461e91d..b4a360723b14 100644
#define MSR_AMD64_DR0_ADDRESS_MASK 0xc0011027
#define MSR_AMD64_DR1_ADDRESS_MASK 0xc0011019
--
-2.35.1
+2.37.3
diff --git a/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch b/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch
index 12ecc5b..b676ba3 100644
--- a/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch
+++ b/0021-x86-spec-ctrl-Mitigate-Branch-Type-Confusion-when-po.patch
@@ -1,7 +1,7 @@
From 35bf91d30f1a480dcf5bfd99b79384b2b283da7f Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Mon, 27 Jun 2022 19:29:40 +0100
-Subject: [PATCH 21/21] x86/spec-ctrl: Mitigate Branch Type Confusion when
+Subject: [PATCH 21/67] x86/spec-ctrl: Mitigate Branch Type Confusion when
possible
Branch Type Confusion affects AMD/Hygon CPUs on Zen2 and earlier. To
@@ -301,5 +301,5 @@ index 10cd0cd2518f..33e845991b0a 100644
extern int8_t opt_eager_fpu;
extern int8_t opt_l1d_flush;
--
-2.35.1
+2.37.3
diff --git a/0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch b/0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch
new file mode 100644
index 0000000..81f5b9a
--- /dev/null
+++ b/0022-x86-mm-correct-TLB-flush-condition-in-_get_page_type.patch
@@ -0,0 +1,45 @@
+From 3859f3ee7e37323ae5e0014c07ba8d3a4d7890b2 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 26 Jul 2022 15:03:14 +0200
+Subject: [PATCH 22/67] x86/mm: correct TLB flush condition in _get_page_type()
+
+When this logic was moved, it was moved across the point where nx is
+updated to hold the new type for the page. IOW originally it was
+equivalent to using x (and perhaps x would better have been used), but
+now it isn't anymore. Switch to using x, which then brings things in
+line again with the slightly earlier comment there (now) talking about
+transitions _from_ writable.
+
+I have to confess though that I cannot make a direct connection between
+the reported observed behavior of guests leaving several pages around
+with pending general references and the change here. Repeated testing,
+nevertheless, confirms the reported issue is no longer there.
+
+This is CVE-2022-33745 / XSA-408.
+
+Reported-by: Charles Arnold <carnold@suse.com>
+Fixes: 8cc5036bc385 ("x86/pv: Fix ABAC cmpxchg() race in _get_page_type()")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: a9949efb288fd6e21bbaf9d5826207c7c41cda27
+master date: 2022-07-26 14:54:34 +0200
+---
+ xen/arch/x86/mm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 7d0747017db5..c88dc749d431 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2992,7 +2992,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ if ( unlikely(!cpumask_empty(mask)) &&
+ /* Shadow mode: track only writable pages. */
+ (!shadow_mode_enabled(d) ||
+- ((nx & PGT_type_mask) == PGT_writable_page)) )
++ ((x & PGT_type_mask) == PGT_writable_page)) )
+ {
+ perfc_incr(need_flush_tlb_flush);
+ /*
+--
+2.37.3
+
diff --git a/0023-xl-relax-freemem-s-retry-calculation.patch b/0023-xl-relax-freemem-s-retry-calculation.patch
new file mode 100644
index 0000000..d7dda30
--- /dev/null
+++ b/0023-xl-relax-freemem-s-retry-calculation.patch
@@ -0,0 +1,80 @@
+From 2173d9c8be28d5f33c0e299a363ac994867d111b Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:28:46 +0200
+Subject: [PATCH 23/67] xl: relax freemem()'s retry calculation
+
+While in principle possible also under other conditions as long as other
+parallel operations potentially consuming memory aren't "locked out", in
+particular with IOMMU large page mappings used in Dom0 (for PV when in
+strict mode; for PVH when not sharing page tables with HAP) ballooning
+out of individual pages can actually lead to less free memory available
+afterwards. This is because to split a large page, one or more page
+table pages are necessary (one per level that is split).
+
+When rebooting a guest I've observed freemem() to fail: A single page
+was required to be ballooned out (presumably because of heap
+fragmentation in the hypervisor). This ballooning out of a single page
+of course went fast, but freemem() then found that it would require to
+balloon out another page. This repeating just another time leads to the
+function to signal failure to the caller - without having come anywhere
+near the designated 30s that the whole process is allowed to not make
+any progress at all.
+
+Convert from a simple retry count to actually calculating elapsed time,
+subtracting from an initial credit of 30s. Don't go as far as limiting
+the "wait_secs" value passed to libxl_wait_for_memory_target(), though.
+While this leads to the overall process now possibly taking longer (if
+the previous iteration ended very close to the intended 30s), this
+compensates to some degree for the value passed really meaning "allowed
+to run for this long without making progress".
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: e58370df76eacf1f7ca0340e9b96430c77b41a79
+master date: 2022-07-12 15:25:00 +0200
+---
+ tools/xl/xl_vmcontrol.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/tools/xl/xl_vmcontrol.c b/tools/xl/xl_vmcontrol.c
+index 435155a03396..5dee7730ca76 100644
+--- a/tools/xl/xl_vmcontrol.c
++++ b/tools/xl/xl_vmcontrol.c
+@@ -321,7 +321,8 @@ static int domain_wait_event(uint32_t domid, libxl_event **event_r)
+ */
+ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+ {
+- int rc, retries = 3;
++ int rc;
++ double credit = 30;
+ uint64_t need_memkb, free_memkb;
+
+ if (!autoballoon)
+@@ -332,6 +333,8 @@ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+ return false;
+
+ do {
++ time_t start;
++
+ rc = libxl_get_free_memory(ctx, &free_memkb);
+ if (rc < 0)
+ return false;
+@@ -345,12 +348,13 @@ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+
+ /* wait until dom0 reaches its target, as long as we are making
+ * progress */
++ start = time(NULL);
+ rc = libxl_wait_for_memory_target(ctx, 0, 10);
+ if (rc < 0)
+ return false;
+
+- retries--;
+- } while (retries > 0);
++ credit -= difftime(time(NULL), start);
++ } while (credit > 0);
+
+ return false;
+ }
+--
+2.37.3
+
diff --git a/0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch b/0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch
new file mode 100644
index 0000000..fbb1448
--- /dev/null
+++ b/0024-tools-init-xenstore-domain-fix-memory-map-for-PVH-st.patch
@@ -0,0 +1,59 @@
+From a2684d9cbbfb02b268be7e551674f709db0617a4 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Wed, 27 Jul 2022 09:29:08 +0200
+Subject: [PATCH 24/67] tools/init-xenstore-domain: fix memory map for PVH
+ stubdom
+
+In case of maxmem != memsize the E820 map of the PVH stubdom is wrong,
+as it is missing the RAM above memsize.
+
+Additionally the memory map should only specify the Xen special pages
+as reserved.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: 134d53f577076d4f26091e25762f27cc3c73bf58
+master date: 2022-07-12 15:25:20 +0200
+---
+ tools/helpers/init-xenstore-domain.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/tools/helpers/init-xenstore-domain.c b/tools/helpers/init-xenstore-domain.c
+index 6836002f0bad..32689abd7479 100644
+--- a/tools/helpers/init-xenstore-domain.c
++++ b/tools/helpers/init-xenstore-domain.c
+@@ -72,8 +72,9 @@ static int build(xc_interface *xch)
+ char cmdline[512];
+ int rv, xs_fd;
+ struct xc_dom_image *dom = NULL;
+- int limit_kb = (maxmem ? : (memory + 1)) * 1024;
++ int limit_kb = (maxmem ? : memory) * 1024 + X86_HVM_NR_SPECIAL_PAGES * 4;
+ uint64_t mem_size = MB(memory);
++ uint64_t max_size = MB(maxmem ? : memory);
+ struct e820entry e820[3];
+ struct xen_domctl_createdomain config = {
+ .ssidref = SECINITSID_DOMU,
+@@ -157,13 +158,16 @@ static int build(xc_interface *xch)
+ dom->mmio_start = LAPIC_BASE_ADDRESS;
+ dom->max_vcpus = 1;
+ e820[0].addr = 0;
+- e820[0].size = dom->lowmem_end;
++ e820[0].size = (max_size > LAPIC_BASE_ADDRESS) ?
++ LAPIC_BASE_ADDRESS : max_size;
+ e820[0].type = E820_RAM;
+- e820[1].addr = LAPIC_BASE_ADDRESS;
+- e820[1].size = dom->mmio_size;
++ e820[1].addr = (X86_HVM_END_SPECIAL_REGION -
++ X86_HVM_NR_SPECIAL_PAGES) << XC_PAGE_SHIFT;
++ e820[1].size = X86_HVM_NR_SPECIAL_PAGES << XC_PAGE_SHIFT;
+ e820[1].type = E820_RESERVED;
+ e820[2].addr = GB(4);
+- e820[2].size = dom->highmem_end - GB(4);
++ e820[2].size = (max_size > LAPIC_BASE_ADDRESS) ?
++ max_size - LAPIC_BASE_ADDRESS : 0;
+ e820[2].type = E820_RAM;
+ }
+
+--
+2.37.3
+
diff --git a/0025-xl-move-freemem-s-credit-expired-loop-exit.patch b/0025-xl-move-freemem-s-credit-expired-loop-exit.patch
new file mode 100644
index 0000000..c3a1965
--- /dev/null
+++ b/0025-xl-move-freemem-s-credit-expired-loop-exit.patch
@@ -0,0 +1,55 @@
+From c37099426ea678c1d5b6c99ae5ad6834f4edd2e6 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:29:31 +0200
+Subject: [PATCH 25/67] xl: move freemem()'s "credit expired" loop exit
+
+Move the "credit expired" loop exit to the middle of the loop,
+immediately after "return true". This way having reached the goal on the
+last iteration would be reported as success to the caller, rather than
+as "timed out".
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: d8f8cb8bdd02fad3b6986ae93511f750fa7f7e6a
+master date: 2022-07-18 17:48:18 +0200
+---
+ tools/xl/xl_vmcontrol.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/tools/xl/xl_vmcontrol.c b/tools/xl/xl_vmcontrol.c
+index 5dee7730ca76..d1c6f8aae67a 100644
+--- a/tools/xl/xl_vmcontrol.c
++++ b/tools/xl/xl_vmcontrol.c
+@@ -332,7 +332,7 @@ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+ if (rc < 0)
+ return false;
+
+- do {
++ for (;;) {
+ time_t start;
+
+ rc = libxl_get_free_memory(ctx, &free_memkb);
+@@ -342,6 +342,9 @@ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+ if (free_memkb >= need_memkb)
+ return true;
+
++ if (credit <= 0)
++ return false;
++
+ rc = libxl_set_memory_target(ctx, 0, free_memkb - need_memkb, 1, 0);
+ if (rc < 0)
+ return false;
+@@ -354,9 +357,7 @@ static bool freemem(uint32_t domid, libxl_domain_config *d_config)
+ return false;
+
+ credit -= difftime(time(NULL), start);
+- } while (credit > 0);
+-
+- return false;
++ }
+ }
+
+ static void reload_domain_config(uint32_t domid,
+--
+2.37.3
+
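Editor's note: taken together, patches 0023 and 0025 above leave xl's freemem() with the loop shape sketched below. This is an illustrative condensation only, not code from the series: the real function derives need_memkb from the domain config before entering the loop, and ctx is xl's global libxl context.

#include <stdbool.h>
#include <stdint.h>
#include <time.h>
#include <libxl.h>

extern libxl_ctx *ctx;   /* xl's global libxl context, as in xl_vmcontrol.c */

/* Ballooning loop driven by a 30s "no progress" credit instead of a retry count. */
static bool freemem_sketch(uint64_t need_memkb)
{
    double credit = 30;
    uint64_t free_memkb;

    for (;;) {
        time_t start;

        if (libxl_get_free_memory(ctx, &free_memkb) < 0)
            return false;
        if (free_memkb >= need_memkb)
            return true;              /* goal reached, even on the last pass */
        if (credit <= 0)
            return false;             /* 30s elapsed without making progress */

        /* Ask dom0 to balloon down by the missing amount (relative target). */
        if (libxl_set_memory_target(ctx, 0, free_memkb - need_memkb, 1, 0) < 0)
            return false;

        start = time(NULL);
        if (libxl_wait_for_memory_target(ctx, 0, 10) < 0)
            return false;

        credit -= difftime(time(NULL), start);
    }
}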
diff --git a/0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch b/0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch
new file mode 100644
index 0000000..fbf3f41
--- /dev/null
+++ b/0026-x86-spec-ctrl-correct-per-guest-type-reporting-of-MD.patch
@@ -0,0 +1,56 @@
+From 5f1d0179e15d726622a49044a825894d5010df15 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:29:54 +0200
+Subject: [PATCH 26/67] x86/spec-ctrl: correct per-guest-type reporting of
+ MD_CLEAR
+
+There are command line controls for this and the default also isn't "always
+enable when hardware supports it", which logging should take into account.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: fdbf8bdfebc2ed323c521848f642cc4f6b8cb662
+master date: 2022-07-19 08:36:53 +0200
+---
+ xen/arch/x86/spec_ctrl.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 563519ce0e31..f7b0251c42bc 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -511,13 +511,12 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+ printk(" Support for HVM VMs:%s%s%s%s%s%s\n",
+ (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
+ boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ||
+- boot_cpu_has(X86_FEATURE_MD_CLEAR) ||
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
+- opt_eager_fpu) ? "" : " None",
++ opt_eager_fpu || opt_md_clear_hvm) ? "" : " None",
+ boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "",
+ boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "",
+ opt_eager_fpu ? " EAGER_FPU" : "",
+- boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "",
++ opt_md_clear_hvm ? " MD_CLEAR" : "",
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : "");
+
+ #endif
+@@ -525,13 +524,12 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+ printk(" Support for PV VMs:%s%s%s%s%s%s\n",
+ (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
+ boot_cpu_has(X86_FEATURE_SC_RSB_PV) ||
+- boot_cpu_has(X86_FEATURE_MD_CLEAR) ||
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ||
+- opt_eager_fpu) ? "" : " None",
++ opt_eager_fpu || opt_md_clear_pv) ? "" : " None",
+ boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "",
+ boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "",
+ opt_eager_fpu ? " EAGER_FPU" : "",
+- boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "",
++ opt_md_clear_pv ? " MD_CLEAR" : "",
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : "");
+
+ printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n",
+--
+2.37.3
+
diff --git a/0027-x86-deal-with-gcc12-release-build-issues.patch b/0027-x86-deal-with-gcc12-release-build-issues.patch
new file mode 100644
index 0000000..d26f6d3
--- /dev/null
+++ b/0027-x86-deal-with-gcc12-release-build-issues.patch
@@ -0,0 +1,65 @@
+From a095c6cde8a717325cc31bb393c547cad5e16e35 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:30:24 +0200
+Subject: [PATCH 27/67] x86: deal with gcc12 release build issues
+
+While a number of issues we previously had with pre-release gcc12 were
+fixed in the final release, we continue to have one issue (with multiple
+instances) when doing release builds (i.e. at higher optimization
+levels): The compiler takes issue with subtracting (always 1 in our
+case) from artifical labels (expressed as array) marking the end of
+certain regions. This isn't an unreasonable position to take. Simply
+hide the "array-ness" by casting to an integer type. To keep things
+looking consistently, apply the same cast also on the respective
+expressions dealing with the starting addresses. (Note how
+efi_arch_memory_setup()'s l2_table_offset() invocations avoid a similar
+issue by already having the necessary casts.) In is_xen_fixed_mfn()
+further switch from __pa() to virt_to_maddr() to better match the left
+sides of the <= operators.
+
+Reported-by: Charles Arnold <carnold@suse.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 9723507daf2120131410c91980d4e4d9b0d0aa90
+master date: 2022-07-19 08:37:29 +0200
+---
+ xen/arch/x86/efi/efi-boot.h | 6 +++---
+ xen/include/asm-x86/mm.h | 4 ++--
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/xen/arch/x86/efi/efi-boot.h b/xen/arch/x86/efi/efi-boot.h
+index 2541ba1f320a..84fd77931456 100644
+--- a/xen/arch/x86/efi/efi-boot.h
++++ b/xen/arch/x86/efi/efi-boot.h
+@@ -624,10 +624,10 @@ static void __init efi_arch_memory_setup(void)
+ * appropriate l2 slots to map.
+ */
+ #define l2_4G_offset(a) \
+- (((UINTN)(a) >> L2_PAGETABLE_SHIFT) & (4 * L2_PAGETABLE_ENTRIES - 1))
++ (((a) >> L2_PAGETABLE_SHIFT) & (4 * L2_PAGETABLE_ENTRIES - 1))
+
+- for ( i = l2_4G_offset(_start);
+- i <= l2_4G_offset(_end - 1); ++i )
++ for ( i = l2_4G_offset((UINTN)_start);
++ i <= l2_4G_offset((UINTN)_end - 1); ++i )
+ {
+ l2_pgentry_t pte = l2e_from_paddr(i << L2_PAGETABLE_SHIFT,
+ __PAGE_HYPERVISOR | _PAGE_PSE);
+diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
+index 5c19b71eca70..71dd28f126c3 100644
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -309,8 +309,8 @@ struct page_info
+ #define is_xen_heap_mfn(mfn) \
+ (mfn_valid(mfn) && is_xen_heap_page(mfn_to_page(mfn)))
+ #define is_xen_fixed_mfn(mfn) \
+- (((mfn_to_maddr(mfn)) >= __pa(_stext)) && \
+- ((mfn_to_maddr(mfn)) <= __pa(__2M_rwdata_end - 1)))
++ (((mfn_to_maddr(mfn)) >= virt_to_maddr((unsigned long)_stext)) && \
++ ((mfn_to_maddr(mfn)) <= virt_to_maddr((unsigned long)__2M_rwdata_end - 1)))
+
+ #define PRtype_info "016lx"/* should only be used for printk's */
+
+--
+2.37.3
+
diff --git a/0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch b/0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch
new file mode 100644
index 0000000..26b959e
--- /dev/null
+++ b/0028-x86emul-add-memory-operand-low-bits-checks-for-ENQCM.patch
@@ -0,0 +1,45 @@
+From 4799a202a9017360708c18aa8cd699bd8d6be08b Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:31:01 +0200
+Subject: [PATCH 28/67] x86emul: add memory operand low bits checks for
+ ENQCMD{,S}
+
+Already ISE rev 044 added text to this effect; rev 045 further dropped
+leftover earlier text indicating the contrary:
+- ENQCMD requires the low 32 bits of the memory operand to be clear,
+- ENDCMDS requires bits 20...30 of the memory operand to be clear.
+
+Fixes: d27385968741 ("x86emul: support ENQCMD insns")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: d620c66bdbe5510c3bae89be8cc7ca9a2a6cbaba
+master date: 2022-07-20 15:46:48 +0200
+---
+ xen/arch/x86/x86_emulate/x86_emulate.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
+index 5e297f797187..247c14dc4e68 100644
+--- a/xen/arch/x86/x86_emulate/x86_emulate.c
++++ b/xen/arch/x86/x86_emulate/x86_emulate.c
+@@ -10464,6 +10464,7 @@ x86_emulate(
+ goto done;
+ if ( vex.pfx == vex_f2 ) /* enqcmd */
+ {
++ generate_exception_if(mmvalp->data32[0], EXC_GP, 0);
+ fail_if(!ops->read_msr);
+ if ( (rc = ops->read_msr(MSR_PASID, &msr_val,
+ ctxt)) != X86EMUL_OKAY )
+@@ -10471,7 +10472,8 @@ x86_emulate(
+ generate_exception_if(!(msr_val & PASID_VALID), EXC_GP, 0);
+ mmvalp->data32[0] = MASK_EXTR(msr_val, PASID_PASID_MASK);
+ }
+- mmvalp->data32[0] &= ~0x7ff00000;
++ else
++ generate_exception_if(mmvalp->data32[0] & 0x7ff00000, EXC_GP, 0);
+ state->blk = blk_enqcmd;
+ if ( (rc = ops->blk(x86_seg_es, src.val, mmvalp, 64, &_regs.eflags,
+ state, ctxt)) != X86EMUL_OKAY )
+--
+2.37.3
+
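Editor's note: for readers cross-checking the constant in the hunk above, 0x7ff00000 is exactly the 11-bit field covering bits 20..30 of the 32-bit operand word, i.e. the bits ENQCMDS requires to be clear. A trivial standalone check (illustrative only, not part of the series):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t mask = 0;
    unsigned int bit;

    for (bit = 20; bit <= 30; bit++)
        mask |= UINT32_C(1) << bit;

    assert(mask == UINT32_C(0x7ff00000));   /* matches the constant in the hunk */
    return 0;
}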
diff --git a/0029-x86-also-suppress-use-of-MMX-insns.patch b/0029-x86-also-suppress-use-of-MMX-insns.patch
new file mode 100644
index 0000000..1298a47
--- /dev/null
+++ b/0029-x86-also-suppress-use-of-MMX-insns.patch
@@ -0,0 +1,39 @@
+From 30d3de4c61c297e12662df1fdb89af335947e59d Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 27 Jul 2022 09:31:31 +0200
+Subject: [PATCH 29/67] x86: also suppress use of MMX insns
+
+Passing -mno-sse alone is not enough: The compiler may still find
+(questionable) reasons to use MMX insns. In particular with gcc12 use
+of MOVD+PUNPCKLDQ+MOVQ was observed in an apparent attempt to auto-
+vectorize the storing of two adjacent zeroes, 32 bits each.
+
+Reported-by: ChrisD <chris@dalessio.org>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 6fe2e39a0243bddba60f83b77b972a5922d25eb8
+master date: 2022-07-20 15:48:49 +0200
+---
+ xen/arch/x86/arch.mk | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/arch.mk b/xen/arch/x86/arch.mk
+index 456e5d5c1ad7..c4337a1a118c 100644
+--- a/xen/arch/x86/arch.mk
++++ b/xen/arch/x86/arch.mk
+@@ -37,9 +37,9 @@ $(call as-option-add,CFLAGS,CC,\
+
+ CFLAGS += -mno-red-zone -fpic
+
+-# Xen doesn't use SSE interally. If the compiler supports it, also skip the
+-# SSE setup for variadic function calls.
+-CFLAGS += -mno-sse $(call cc-option,$(CC),-mskip-rax-setup)
++# Xen doesn't use MMX or SSE interally. If the compiler supports it, also skip
++# the SSE setup for variadic function calls.
++CFLAGS += -mno-mmx -mno-sse $(call cc-option,$(CC),-mskip-rax-setup)
+
+ # Compile with thunk-extern, indirect-branch-register if avaiable.
+ CFLAGS-$(CONFIG_INDIRECT_THUNK) += -mindirect-branch=thunk-extern
+--
+2.37.3
+
diff --git a/0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch b/0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch
new file mode 100644
index 0000000..a9bf845
--- /dev/null
+++ b/0030-common-memory-Fix-ifdefs-for-ptdom_max_order.patch
@@ -0,0 +1,52 @@
+From b64f1c9e3e3a2a416c7bb5aab77ba5d2cba98638 Mon Sep 17 00:00:00 2001
+From: Luca Fancellu <luca.fancellu@arm.com>
+Date: Wed, 27 Jul 2022 09:31:49 +0200
+Subject: [PATCH 30/67] common/memory: Fix ifdefs for ptdom_max_order
+
+In common/memory.c the ifdef code surrounding ptdom_max_order is
+using HAS_PASSTHROUGH instead of CONFIG_HAS_PASSTHROUGH, fix the
+problem using the correct macro.
+
+Fixes: e0d44c1f9461 ("build: convert HAS_PASSTHROUGH use to Kconfig")
+Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 5707470bf3103ebae43697a7ac2faced6cd35f92
+master date: 2022-07-26 08:33:46 +0200
+---
+ xen/common/memory.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/common/memory.c b/xen/common/memory.c
+index 297b98a562b2..95b2b934e4a2 100644
+--- a/xen/common/memory.c
++++ b/xen/common/memory.c
+@@ -58,7 +58,7 @@ struct memop_args {
+ static unsigned int __read_mostly domu_max_order = CONFIG_DOMU_MAX_ORDER;
+ static unsigned int __read_mostly ctldom_max_order = CONFIG_CTLDOM_MAX_ORDER;
+ static unsigned int __read_mostly hwdom_max_order = CONFIG_HWDOM_MAX_ORDER;
+-#ifdef HAS_PASSTHROUGH
++#ifdef CONFIG_HAS_PASSTHROUGH
+ static unsigned int __read_mostly ptdom_max_order = CONFIG_PTDOM_MAX_ORDER;
+ #endif
+
+@@ -70,7 +70,7 @@ static int __init parse_max_order(const char *s)
+ ctldom_max_order = simple_strtoul(s, &s, 0);
+ if ( *s == ',' && *++s != ',' )
+ hwdom_max_order = simple_strtoul(s, &s, 0);
+-#ifdef HAS_PASSTHROUGH
++#ifdef CONFIG_HAS_PASSTHROUGH
+ if ( *s == ',' && *++s != ',' )
+ ptdom_max_order = simple_strtoul(s, &s, 0);
+ #endif
+@@ -83,7 +83,7 @@ static unsigned int max_order(const struct domain *d)
+ {
+ unsigned int order = domu_max_order;
+
+-#ifdef HAS_PASSTHROUGH
++#ifdef CONFIG_HAS_PASSTHROUGH
+ if ( cache_flush_permitted(d) && order < ptdom_max_order )
+ order = ptdom_max_order;
+ #endif
+--
+2.37.3
+
diff --git a/0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch b/0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch
new file mode 100644
index 0000000..a52055a
--- /dev/null
+++ b/0031-tools-libxl-env-variable-to-signal-whether-disk-nic-.patch
@@ -0,0 +1,107 @@
+From 1b9845dcf959421db3a071a6bc0aa9d8edbffb50 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Wed, 3 Aug 2022 12:41:18 +0200
+Subject: [PATCH 31/67] tools/libxl: env variable to signal whether disk/nic
+ backend is trusted
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Introduce support in libxl for fetching the default backend trusted
+option for disk and nic devices.
+
+Users can set LIBXL_{DISK,NIC}_BACKEND_UNTRUSTED environment variable
+to notify libxl of whether the backends for disk and nic devices
+should be trusted. Such information is passed into the frontend so it
+can take the appropriate measures.
+
+This is part of XSA-403.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+---
+ docs/man/xl.1.pod.in | 18 ++++++++++++++++++
+ tools/libs/light/libxl_disk.c | 5 +++++
+ tools/libs/light/libxl_nic.c | 7 +++++++
+ 3 files changed, 30 insertions(+)
+
+diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in
+index e2176bd696cb..45e1430aeb74 100644
+--- a/docs/man/xl.1.pod.in
++++ b/docs/man/xl.1.pod.in
+@@ -1946,6 +1946,24 @@ shows the decimal value. For non-linear mode, it shows hexadecimal value.
+
+ =back
+
++=head1 ENVIRONMENT
++
++=over 4
++
++=item B<LIBXL_DISK_BACKEND_UNTRUSTED>
++
++Set this environment variable to "1" to suggest to the guest that the disk
++backend shouldn't be trusted. If the variable is absent or set to "0", the
++backend will be trusted.
++
++=item B<LIBXL_NIC_BACKEND_UNTRUSTED>
++
++Set this environment variable to "1" to suggest to the guest that the network
++backend shouldn't be trusted. If the variable is absent or set to "0", the
++backend will be trusted.
++
++=back
++
+ =head1 IGNORED FOR COMPATIBILITY WITH XM
+
+ xl is mostly command-line compatible with the old xm utility used with
+diff --git a/tools/libs/light/libxl_disk.c b/tools/libs/light/libxl_disk.c
+index 93936d0dd0f8..67d1cc18578f 100644
+--- a/tools/libs/light/libxl_disk.c
++++ b/tools/libs/light/libxl_disk.c
+@@ -246,6 +246,7 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid,
+ libxl_domain_config d_config;
+ libxl_device_disk disk_saved;
+ libxl__flock *lock = NULL;
++ const char *envvar;
+
+ libxl_domain_config_init(&d_config);
+ libxl_device_disk_init(&disk_saved);
+@@ -395,6 +396,10 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid,
+ flexarray_append(front, GCSPRINTF("%d", device->devid));
+ flexarray_append(front, "device-type");
+ flexarray_append(front, disk->is_cdrom ? "cdrom" : "disk");
++ flexarray_append(front, "trusted");
++ envvar = getenv("LIBXL_DISK_BACKEND_UNTRUSTED");
++ /* Set "trusted=1" if envvar missing or is "0". */
++ flexarray_append(front, !envvar || !strcmp("0", envvar) ? "1" : "0");
+
+ /*
+ * Old PV kernel disk frontends before 2.6.26 rely on tool stack to
+diff --git a/tools/libs/light/libxl_nic.c b/tools/libs/light/libxl_nic.c
+index 0b9e70c9d13d..f87890d1d65f 100644
+--- a/tools/libs/light/libxl_nic.c
++++ b/tools/libs/light/libxl_nic.c
+@@ -132,6 +132,8 @@ static int libxl__set_xenstore_nic(libxl__gc *gc, uint32_t domid,
+ flexarray_t *back, flexarray_t *front,
+ flexarray_t *ro_front)
+ {
++ const char *envvar;
++
+ flexarray_grow(back, 2);
+
+ if (nic->script)
+@@ -255,6 +257,11 @@ static int libxl__set_xenstore_nic(libxl__gc *gc, uint32_t domid,
+ flexarray_append(back, "hotplug-status");
+ flexarray_append(back, "");
+
++ flexarray_append(front, "trusted");
++ envvar = getenv("LIBXL_NIC_BACKEND_UNTRUSTED");
++ /* Set "trusted=1" if envvar missing or is "0". */
++ flexarray_append(front, !envvar || !strcmp("0", envvar) ? "1" : "0");
++
+ return 0;
+ }
+
+--
+2.37.3
+
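Editor's note: as a usage sketch for the knobs documented in the xl.1 hunk above (the variable names come straight from the patch; the wrapper function is hypothetical), a management tool could export them before invoking xl/libxl when the disk or network backend domain is not fully trusted.

#include <stdlib.h>

/* Mark both backend types as untrusted for subsequently created guests. */
static void mark_backends_untrusted(void)
{
    setenv("LIBXL_DISK_BACKEND_UNTRUSTED", "1", 1);   /* final 1 = overwrite */
    setenv("LIBXL_NIC_BACKEND_UNTRUSTED", "1", 1);
}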
diff --git a/0032-x86-msr-fix-X2APIC_LAST.patch b/0032-x86-msr-fix-X2APIC_LAST.patch
new file mode 100644
index 0000000..ac42842
--- /dev/null
+++ b/0032-x86-msr-fix-X2APIC_LAST.patch
@@ -0,0 +1,66 @@
+From df3395f6b2d759aba39fb67a7bc0fe49147c8b39 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
+Date: Wed, 3 Aug 2022 12:41:49 +0200
+Subject: [PATCH 32/67] x86/msr: fix X2APIC_LAST
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The latest Intel manual now says the X2APIC reserved range is only
+0x800 to 0x8ff (NOT 0xbff).
+This changed between SDM 68 (Nov 2018) and SDM 69 (Jan 2019).
+The AMD manual documents 0x800-0x8ff too.
+
+There are non-X2APIC MSRs in the 0x900-0xbff range now:
+e.g. 0x981 is IA32_TME_CAPABILITY, an architectural MSR.
+
+The new MSR in this range appears to have been introduced in Icelake,
+so this commit should be backported to Xen versions supporting Icelake.
+
+Signed-off-by: Edwin Török <edvin.torok@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 13316827faadbb4f72ae6c625af9938d8f976f86
+master date: 2022-07-27 12:57:10 +0200
+---
+ xen/arch/x86/hvm/vmx/vmx.c | 4 ++--
+ xen/include/asm-x86/msr-index.h | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index 868151a2e533..775b36433e24 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -3401,7 +3401,7 @@ void vmx_vlapic_msr_changed(struct vcpu *v)
+ if ( cpu_has_vmx_apic_reg_virt )
+ {
+ for ( msr = MSR_X2APIC_FIRST;
+- msr <= MSR_X2APIC_FIRST + 0xff; msr++ )
++ msr <= MSR_X2APIC_LAST; msr++ )
+ vmx_clear_msr_intercept(v, msr, VMX_MSR_R);
+
+ vmx_set_msr_intercept(v, MSR_X2APIC_PPR, VMX_MSR_R);
+@@ -3422,7 +3422,7 @@ void vmx_vlapic_msr_changed(struct vcpu *v)
+ if ( !(v->arch.hvm.vmx.secondary_exec_control &
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE) )
+ for ( msr = MSR_X2APIC_FIRST;
+- msr <= MSR_X2APIC_FIRST + 0xff; msr++ )
++ msr <= MSR_X2APIC_LAST; msr++ )
+ vmx_set_msr_intercept(v, msr, VMX_MSR_RW);
+
+ vmx_update_secondary_exec_control(v);
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index b4a360723b14..f1b2cf5460c1 100644
+--- a/xen/include/asm-x86/msr-index.h
++++ b/xen/include/asm-x86/msr-index.h
+@@ -459,7 +459,7 @@
+ #define MSR_IA32_TSC_ADJUST 0x0000003b
+
+ #define MSR_X2APIC_FIRST 0x00000800
+-#define MSR_X2APIC_LAST 0x00000bff
++#define MSR_X2APIC_LAST 0x000008ff
+
+ #define MSR_X2APIC_TPR 0x00000808
+ #define MSR_X2APIC_PPR 0x0000080a
+--
+2.37.3
+
diff --git a/0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch b/0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch
new file mode 100644
index 0000000..46780c4
--- /dev/null
+++ b/0033-x86-spec-ctrl-Use-IST-RSB-protection-for-SVM-systems.patch
@@ -0,0 +1,54 @@
+From 8ae0b4d1331c14fb9e30a42987c0152c9b00f530 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Mon, 15 Aug 2022 15:40:05 +0200
+Subject: [PATCH 33/67] x86/spec-ctrl: Use IST RSB protection for !SVM systems
+
+There is a corner case where a VT-x guest which manages to reliably trigger
+non-fatal #MC's could evade the rogue RSB speculation protections that were
+supposed to be in place.
+
+This is a lack of defence in depth; Xen does not architecturally execute more
+RET than CALL instructions, so an attacker would have to locate a different
+gadget (e.g. SpectreRSB) first to execute a transient path of excess RET
+instructions.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: e570e8d520ab542d8d35666b95cb3a0125b7b110
+master date: 2022-08-05 12:16:24 +0100
+---
+ xen/arch/x86/spec_ctrl.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index f7b0251c42bc..ac73806eacd8 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -1279,8 +1279,24 @@ void __init init_speculation_mitigations(void)
+ * mappings.
+ */
+ if ( opt_rsb_hvm )
++ {
+ setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);
+
++ /*
++ * For SVM, Xen's RSB safety actions are performed before STGI, so
++ * behave atomically with respect to IST sources.
++ *
++ * For VT-x, NMIs are atomic with VMExit (the NMI gets queued but not
++ * delivered) whereas other IST sources are not atomic. Specifically,
++ * #MC can hit ahead the RSB safety action in the vmexit path.
++ *
++ * Therefore, it is necessary for the IST logic to protect Xen against
++ * possible rogue RSB speculation.
++ */
++ if ( !cpu_has_svm )
++ default_spec_ctrl_flags |= SCF_ist_rsb;
++ }
++
+ ibpb_calculations();
+
+ /* Check whether Eager FPU should be enabled by default. */
+--
+2.37.3
+
diff --git a/0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch b/0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch
new file mode 100644
index 0000000..6a73c21
--- /dev/null
+++ b/0034-x86-Expose-more-MSR_ARCH_CAPS-to-hwdom.patch
@@ -0,0 +1,68 @@
+From 5efcae1eb30ff24e100954e00889a568c1745ea1 Mon Sep 17 00:00:00 2001
+From: Jason Andryuk <jandryuk@gmail.com>
+Date: Mon, 15 Aug 2022 15:40:47 +0200
+Subject: [PATCH 34/67] x86: Expose more MSR_ARCH_CAPS to hwdom
+
+commit e46474278a0e ("x86/intel: Expose MSR_ARCH_CAPS to dom0") started
+exposing MSR_ARCH_CAPS to dom0. More bits in MSR_ARCH_CAPS have since
+been defined, but they haven't been exposed. Update the list to allow
+them through.
+
+As one example, this allows a Linux Dom0 to know that it has the
+appropriate microcode via FB_CLEAR. Notably, and with the updated
+microcode, this changes dom0's
+/sys/devices/system/cpu/vulnerabilities/mmio_stale_data changes from:
+
+ "Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown"
+
+to:
+
+ "Mitigation: Clear CPU buffers; SMT Host state unknown"
+
+This exposes the MMIO Stale Data and Intel Branch History Injection
+(BHI) controls as well as the page size change MCE issue bit.
+
+Fixes: commit 2ebe8fe9b7e0 ("x86/spec-ctrl: Enumeration for MMIO Stale Data controls")
+Fixes: commit cea9ae062295 ("x86/spec-ctrl: Enumeration for new Intel BHI controls")
+Fixes: commit 59e89cdabc71 ("x86/vtx: Disable executable EPT superpages to work around CVE-2018-12207")
+Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: e83cd54611fec5b7a539fa1281a14319143490e6
+master date: 2022-08-09 16:35:25 +0100
+---
+ xen/arch/x86/msr.c | 5 ++++-
+ xen/include/asm-x86/msr-index.h | 2 ++
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
+index 0739d00e74f1..aa9face9aad3 100644
+--- a/xen/arch/x86/msr.c
++++ b/xen/arch/x86/msr.c
+@@ -145,7 +145,10 @@ int init_domain_msr_policy(struct domain *d)
+
+ mp->arch_caps.raw = val &
+ (ARCH_CAPS_RDCL_NO | ARCH_CAPS_IBRS_ALL | ARCH_CAPS_RSBA |
+- ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_TAA_NO);
++ ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO |
++ ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO |
++ ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA |
++ ARCH_CAPS_BHI_NO);
+ }
+
+ d->arch.msr = mp;
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index f1b2cf5460c1..49ca1f1845e6 100644
+--- a/xen/include/asm-x86/msr-index.h
++++ b/xen/include/asm-x86/msr-index.h
+@@ -64,6 +64,8 @@
+ #define ARCH_CAPS_PSDP_NO (_AC(1, ULL) << 15)
+ #define ARCH_CAPS_FB_CLEAR (_AC(1, ULL) << 17)
+ #define ARCH_CAPS_FB_CLEAR_CTRL (_AC(1, ULL) << 18)
++#define ARCH_CAPS_RRSBA (_AC(1, ULL) << 19)
++#define ARCH_CAPS_BHI_NO (_AC(1, ULL) << 20)
+
+ #define MSR_FLUSH_CMD 0x0000010b
+ #define FLUSH_CMD_L1D (_AC(1, ULL) << 0)
+--
+2.37.3
+
diff --git a/0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch b/0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch
new file mode 100644
index 0000000..0dfb3b4
--- /dev/null
+++ b/0035-xen-sched-setup-dom0-vCPUs-affinity-only-once.patch
@@ -0,0 +1,123 @@
+From 1e31848cdd8d2ff3cb76f364f04f9771f9b3a8b1 Mon Sep 17 00:00:00 2001
+From: Dario Faggioli <dfaggioli@suse.com>
+Date: Mon, 15 Aug 2022 15:41:25 +0200
+Subject: [PATCH 35/67] xen/sched: setup dom0 vCPUs affinity only once
+
+Right now, affinity for dom0 vCPUs is set up in two steps. This is a
+problem as, at least in Credit2, unit_insert() sees and uses the
+"intermediate" affinity, and places the vCPUs on CPUs where they cannot
+be run. This in turn results in boot hangs if the "dom0_nodes"
+parameter is used.
+
+Fix this by setting up the affinity properly once and for all, in
+sched_init_vcpu() called by vcpu_create().
+
+Note that, unless a soft-affinity is explicitly specified for dom0 (by
+using the relaxed mode of "dom0_nodes"), we set it to the default, which
+is all CPUs, instead of computing it based on the hard affinity (if any).
+This is because hard and soft affinity should be considered as
+independent user-controlled properties. In fact, if we do derive dom0's
+soft-affinity from its boot-time hard-affinity, such a computed value will
+continue to be used even if the user later changes the hard-affinity.
+And this could result in the vCPUs behaving differently from what the
+user wanted and expected.
+
+Fixes: dafd936dddbd ("Make credit2 the default scheduler")
+Reported-by: Olaf Hering <ohering@suse.de>
+Signed-off-by: Dario Faggioli <dfaggioli@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: c79e4d209be3ed2a6b8e97c35944786ed2a66b94
+master date: 2022-08-11 11:46:22 +0200
+---
+ xen/common/sched/core.c | 63 +++++++++++++++++++++++++----------------
+ 1 file changed, 39 insertions(+), 24 deletions(-)
+
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index 8f4b1ca10d1c..f07bd2681fcb 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -571,12 +571,46 @@ int sched_init_vcpu(struct vcpu *v)
+ return 1;
+ }
+
+- /*
+- * Initialize affinity settings. The idler, and potentially
+- * domain-0 VCPUs, are pinned onto their respective physical CPUs.
+- */
+- if ( is_idle_domain(d) || (is_hardware_domain(d) && opt_dom0_vcpus_pin) )
++ if ( is_idle_domain(d) )
++ {
++ /* Idle vCPUs are always pinned onto their respective pCPUs */
+ sched_set_affinity(unit, cpumask_of(processor), &cpumask_all);
++ }
++ else if ( pv_shim && v->vcpu_id == 0 )
++ {
++ /*
++ * PV-shim: vcpus are pinned 1:1. Initially only 1 cpu is online,
++ * others will be dealt with when onlining them. This avoids pinning
++ * a vcpu to a not yet online cpu here.
++ */
++ sched_set_affinity(unit, cpumask_of(0), cpumask_of(0));
++ }
++ else if ( is_hardware_domain(d) && opt_dom0_vcpus_pin )
++ {
++ /*
++ * If dom0_vcpus_pin is specified, dom0 vCPUs are pinned 1:1 to
++ * their respective pCPUs too.
++ */
++ sched_set_affinity(unit, cpumask_of(processor), &cpumask_all);
++ }
++#ifdef CONFIG_X86
++ else if ( d->domain_id == 0 )
++ {
++ /*
++ * In absence of dom0_vcpus_pin instead, the hard and soft affinity of
++ * dom0 is controlled by the (x86 only) dom0_nodes parameter. At this
++ * point it has been parsed and decoded into the dom0_cpus mask.
++ *
++ * Note that we always honor what user explicitly requested, for both
++ * hard and soft affinity, without doing any dynamic computation of
++ * either of them.
++ */
++ if ( !dom0_affinity_relaxed )
++ sched_set_affinity(unit, &dom0_cpus, &cpumask_all);
++ else
++ sched_set_affinity(unit, &cpumask_all, &dom0_cpus);
++ }
++#endif
+ else
+ sched_set_affinity(unit, &cpumask_all, &cpumask_all);
+
+@@ -3386,29 +3420,10 @@ void wait(void)
+ void __init sched_setup_dom0_vcpus(struct domain *d)
+ {
+ unsigned int i;
+- struct sched_unit *unit;
+
+ for ( i = 1; i < d->max_vcpus; i++ )
+ vcpu_create(d, i);
+
+- /*
+- * PV-shim: vcpus are pinned 1:1.
+- * Initially only 1 cpu is online, others will be dealt with when
+- * onlining them. This avoids pinning a vcpu to a not yet online cpu here.
+- */
+- if ( pv_shim )
+- sched_set_affinity(d->vcpu[0]->sched_unit,
+- cpumask_of(0), cpumask_of(0));
+- else
+- {
+- for_each_sched_unit ( d, unit )
+- {
+- if ( !opt_dom0_vcpus_pin && !dom0_affinity_relaxed )
+- sched_set_affinity(unit, &dom0_cpus, NULL);
+- sched_set_affinity(unit, NULL, &dom0_cpus);
+- }
+- }
+-
+ domain_update_node_affinity(d);
+ }
+ #endif
+--
+2.37.3
+
diff --git a/0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch b/0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch
new file mode 100644
index 0000000..1637236
--- /dev/null
+++ b/0036-tools-libxl-Replace-deprecated-sdl-option-on-QEMU-co.patch
@@ -0,0 +1,38 @@
+From c373ad3d084614a93c55e25dc20e70ffc7574971 Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Mon, 15 Aug 2022 15:42:09 +0200
+Subject: [PATCH 36/67] tools/libxl: Replace deprecated -sdl option on QEMU
+ command line
+
+"-sdl" is deprecated upstream since 6695e4c0fd9e ("softmmu/vl:
+Deprecate the -sdl and -curses option"), QEMU v6.2, and the option is
+removed by 707d93d4abc6 ("ui: Remove deprecated options "-sdl" and
+"-curses""), in upcoming QEMU v7.1.
+
+Instead, use "-display sdl", available since 1472a95bab1e ("Introduce
+-display argument"), before QEMU v1.0.
+
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Reviewed-by: Jason Andryuk <jandryuk@gmail.com>
+master commit: 41fcb3af8ad6d4c9f65a9d72798e6d18afec55ac
+master date: 2022-08-11 11:47:11 +0200
+---
+ tools/libs/light/libxl_dm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
+index 24f6e73b0a77..ae5f35e0c3fd 100644
+--- a/tools/libs/light/libxl_dm.c
++++ b/tools/libs/light/libxl_dm.c
+@@ -1349,7 +1349,7 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
+ flexarray_append_pair(dm_args, "-display", "none");
+
+ if (sdl && !is_stubdom) {
+- flexarray_append(dm_args, "-sdl");
++ flexarray_append_pair(dm_args, "-display", "sdl");
+ if (sdl->display)
+ flexarray_append_pair(dm_envs, "DISPLAY", sdl->display);
+ if (sdl->xauthority)
+--
+2.37.3
+
diff --git a/0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch b/0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch
new file mode 100644
index 0000000..d27766b
--- /dev/null
+++ b/0037-x86-spec-ctrl-Enumeration-for-PBRSB_NO.patch
@@ -0,0 +1,67 @@
+From fba0c22e79922085c46527eb1391123aadfb24d1 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Mon, 15 Aug 2022 15:42:31 +0200
+Subject: [PATCH 37/67] x86/spec-ctrl: Enumeration for PBRSB_NO
+
+The PBRSB_NO bit indicates that the CPU is not vulnerable to the Post-Barrier
+RSB speculative vulnerability.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: b874e47eb13feb75be3ee7b5dc4ae9c97d80d774
+master date: 2022-08-11 16:19:50 +0100
+---
+ xen/arch/x86/msr.c | 2 +-
+ xen/arch/x86/spec_ctrl.c | 3 ++-
+ xen/include/asm-x86/msr-index.h | 1 +
+ 3 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
+index aa9face9aad3..9bced8d36caa 100644
+--- a/xen/arch/x86/msr.c
++++ b/xen/arch/x86/msr.c
+@@ -148,7 +148,7 @@ int init_domain_msr_policy(struct domain *d)
+ ARCH_CAPS_SSB_NO | ARCH_CAPS_MDS_NO | ARCH_CAPS_IF_PSCHANGE_MC_NO |
+ ARCH_CAPS_TAA_NO | ARCH_CAPS_SBDR_SSDP_NO | ARCH_CAPS_FBSDP_NO |
+ ARCH_CAPS_PSDP_NO | ARCH_CAPS_FB_CLEAR | ARCH_CAPS_RRSBA |
+- ARCH_CAPS_BHI_NO);
++ ARCH_CAPS_BHI_NO | ARCH_CAPS_PBRSB_NO);
+ }
+
+ d->arch.msr = mp;
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index ac73806eacd8..3ff602bd0281 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -419,7 +419,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+ * Hardware read-only information, stating immunity to certain issues, or
+ * suggestions of which mitigation to use.
+ */
+- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
++ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "",
+ (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "",
+ (caps & ARCH_CAPS_RSBA) ? " RSBA" : "",
+@@ -431,6 +431,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+ (caps & ARCH_CAPS_SBDR_SSDP_NO) ? " SBDR_SSDP_NO" : "",
+ (caps & ARCH_CAPS_FBSDP_NO) ? " FBSDP_NO" : "",
+ (caps & ARCH_CAPS_PSDP_NO) ? " PSDP_NO" : "",
++ (caps & ARCH_CAPS_PBRSB_NO) ? " PBRSB_NO" : "",
+ (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "",
+ (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "",
+ (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "",
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index 49ca1f1845e6..5a830f76a8d4 100644
+--- a/xen/include/asm-x86/msr-index.h
++++ b/xen/include/asm-x86/msr-index.h
+@@ -66,6 +66,7 @@
+ #define ARCH_CAPS_FB_CLEAR_CTRL (_AC(1, ULL) << 18)
+ #define ARCH_CAPS_RRSBA (_AC(1, ULL) << 19)
+ #define ARCH_CAPS_BHI_NO (_AC(1, ULL) << 20)
++#define ARCH_CAPS_PBRSB_NO (_AC(1, ULL) << 24)
+
+ #define MSR_FLUSH_CMD 0x0000010b
+ #define FLUSH_CMD_L1D (_AC(1, ULL) << 0)
+--
+2.37.3
+
diff --git a/0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch b/0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch
new file mode 100644
index 0000000..e0e0f87
--- /dev/null
+++ b/0038-x86-amd-only-call-setup_force_cpu_cap-for-boot-CPU.patch
@@ -0,0 +1,33 @@
+From 104a54a307b08945365faf6d285cd5a02f94a80f Mon Sep 17 00:00:00 2001
+From: Ross Lagerwall <ross.lagerwall@citrix.com>
+Date: Mon, 15 Aug 2022 15:43:08 +0200
+Subject: [PATCH 38/67] x86/amd: only call setup_force_cpu_cap for boot CPU
+
+This should only be called for the boot CPU to avoid calling _init code
+after it has been unloaded.
+
+Fixes: 062868a5a8b4 ("x86/amd: Work around CLFLUSH ordering on older parts")
+Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 31b41ce858c8bd5159212d40969f8e0b7124bbf0
+master date: 2022-08-11 17:44:26 +0200
+---
+ xen/arch/x86/cpu/amd.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
+index 60dbe61a61ca..a8d2fb8a1590 100644
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -820,7 +820,7 @@ static void init_amd(struct cpuinfo_x86 *c)
+ * everything, including reads and writes to address, and
+ * LFENCE/SFENCE instructions.
+ */
+- if (!cpu_has_clflushopt)
++ if (c == &boot_cpu_data && !cpu_has_clflushopt)
+ setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE);
+
+ switch(c->x86)
+--
+2.37.3
+
diff --git a/0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch b/0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch
new file mode 100644
index 0000000..50d83b6
--- /dev/null
+++ b/0039-build-x86-suppress-GNU-ld-2.39-warning-about-RWX-loa.patch
@@ -0,0 +1,38 @@
+From a075900cf768fe45f270b6f1d09c4e99281da142 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 15 Aug 2022 15:43:56 +0200
+Subject: [PATCH 39/67] build/x86: suppress GNU ld 2.39 warning about RWX load
+ segments
+
+Commit 68f5aac012b9 ("build: suppress future GNU ld warning about RWX
+load segments") didn't quite cover all the cases: Apparently I missed
+ones in the building of 32-bit helper objects because of only looking at
+incremental builds (where those wouldn't normally be re-built). Clone
+the workaround there to the specific Makefile in question.
+
+Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 3eb1865ae305772b558757904d81951e31de43de
+master date: 2022-08-11 17:45:12 +0200
+---
+ xen/arch/x86/boot/build32.mk | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/xen/arch/x86/boot/build32.mk b/xen/arch/x86/boot/build32.mk
+index e90680cd9f52..d2fae5cf9eee 100644
+--- a/xen/arch/x86/boot/build32.mk
++++ b/xen/arch/x86/boot/build32.mk
+@@ -8,6 +8,9 @@ CFLAGS += -Werror -fno-builtin -g0 -msoft-float
+ CFLAGS += -I$(BASEDIR)/include
+ CFLAGS := $(filter-out -flto,$(CFLAGS))
+
++LDFLAGS_DIRECT-$(shell $(LD) -v --warn-rwx-segments >/dev/null 2>&1 && echo y) := --no-warn-rwx-segments
++LDFLAGS_DIRECT += $(LDFLAGS_DIRECT-y)
++
+ # NB. awk invocation is a portable alternative to 'head -n -1'
+ %.S: %.bin
+ (od -v -t x $< | tr -s ' ' | awk 'NR > 1 {print s} {s=$$0}' | \
+--
+2.37.3
+
diff --git a/0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch b/0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch
new file mode 100644
index 0000000..c29e5ac
--- /dev/null
+++ b/0040-PCI-simplify-and-thus-correct-pci_get_pdev-_by_domai.patch
@@ -0,0 +1,153 @@
+From 9acedc3c58c31930737edbe212f2ccf437a0b757 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Mon, 15 Aug 2022 15:44:23 +0200
+Subject: [PATCH 40/67] PCI: simplify (and thus correct)
+ pci_get_pdev{,_by_domain}()
+
+The last "wildcard" use of either function went away with f591755823a7
+("IOMMU/PCI: don't let domain cleanup continue when device de-assignment
+failed"). Don't allow them to be called this way anymore. Besides
+simplifying the code this also fixes two bugs:
+
+1) When seg != -1, the outer loops should have been terminated after the
+ first iteration, or else a device with the same BDF but on another
+ segment could be found / returned.
+
+Reported-by: Rahul Singh <rahul.singh@arm.com>
+
+2) When seg == -1 calling get_pseg() is bogus. The function (taking a
+ u16) would look for segment 0xffff, which might exist. If it exists,
+ we might then find / return a wrong device.
+
+In pci_get_pdev_by_domain() also switch from using the per-segment list
+to using the per-domain one, with the exception of the hardware domain
+(see the code comment there).
+
+While there, also constify "pseg" and drop "pdev"'s already
+unnecessary initializer.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Rahul Singh <rahul.singh@arm.com>
+Tested-by: Rahul Singh <rahul.singh@arm.com>
+master commit: 8cf6e0738906fc269af40135ed82a07815dd3b9c
+master date: 2022-08-12 08:34:33 +0200
+---
+ xen/drivers/passthrough/pci.c | 61 +++++++++++++++--------------------
+ xen/include/xen/pci.h | 6 ++--
+ 2 files changed, 29 insertions(+), 38 deletions(-)
+
+diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
+index bbacbe41dac4..9b81b941c8bb 100644
+--- a/xen/drivers/passthrough/pci.c
++++ b/xen/drivers/passthrough/pci.c
+@@ -528,30 +528,19 @@ int __init pci_ro_device(int seg, int bus, int devfn)
+ return 0;
+ }
+
+-struct pci_dev *pci_get_pdev(int seg, int bus, int devfn)
++struct pci_dev *pci_get_pdev(uint16_t seg, uint8_t bus, uint8_t devfn)
+ {
+- struct pci_seg *pseg = get_pseg(seg);
+- struct pci_dev *pdev = NULL;
++ const struct pci_seg *pseg = get_pseg(seg);
++ struct pci_dev *pdev;
+
+ ASSERT(pcidevs_locked());
+- ASSERT(seg != -1 || bus == -1);
+- ASSERT(bus != -1 || devfn == -1);
+
+ if ( !pseg )
+- {
+- if ( seg == -1 )
+- radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
+- if ( !pseg )
+- return NULL;
+- }
++ return NULL;
+
+- do {
+- list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
+- if ( (pdev->bus == bus || bus == -1) &&
+- (pdev->devfn == devfn || devfn == -1) )
+- return pdev;
+- } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
+- pseg->nr + 1, 1) );
++ list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
++ if ( pdev->bus == bus && pdev->devfn == devfn )
++ return pdev;
+
+ return NULL;
+ }
+@@ -577,31 +566,33 @@ struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn)
+ return pdev;
+ }
+
+-struct pci_dev *pci_get_pdev_by_domain(const struct domain *d, int seg,
+- int bus, int devfn)
++struct pci_dev *pci_get_pdev_by_domain(const struct domain *d, uint16_t seg,
++ uint8_t bus, uint8_t devfn)
+ {
+- struct pci_seg *pseg = get_pseg(seg);
+- struct pci_dev *pdev = NULL;
++ struct pci_dev *pdev;
+
+- ASSERT(seg != -1 || bus == -1);
+- ASSERT(bus != -1 || devfn == -1);
+-
+- if ( !pseg )
++ /*
++ * The hardware domain owns the majority of the devices in the system.
++ * When there are multiple segments, traversing the per-segment list is
++ * likely going to be faster, whereas for a single segment the difference
++ * shouldn't be that large.
++ */
++ if ( is_hardware_domain(d) )
+ {
+- if ( seg == -1 )
+- radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
++ const struct pci_seg *pseg = get_pseg(seg);
++
+ if ( !pseg )
+ return NULL;
+- }
+
+- do {
+ list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
+- if ( (pdev->bus == bus || bus == -1) &&
+- (pdev->devfn == devfn || devfn == -1) &&
+- (pdev->domain == d) )
++ if ( pdev->bus == bus && pdev->devfn == devfn &&
++ pdev->domain == d )
++ return pdev;
++ }
++ else
++ list_for_each_entry ( pdev, &d->pdev_list, domain_list )
++ if ( pdev->bus == bus && pdev->devfn == devfn )
+ return pdev;
+- } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
+- pseg->nr + 1, 1) );
+
+ return NULL;
+ }
+diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
+index 8e3d4d94543a..cd238ae852b0 100644
+--- a/xen/include/xen/pci.h
++++ b/xen/include/xen/pci.h
+@@ -166,10 +166,10 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
+ int pci_remove_device(u16 seg, u8 bus, u8 devfn);
+ int pci_ro_device(int seg, int bus, int devfn);
+ int pci_hide_device(unsigned int seg, unsigned int bus, unsigned int devfn);
+-struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
++struct pci_dev *pci_get_pdev(uint16_t seg, uint8_t bus, uint8_t devfn);
+ struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn);
+-struct pci_dev *pci_get_pdev_by_domain(const struct domain *, int seg,
+- int bus, int devfn);
++struct pci_dev *pci_get_pdev_by_domain(const struct domain *, uint16_t seg,
++ uint8_t bus, uint8_t devfn);
+ void pci_check_disable_device(u16 seg, u8 bus, u8 devfn);
+
+ uint8_t pci_conf_read8(pci_sbdf_t sbdf, unsigned int reg);
+--
+2.37.3
+
diff --git a/0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch b/0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch
new file mode 100644
index 0000000..3fa0e43
--- /dev/null
+++ b/0041-xen-arm-p2m-Prevent-adding-mapping-when-domain-is-dy.patch
@@ -0,0 +1,62 @@
+From 09fc590c15773c2471946a78740c6b02e8c34a45 Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Tue, 11 Oct 2022 15:05:53 +0200
+Subject: [PATCH 41/67] xen/arm: p2m: Prevent adding mapping when domain is
+ dying
+
+During the domain destroy process, the domain will still be accessible
+until it is fully destroyed, and so will the P2M, because we don't bail
+out early if is_dying is non-zero. If a domain has permission to
+modify another domain's P2M (i.e. dom0, or a stubdomain), then
+foreign mappings can be added past relinquish_p2m_mapping().
+
+Therefore, we need to prevent mappings from being added when the domain
+is dying. This commit prevents such additions by adding a
+d->is_dying check to p2m_set_entry(). This commit also enhances the
+check in relinquish_p2m_mapping() to make sure that no mappings can
+be added to the P2M after the P2M lock is released.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Tested-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+master commit: 3ebe773293e3b945460a3d6f54f3b91915397bab
+master date: 2022-10-11 14:20:18 +0200
+---
+ xen/arch/arm/p2m.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
+index 2ddd06801a82..8398251c518b 100644
+--- a/xen/arch/arm/p2m.c
++++ b/xen/arch/arm/p2m.c
+@@ -1093,6 +1093,15 @@ int p2m_set_entry(struct p2m_domain *p2m,
+ {
+ int rc = 0;
+
++ /*
++ * Any reference taken by the P2M mappings (e.g. foreign mapping) will
++ * be dropped in relinquish_p2m_mapping(). As the P2M will still
++ * be accessible after, we need to prevent mapping to be added when the
++ * domain is dying.
++ */
++ if ( unlikely(p2m->domain->is_dying) )
++ return -ENOMEM;
++
+ while ( nr )
+ {
+ unsigned long mask;
+@@ -1613,6 +1622,8 @@ int relinquish_p2m_mapping(struct domain *d)
+ unsigned int order;
+ gfn_t start, end;
+
++ BUG_ON(!d->is_dying);
++ /* No mappings can be added in the P2M after the P2M lock is released. */
+ p2m_write_lock(p2m);
+
+ start = p2m->lowest_mapped_gfn;
+--
+2.37.3
+
diff --git a/0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch b/0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch
new file mode 100644
index 0000000..8217a06
--- /dev/null
+++ b/0042-xen-arm-p2m-Handle-preemption-when-freeing-intermedi.patch
@@ -0,0 +1,167 @@
+From 0d805f9fba4bc155d15047685024f7d842e925e4 Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Tue, 11 Oct 2022 15:06:36 +0200
+Subject: [PATCH 42/67] xen/arm: p2m: Handle preemption when freeing
+ intermediate page tables
+
+At the moment the P2M page tables will be freed when the domain structure
+is freed, without any preemption. As the P2M is quite large, iterating
+through it may take more time than is reasonable without intermediate
+preemption (to run softirqs and perhaps the scheduler).
+
+Split p2m_teardown() in two parts: one preemptible and called when
+relinquishing the resources, the other one non-preemptible and called
+when freeing the domain structure.
+
+As we are now freeing the P2M pages early, we also need to prevent
+further allocation if someone calls p2m_set_entry() past p2m_teardown()
+(I wasn't able to prove this will never happen). This is done by
+the domain->is_dying check added to p2m_set_entry() in the previous patch.
+
+Similarly, we want to make sure that no-one can access the freed
+pages. Therefore the root is cleared before freeing pages.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Tested-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+master commit: 3202084566bba0ef0c45caf8c24302f83d92f9c8
+master date: 2022-10-11 14:20:56 +0200
+---
+ xen/arch/arm/domain.c | 10 +++++++--
+ xen/arch/arm/p2m.c | 47 ++++++++++++++++++++++++++++++++++++---
+ xen/include/asm-arm/p2m.h | 13 +++++++++--
+ 3 files changed, 63 insertions(+), 7 deletions(-)
+
+diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
+index 5eaf4c718ec3..223ec9694df1 100644
+--- a/xen/arch/arm/domain.c
++++ b/xen/arch/arm/domain.c
+@@ -779,10 +779,10 @@ fail:
+ void arch_domain_destroy(struct domain *d)
+ {
+ /* IOMMU page table is shared with P2M, always call
+- * iommu_domain_destroy() before p2m_teardown().
++ * iommu_domain_destroy() before p2m_final_teardown().
+ */
+ iommu_domain_destroy(d);
+- p2m_teardown(d);
++ p2m_final_teardown(d);
+ domain_vgic_free(d);
+ domain_vuart_free(d);
+ free_xenheap_page(d->shared_info);
+@@ -984,6 +984,7 @@ enum {
+ PROG_xen,
+ PROG_page,
+ PROG_mapping,
++ PROG_p2m,
+ PROG_done,
+ };
+
+@@ -1038,6 +1039,11 @@ int domain_relinquish_resources(struct domain *d)
+ if ( ret )
+ return ret;
+
++ PROGRESS(p2m):
++ ret = p2m_teardown(d);
++ if ( ret )
++ return ret;
++
+ PROGRESS(done):
+ break;
+
+diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
+index 8398251c518b..4ad3e0606e9c 100644
+--- a/xen/arch/arm/p2m.c
++++ b/xen/arch/arm/p2m.c
+@@ -1530,17 +1530,58 @@ static void p2m_free_vmid(struct domain *d)
+ spin_unlock(&vmid_alloc_lock);
+ }
+
+-void p2m_teardown(struct domain *d)
++int p2m_teardown(struct domain *d)
+ {
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
++ unsigned long count = 0;
+ struct page_info *pg;
++ unsigned int i;
++ int rc = 0;
++
++ p2m_write_lock(p2m);
++
++ /*
++ * We are about to free the intermediate page-tables, so clear the
++ * root to prevent any walk to use them.
++ */
++ for ( i = 0; i < P2M_ROOT_PAGES; i++ )
++ clear_and_clean_page(p2m->root + i);
++
++ /*
++ * The domain will not be scheduled anymore, so in theory we should
++ * not need to flush the TLBs. Do it for safety purpose.
++ *
++ * Note that all the devices have already been de-assigned. So we don't
++ * need to flush the IOMMU TLB here.
++ */
++ p2m_force_tlb_flush_sync(p2m);
++
++ while ( (pg = page_list_remove_head(&p2m->pages)) )
++ {
++ free_domheap_page(pg);
++ count++;
++ /* Arbitrarily preempt every 512 iterations */
++ if ( !(count % 512) && hypercall_preempt_check() )
++ {
++ rc = -ERESTART;
++ break;
++ }
++ }
++
++ p2m_write_unlock(p2m);
++
++ return rc;
++}
++
++void p2m_final_teardown(struct domain *d)
++{
++ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+ /* p2m not actually initialized */
+ if ( !p2m->domain )
+ return;
+
+- while ( (pg = page_list_remove_head(&p2m->pages)) )
+- free_domheap_page(pg);
++ ASSERT(page_list_empty(&p2m->pages));
+
+ if ( p2m->root )
+ free_domheap_pages(p2m->root, P2M_ROOT_ORDER);
+diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
+index 6a2108398fd7..3a2d51b35d71 100644
+--- a/xen/include/asm-arm/p2m.h
++++ b/xen/include/asm-arm/p2m.h
+@@ -192,8 +192,17 @@ void setup_virt_paging(void);
+ /* Init the datastructures for later use by the p2m code */
+ int p2m_init(struct domain *d);
+
+-/* Return all the p2m resources to Xen. */
+-void p2m_teardown(struct domain *d);
++/*
++ * The P2M resources are freed in two parts:
++ * - p2m_teardown() will be called when relinquish the resources. It
++ * will free large resources (e.g. intermediate page-tables) that
++ * requires preemption.
++ * - p2m_final_teardown() will be called when domain struct is been
++ * freed. This *cannot* be preempted and therefore one small
++ * resources should be freed here.
++ */
++int p2m_teardown(struct domain *d);
++void p2m_final_teardown(struct domain *d);
+
+ /*
+ * Remove mapping refcount on each mapping page in the p2m
+--
+2.37.3
+
diff --git a/0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch b/0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch
new file mode 100644
index 0000000..f3f7e3a
--- /dev/null
+++ b/0043-x86-p2m-add-option-to-skip-root-pagetable-removal-in.patch
@@ -0,0 +1,138 @@
+From 0f3eab90f327210d91e8e31a769376f286e8819a Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 11 Oct 2022 15:07:25 +0200
+Subject: [PATCH 43/67] x86/p2m: add option to skip root pagetable removal in
+ p2m_teardown()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add a new parameter to p2m_teardown() in order to select whether the
+root page table should also be freed. Note that all users are
+adjusted to pass the parameter to remove the root page tables, so
+behavior is not modified.
+
+No functional change intended.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Suggested-by: Julien Grall <julien@xen.org>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Tim Deegan <tim@xen.org>
+master commit: 1df52a270225527ae27bfa2fc40347bf93b78357
+master date: 2022-10-11 14:21:23 +0200
+---
+ xen/arch/x86/mm/hap/hap.c | 6 +++---
+ xen/arch/x86/mm/p2m.c | 20 ++++++++++++++++----
+ xen/arch/x86/mm/shadow/common.c | 4 ++--
+ xen/include/asm-x86/p2m.h | 2 +-
+ 4 files changed, 22 insertions(+), 10 deletions(-)
+
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index 47a7487fa7a3..a8f5a19da917 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -541,18 +541,18 @@ void hap_final_teardown(struct domain *d)
+ }
+
+ for ( i = 0; i < MAX_ALTP2M; i++ )
+- p2m_teardown(d->arch.altp2m_p2m[i]);
++ p2m_teardown(d->arch.altp2m_p2m[i], true);
+ }
+
+ /* Destroy nestedp2m's first */
+ for (i = 0; i < MAX_NESTEDP2M; i++) {
+- p2m_teardown(d->arch.nested_p2m[i]);
++ p2m_teardown(d->arch.nested_p2m[i], true);
+ }
+
+ if ( d->arch.paging.hap.total_pages != 0 )
+ hap_teardown(d, NULL);
+
+- p2m_teardown(p2m_get_hostp2m(d));
++ p2m_teardown(p2m_get_hostp2m(d), true);
+ /* Free any memory that the p2m teardown released */
+ paging_lock(d);
+ hap_set_allocation(d, 0, NULL);
+diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
+index 85681dee2623..8ba73082c1bf 100644
+--- a/xen/arch/x86/mm/p2m.c
++++ b/xen/arch/x86/mm/p2m.c
+@@ -741,11 +741,11 @@ int p2m_alloc_table(struct p2m_domain *p2m)
+ * hvm fixme: when adding support for pvh non-hardware domains, this path must
+ * cleanup any foreign p2m types (release refcnts on them).
+ */
+-void p2m_teardown(struct p2m_domain *p2m)
++void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
+ /* Return all the p2m pages to Xen.
+ * We know we don't have any extra mappings to these pages */
+ {
+- struct page_info *pg;
++ struct page_info *pg, *root_pg = NULL;
+ struct domain *d;
+
+ if (p2m == NULL)
+@@ -755,10 +755,22 @@ void p2m_teardown(struct p2m_domain *p2m)
+
+ p2m_lock(p2m);
+ ASSERT(atomic_read(&d->shr_pages) == 0);
+- p2m->phys_table = pagetable_null();
++
++ if ( remove_root )
++ p2m->phys_table = pagetable_null();
++ else if ( !pagetable_is_null(p2m->phys_table) )
++ {
++ root_pg = pagetable_get_page(p2m->phys_table);
++ clear_domain_page(pagetable_get_mfn(p2m->phys_table));
++ }
+
+ while ( (pg = page_list_remove_head(&p2m->pages)) )
+- d->arch.paging.free_page(d, pg);
++ if ( pg != root_pg )
++ d->arch.paging.free_page(d, pg);
++
++ if ( root_pg )
++ page_list_add(root_pg, &p2m->pages);
++
+ p2m_unlock(p2m);
+ }
+
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index 4a8882430b3f..abe6d4334382 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2768,7 +2768,7 @@ int shadow_enable(struct domain *d, u32 mode)
+ paging_unlock(d);
+ out_unlocked:
+ if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
+- p2m_teardown(p2m);
++ p2m_teardown(p2m, true);
+ if ( rv != 0 && pg != NULL )
+ {
+ pg->count_info &= ~PGC_count_mask;
+@@ -2933,7 +2933,7 @@ void shadow_final_teardown(struct domain *d)
+ shadow_teardown(d, NULL);
+
+ /* It is now safe to pull down the p2m map. */
+- p2m_teardown(p2m_get_hostp2m(d));
++ p2m_teardown(p2m_get_hostp2m(d), true);
+ /* Free any shadow memory that the p2m teardown released */
+ paging_lock(d);
+ shadow_set_allocation(d, 0, NULL);
+diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
+index 46e8b94a49df..46eb51d44cf5 100644
+--- a/xen/include/asm-x86/p2m.h
++++ b/xen/include/asm-x86/p2m.h
+@@ -619,7 +619,7 @@ int p2m_init(struct domain *d);
+ int p2m_alloc_table(struct p2m_domain *p2m);
+
+ /* Return all the p2m resources to Xen. */
+-void p2m_teardown(struct p2m_domain *p2m);
++void p2m_teardown(struct p2m_domain *p2m, bool remove_root);
+ void p2m_final_teardown(struct domain *d);
+
+ /* Add a page to a domain's p2m table */
+--
+2.37.3
+
diff --git a/0044-x86-HAP-adjust-monitor-table-related-error-handling.patch b/0044-x86-HAP-adjust-monitor-table-related-error-handling.patch
new file mode 100644
index 0000000..39db626
--- /dev/null
+++ b/0044-x86-HAP-adjust-monitor-table-related-error-handling.patch
@@ -0,0 +1,77 @@
+From d24a10a91d46a56e1d406239643ec651a31033d4 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 11 Oct 2022 15:07:42 +0200
+Subject: [PATCH 44/67] x86/HAP: adjust monitor table related error handling
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+hap_make_monitor_table() will return INVALID_MFN if it encounters an
+error condition, but hap_update_paging_modes() wasn’t handling this
+value, resulting in an inappropriate value being stored in
+monitor_table. This would subsequently misguide at least
+hap_vcpu_teardown(). Avoid this by bailing early.
+
+Further, when a domain has/was already crashed or (perhaps less
+important as there's no such path known to lead here) is already dying,
+avoid calling domain_crash() on it again - that's at best confusing.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: 5b44a61180f4f2e4f490a28400c884dd357ff45d
+master date: 2022-10-11 14:21:56 +0200
+---
+ xen/arch/x86/mm/hap/hap.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index a8f5a19da917..d75dc2b9ed3d 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -39,6 +39,7 @@
+ #include <asm/domain.h>
+ #include <xen/numa.h>
+ #include <asm/hvm/nestedhvm.h>
++#include <public/sched.h>
+
+ #include "private.h"
+
+@@ -405,8 +406,13 @@ static mfn_t hap_make_monitor_table(struct vcpu *v)
+ return m4mfn;
+
+ oom:
+- printk(XENLOG_G_ERR "out of memory building monitor pagetable\n");
+- domain_crash(d);
++ if ( !d->is_dying &&
++ (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
++ {
++ printk(XENLOG_G_ERR "%pd: out of memory building monitor pagetable\n",
++ d);
++ domain_crash(d);
++ }
+ return INVALID_MFN;
+ }
+
+@@ -766,6 +772,9 @@ static void hap_update_paging_modes(struct vcpu *v)
+ if ( pagetable_is_null(v->arch.hvm.monitor_table) )
+ {
+ mfn_t mmfn = hap_make_monitor_table(v);
++
++ if ( mfn_eq(mmfn, INVALID_MFN) )
++ goto unlock;
+ v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn);
+ make_cr3(v, mmfn);
+ hvm_update_host_cr3(v);
+@@ -774,6 +783,7 @@ static void hap_update_paging_modes(struct vcpu *v)
+ /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
+ hap_update_cr3(v, 0, false);
+
++ unlock:
+ paging_unlock(d);
+ put_gfn(d, cr3_gfn);
+ }
+--
+2.37.3
+
diff --git a/0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch b/0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch
new file mode 100644
index 0000000..7cf356d
--- /dev/null
+++ b/0045-x86-shadow-tolerate-failure-of-sh_set_toplevel_shado.patch
@@ -0,0 +1,76 @@
+From 95f6d555ec84383f7daaf3374f65bec5ff4351f5 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 11 Oct 2022 15:07:57 +0200
+Subject: [PATCH 45/67] x86/shadow: tolerate failure of
+ sh_set_toplevel_shadow()
+
+Subsequently sh_set_toplevel_shadow() will be adjusted to install a
+blank entry in case prealloc fails. There are, in fact, pre-existing
+error paths which would put in place a blank entry. The 4- and 2-level
+code in sh_update_cr3(), however, assume the top level entry to be
+valid.
+
+Hence bail from the function in the unlikely event that it's not. Note
+that 3-level logic works differently: In particular a guest is free to
+supply a PDPTR pointing at 4 non-present (or otherwise deemed invalid)
+entries. The guest will crash, but we already cope with that.
+
+Really mfn_valid() is likely wrong to use in sh_set_toplevel_shadow(),
+and it should instead be !mfn_eq(gmfn, INVALID_MFN). Avoid such a change
+in security context, but add a respective assertion.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Tim Deegan <tim@xen.org>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: eac000978c1feb5a9ee3236ab0c0da9a477e5336
+master date: 2022-10-11 14:22:24 +0200
+---
+ xen/arch/x86/mm/shadow/common.c | 1 +
+ xen/arch/x86/mm/shadow/multi.c | 10 ++++++++++
+ 2 files changed, 11 insertions(+)
+
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index abe6d4334382..0ab2ac6b7a3c 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2583,6 +2583,7 @@ void sh_set_toplevel_shadow(struct vcpu *v,
+ /* Now figure out the new contents: is this a valid guest MFN? */
+ if ( !mfn_valid(gmfn) )
+ {
++ ASSERT(mfn_eq(gmfn, INVALID_MFN));
+ new_entry = pagetable_null();
+ goto install_new_entry;
+ }
+diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
+index 9b43cb116c47..7e0494cf7faa 100644
+--- a/xen/arch/x86/mm/shadow/multi.c
++++ b/xen/arch/x86/mm/shadow/multi.c
+@@ -3697,6 +3697,11 @@ sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 )
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
+ sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, sh_make_shadow);
++ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
++ {
++ ASSERT(d->is_dying || d->is_shutting_down);
++ return;
++ }
+ if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) )
+ {
+ mfn_t smfn = pagetable_get_mfn(v->arch.paging.shadow.shadow_table[0]);
+@@ -3757,6 +3762,11 @@ sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 )
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
+ sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, sh_make_shadow);
++ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
++ {
++ ASSERT(d->is_dying || d->is_shutting_down);
++ return;
++ }
+ #else
+ #error This should never happen
+ #endif
+--
+2.37.3
+
diff --git a/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch b/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch
new file mode 100644
index 0000000..62be72a
--- /dev/null
+++ b/0046-x86-shadow-tolerate-failure-in-shadow_prealloc.patch
@@ -0,0 +1,279 @@
+From 1e26afa846fb9a00b9155280eeae3b8cb8375dd6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 11 Oct 2022 15:08:14 +0200
+Subject: [PATCH 46/67] x86/shadow: tolerate failure in shadow_prealloc()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Prevent _shadow_prealloc() from calling BUG() when unable to fulfill
+the pre-allocation and instead return true/false. Modify
+shadow_prealloc() to crash the domain on allocation failure (if the
+domain is not already dying), as shadow cannot operate normally after
+that. Modify callers to also gracefully handle {_,}shadow_prealloc()
+failing to fulfill the request.
+
+Note this in turn requires adjusting the callers of
+sh_make_monitor_table() also to handle it returning INVALID_MFN.
+sh_update_paging_modes() is also modified to add additional error
+paths in case of allocation failure, some of those will return with
+null monitor page tables (and the domain likely crashed). This is no
+different that current error paths, but the newly introduced ones are
+more likely to trigger.
+
+The now added failure points in sh_update_paging_modes() also require
+that on some error return paths the previous structures are cleared,
+and thus the monitor table is null.
+
+While there, adjust the 'type' parameter type of shadow_prealloc() to
+unsigned int rather than u32.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Tim Deegan <tim@xen.org>
+master commit: b7f93c6afb12b6061e2d19de2f39ea09b569ac68
+master date: 2022-10-11 14:22:53 +0200
+---
+ xen/arch/x86/mm/shadow/common.c | 69 ++++++++++++++++++++++++--------
+ xen/arch/x86/mm/shadow/hvm.c | 4 +-
+ xen/arch/x86/mm/shadow/multi.c | 11 +++--
+ xen/arch/x86/mm/shadow/private.h | 3 +-
+ 4 files changed, 66 insertions(+), 21 deletions(-)
+
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index 0ab2ac6b7a3c..fc4f7f78ce43 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -36,6 +36,7 @@
+ #include <asm/flushtlb.h>
+ #include <asm/shadow.h>
+ #include <xen/numa.h>
++#include <public/sched.h>
+ #include "private.h"
+
+ DEFINE_PER_CPU(uint32_t,trace_shadow_path_flags);
+@@ -927,14 +928,15 @@ static inline void trace_shadow_prealloc_unpin(struct domain *d, mfn_t smfn)
+
+ /* Make sure there are at least count order-sized pages
+ * available in the shadow page pool. */
+-static void _shadow_prealloc(struct domain *d, unsigned int pages)
++static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages)
+ {
+ struct vcpu *v;
+ struct page_info *sp, *t;
+ mfn_t smfn;
+ int i;
+
+- if ( d->arch.paging.shadow.free_pages >= pages ) return;
++ if ( d->arch.paging.shadow.free_pages >= pages )
++ return true;
+
+ /* Shouldn't have enabled shadows if we've no vcpus. */
+ ASSERT(d->vcpu && d->vcpu[0]);
+@@ -950,7 +952,8 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages)
+ sh_unpin(d, smfn);
+
+ /* See if that freed up enough space */
+- if ( d->arch.paging.shadow.free_pages >= pages ) return;
++ if ( d->arch.paging.shadow.free_pages >= pages )
++ return true;
+ }
+
+ /* Stage two: all shadow pages are in use in hierarchies that are
+@@ -973,7 +976,7 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages)
+ if ( d->arch.paging.shadow.free_pages >= pages )
+ {
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
+- return;
++ return true;
+ }
+ }
+ }
+@@ -986,7 +989,12 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages)
+ d->arch.paging.shadow.total_pages,
+ d->arch.paging.shadow.free_pages,
+ d->arch.paging.shadow.p2m_pages);
+- BUG();
++
++ ASSERT(d->is_dying);
++
++ guest_flush_tlb_mask(d, d->dirty_cpumask);
++
++ return false;
+ }
+
+ /* Make sure there are at least count pages of the order according to
+@@ -994,9 +1002,19 @@ static void _shadow_prealloc(struct domain *d, unsigned int pages)
+ * This must be called before any calls to shadow_alloc(). Since this
+ * will free existing shadows to make room, it must be called early enough
+ * to avoid freeing shadows that the caller is currently working on. */
+-void shadow_prealloc(struct domain *d, u32 type, unsigned int count)
++bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count)
+ {
+- return _shadow_prealloc(d, shadow_size(type) * count);
++ bool ret = _shadow_prealloc(d, shadow_size(type) * count);
++
++ if ( !ret && !d->is_dying &&
++ (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
++ /*
++ * Failing to allocate memory required for shadow usage can only result in
++ * a domain crash, do it here rather that relying on every caller to do it.
++ */
++ domain_crash(d);
++
++ return ret;
+ }
+
+ /* Deliberately free all the memory we can: this will tear down all of
+@@ -1215,7 +1233,7 @@ void shadow_free(struct domain *d, mfn_t smfn)
+ static struct page_info *
+ shadow_alloc_p2m_page(struct domain *d)
+ {
+- struct page_info *pg;
++ struct page_info *pg = NULL;
+
+ /* This is called both from the p2m code (which never holds the
+ * paging lock) and the log-dirty code (which always does). */
+@@ -1233,16 +1251,18 @@ shadow_alloc_p2m_page(struct domain *d)
+ d->arch.paging.shadow.p2m_pages,
+ shadow_min_acceptable_pages(d));
+ }
+- paging_unlock(d);
+- return NULL;
++ goto out;
+ }
+
+- shadow_prealloc(d, SH_type_p2m_table, 1);
++ if ( !shadow_prealloc(d, SH_type_p2m_table, 1) )
++ goto out;
++
+ pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0));
+ d->arch.paging.shadow.p2m_pages++;
+ d->arch.paging.shadow.total_pages--;
+ ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask));
+
++ out:
+ paging_unlock(d);
+
+ return pg;
+@@ -1333,7 +1353,9 @@ int shadow_set_allocation(struct domain *d, unsigned int pages, bool *preempted)
+ else if ( d->arch.paging.shadow.total_pages > pages )
+ {
+ /* Need to return memory to domheap */
+- _shadow_prealloc(d, 1);
++ if ( !_shadow_prealloc(d, 1) )
++ return -ENOMEM;
++
+ sp = page_list_remove_head(&d->arch.paging.shadow.freelist);
+ ASSERT(sp);
+ /*
+@@ -2401,12 +2423,13 @@ static void sh_update_paging_modes(struct vcpu *v)
+ if ( mfn_eq(v->arch.paging.shadow.oos_snapshot[0], INVALID_MFN) )
+ {
+ int i;
++
++ if ( !shadow_prealloc(d, SH_type_oos_snapshot, SHADOW_OOS_PAGES) )
++ return;
++
+ for(i = 0; i < SHADOW_OOS_PAGES; i++)
+- {
+- shadow_prealloc(d, SH_type_oos_snapshot, 1);
+ v->arch.paging.shadow.oos_snapshot[i] =
+ shadow_alloc(d, SH_type_oos_snapshot, 0);
+- }
+ }
+ #endif /* OOS */
+
+@@ -2470,6 +2493,9 @@ static void sh_update_paging_modes(struct vcpu *v)
+ mfn_t mmfn = sh_make_monitor_table(
+ v, v->arch.paging.mode->shadow.shadow_levels);
+
++ if ( mfn_eq(mmfn, INVALID_MFN) )
++ return;
++
+ v->arch.hvm.monitor_table = pagetable_from_mfn(mmfn);
+ make_cr3(v, mmfn);
+ hvm_update_host_cr3(v);
+@@ -2508,6 +2534,12 @@ static void sh_update_paging_modes(struct vcpu *v)
+ v->arch.hvm.monitor_table = pagetable_null();
+ new_mfn = sh_make_monitor_table(
+ v, v->arch.paging.mode->shadow.shadow_levels);
++ if ( mfn_eq(new_mfn, INVALID_MFN) )
++ {
++ sh_destroy_monitor_table(v, old_mfn,
++ old_mode->shadow.shadow_levels);
++ return;
++ }
+ v->arch.hvm.monitor_table = pagetable_from_mfn(new_mfn);
+ SHADOW_PRINTK("new monitor table %"PRI_mfn "\n",
+ mfn_x(new_mfn));
+@@ -2593,7 +2625,12 @@ void sh_set_toplevel_shadow(struct vcpu *v,
+ if ( !mfn_valid(smfn) )
+ {
+ /* Make sure there's enough free shadow memory. */
+- shadow_prealloc(d, root_type, 1);
++ if ( !shadow_prealloc(d, root_type, 1) )
++ {
++ new_entry = pagetable_null();
++ goto install_new_entry;
++ }
++
+ /* Shadow the page. */
+ smfn = make_shadow(v, gmfn, root_type);
+ }
+diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c
+index 87fc57704f25..d68796c495b7 100644
+--- a/xen/arch/x86/mm/shadow/hvm.c
++++ b/xen/arch/x86/mm/shadow/hvm.c
+@@ -700,7 +700,9 @@ mfn_t sh_make_monitor_table(const struct vcpu *v, unsigned int shadow_levels)
+ ASSERT(!pagetable_get_pfn(v->arch.hvm.monitor_table));
+
+ /* Guarantee we can get the memory we need */
+- shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS);
++ if ( !shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS) )
++ return INVALID_MFN;
++
+ m4mfn = shadow_alloc(d, SH_type_monitor_table, 0);
+ mfn_to_page(m4mfn)->shadow_flags = 4;
+
+diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
+index 7e0494cf7faa..6a9f82d39ce6 100644
+--- a/xen/arch/x86/mm/shadow/multi.c
++++ b/xen/arch/x86/mm/shadow/multi.c
+@@ -2825,9 +2825,14 @@ static int sh_page_fault(struct vcpu *v,
+ * Preallocate shadow pages *before* removing writable accesses
+ * otherwhise an OOS L1 might be demoted and promoted again with
+ * writable mappings. */
+- shadow_prealloc(d,
+- SH_type_l1_shadow,
+- GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1);
++ if ( !shadow_prealloc(d, SH_type_l1_shadow,
++ GUEST_PAGING_LEVELS < 4
++ ? 1 : GUEST_PAGING_LEVELS - 1) )
++ {
++ paging_unlock(d);
++ put_gfn(d, gfn_x(gfn));
++ return 0;
++ }
+
+ rc = gw_remove_write_accesses(v, va, &gw);
+
+diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
+index 911db46e7399..3fe0388e7c4f 100644
+--- a/xen/arch/x86/mm/shadow/private.h
++++ b/xen/arch/x86/mm/shadow/private.h
+@@ -351,7 +351,8 @@ void shadow_promote(struct domain *d, mfn_t gmfn, u32 type);
+ void shadow_demote(struct domain *d, mfn_t gmfn, u32 type);
+
+ /* Shadow page allocation functions */
+-void shadow_prealloc(struct domain *d, u32 shadow_type, unsigned int count);
++bool __must_check shadow_prealloc(struct domain *d, unsigned int shadow_type,
++ unsigned int count);
+ mfn_t shadow_alloc(struct domain *d,
+ u32 shadow_type,
+ unsigned long backpointer);
+--
+2.37.3
+
diff --git a/0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch b/0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch
new file mode 100644
index 0000000..c81cfab
--- /dev/null
+++ b/0047-x86-p2m-refuse-new-allocations-for-dying-domains.patch
@@ -0,0 +1,100 @@
+From 4f9b535194f70582863f2a78f113547d8822b2b9 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 11 Oct 2022 15:08:28 +0200
+Subject: [PATCH 47/67] x86/p2m: refuse new allocations for dying domains
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This will in particular prevent any attempts to add entries to the p2m,
+once - in a subsequent change - non-root entries have been removed.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Tim Deegan <tim@xen.org>
+master commit: ff600a8cf8e36f8ecbffecf96a035952e022ab87
+master date: 2022-10-11 14:23:22 +0200
+---
+ xen/arch/x86/mm/hap/hap.c | 5 ++++-
+ xen/arch/x86/mm/shadow/common.c | 18 ++++++++++++++----
+ 2 files changed, 18 insertions(+), 5 deletions(-)
+
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index d75dc2b9ed3d..787991233e53 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -245,6 +245,9 @@ static struct page_info *hap_alloc(struct domain *d)
+
+ ASSERT(paging_locked_by_me(d));
+
++ if ( unlikely(d->is_dying) )
++ return NULL;
++
+ pg = page_list_remove_head(&d->arch.paging.hap.freelist);
+ if ( unlikely(!pg) )
+ return NULL;
+@@ -281,7 +284,7 @@ static struct page_info *hap_alloc_p2m_page(struct domain *d)
+ d->arch.paging.hap.p2m_pages++;
+ ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask));
+ }
+- else if ( !d->arch.paging.p2m_alloc_failed )
++ else if ( !d->arch.paging.p2m_alloc_failed && !d->is_dying )
+ {
+ d->arch.paging.p2m_alloc_failed = 1;
+ dprintk(XENLOG_ERR, "d%i failed to allocate from HAP pool\n",
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index fc4f7f78ce43..9ad7e5a88650 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -938,6 +938,10 @@ static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages)
+ if ( d->arch.paging.shadow.free_pages >= pages )
+ return true;
+
++ if ( unlikely(d->is_dying) )
++ /* No reclaim when the domain is dying, teardown will take care of it. */
++ return false;
++
+ /* Shouldn't have enabled shadows if we've no vcpus. */
+ ASSERT(d->vcpu && d->vcpu[0]);
+
+@@ -990,7 +994,7 @@ static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages)
+ d->arch.paging.shadow.free_pages,
+ d->arch.paging.shadow.p2m_pages);
+
+- ASSERT(d->is_dying);
++ ASSERT_UNREACHABLE();
+
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
+
+@@ -1004,10 +1008,13 @@ static bool __must_check _shadow_prealloc(struct domain *d, unsigned int pages)
+ * to avoid freeing shadows that the caller is currently working on. */
+ bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count)
+ {
+- bool ret = _shadow_prealloc(d, shadow_size(type) * count);
++ bool ret;
+
+- if ( !ret && !d->is_dying &&
+- (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
++ if ( unlikely(d->is_dying) )
++ return false;
++
++ ret = _shadow_prealloc(d, shadow_size(type) * count);
++ if ( !ret && (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
+ /*
+ * Failing to allocate memory required for shadow usage can only result in
+ * a domain crash, do it here rather that relying on every caller to do it.
+@@ -1235,6 +1242,9 @@ shadow_alloc_p2m_page(struct domain *d)
+ {
+ struct page_info *pg = NULL;
+
++ if ( unlikely(d->is_dying) )
++ return NULL;
++
+ /* This is called both from the p2m code (which never holds the
+ * paging lock) and the log-dirty code (which always does). */
+ paging_lock_recursive(d);
+--
+2.37.3
+
diff --git a/0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch b/0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch
new file mode 100644
index 0000000..c3d5a2c
--- /dev/null
+++ b/0048-x86-p2m-truly-free-paging-pool-memory-for-dying-doma.patch
@@ -0,0 +1,115 @@
+From 7f055b011a657f8f16b0df242301efb312058eea Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 11 Oct 2022 15:08:42 +0200
+Subject: [PATCH 48/67] x86/p2m: truly free paging pool memory for dying
+ domains
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Modify {hap,shadow}_free to free the page immediately if the domain is
+dying, so that pages don't accumulate in the pool when
+{shadow,hap}_final_teardown() get called. This is to limit the amount of
+work which needs to be done there (in a non-preemptable manner).
+
+Note the call to shadow_free() in shadow_free_p2m_page() is moved after
+increasing total_pages, so that the decrease done in shadow_free() in
+case the domain is dying doesn't underflow the counter, even if just for
+a short interval.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Tim Deegan <tim@xen.org>
+master commit: f50a2c0e1d057c00d6061f40ae24d068226052ad
+master date: 2022-10-11 14:23:51 +0200
+---
+ xen/arch/x86/mm/hap/hap.c | 12 ++++++++++++
+ xen/arch/x86/mm/shadow/common.c | 28 +++++++++++++++++++++++++---
+ 2 files changed, 37 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index 787991233e53..aef2297450e1 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -265,6 +265,18 @@ static void hap_free(struct domain *d, mfn_t mfn)
+
+ ASSERT(paging_locked_by_me(d));
+
++ /*
++ * For dying domains, actually free the memory here. This way less work is
++ * left to hap_final_teardown(), which cannot easily have preemption checks
++ * added.
++ */
++ if ( unlikely(d->is_dying) )
++ {
++ free_domheap_page(pg);
++ d->arch.paging.hap.total_pages--;
++ return;
++ }
++
+ d->arch.paging.hap.free_pages++;
+ page_list_add_tail(pg, &d->arch.paging.hap.freelist);
+ }
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index 9ad7e5a88650..366956c146aa 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -1184,6 +1184,7 @@ mfn_t shadow_alloc(struct domain *d,
+ void shadow_free(struct domain *d, mfn_t smfn)
+ {
+ struct page_info *next = NULL, *sp = mfn_to_page(smfn);
++ bool dying = ACCESS_ONCE(d->is_dying);
+ struct page_list_head *pin_list;
+ unsigned int pages;
+ u32 shadow_type;
+@@ -1226,11 +1227,32 @@ void shadow_free(struct domain *d, mfn_t smfn)
+ * just before the allocator hands the page out again. */
+ page_set_tlbflush_timestamp(sp);
+ perfc_decr(shadow_alloc_count);
+- page_list_add_tail(sp, &d->arch.paging.shadow.freelist);
++
++ /*
++ * For dying domains, actually free the memory here. This way less
++ * work is left to shadow_final_teardown(), which cannot easily have
++ * preemption checks added.
++ */
++ if ( unlikely(dying) )
++ {
++ /*
++ * The backpointer field (sh.back) used by shadow code aliases the
++ * domain owner field, unconditionally clear it here to avoid
++ * free_domheap_page() attempting to parse it.
++ */
++ page_set_owner(sp, NULL);
++ free_domheap_page(sp);
++ }
++ else
++ page_list_add_tail(sp, &d->arch.paging.shadow.freelist);
++
+ sp = next;
+ }
+
+- d->arch.paging.shadow.free_pages += pages;
++ if ( unlikely(dying) )
++ d->arch.paging.shadow.total_pages -= pages;
++ else
++ d->arch.paging.shadow.free_pages += pages;
+ }
+
+ /* Divert a page from the pool to be used by the p2m mapping.
+@@ -1300,9 +1322,9 @@ shadow_free_p2m_page(struct domain *d, struct page_info *pg)
+ * paging lock) and the log-dirty code (which always does). */
+ paging_lock_recursive(d);
+
+- shadow_free(d, page_to_mfn(pg));
+ d->arch.paging.shadow.p2m_pages--;
+ d->arch.paging.shadow.total_pages++;
++ shadow_free(d, page_to_mfn(pg));
+
+ paging_unlock(d);
+ }
+--
+2.37.3
+
diff --git a/0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch b/0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch
new file mode 100644
index 0000000..83502a6
--- /dev/null
+++ b/0049-x86-p2m-free-the-paging-memory-pool-preemptively.patch
@@ -0,0 +1,181 @@
+From 686c920fa9389fe2b6b619643024ed98b4b7d51f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Tue, 11 Oct 2022 15:08:58 +0200
+Subject: [PATCH 49/67] x86/p2m: free the paging memory pool preemptively
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The paging memory pool is currently freed in two different places:
+from {shadow,hap}_teardown() via domain_relinquish_resources() and
+from {shadow,hap}_final_teardown() via complete_domain_destroy().
+While the former does handle preemption, the latter doesn't.
+
+Attempt to move as much p2m-related freeing as possible to happen
+before the call to {shadow,hap}_teardown(), so that most memory can be
+freed in a preemptible way. In order to avoid causing issues to
+existing callers, leave the root p2m page tables set and free them in
+{hap,shadow}_final_teardown(). Also modify {hap,shadow}_free to free
+the page immediately if the domain is dying, so that pages don't
+accumulate in the pool when {shadow,hap}_final_teardown() get called.
+
+Move altp2m_vcpu_disable_ve() to be done in hap_teardown(), as that's
+the place where altp2m_active gets disabled now.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Reported-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Tim Deegan <tim@xen.org>
+master commit: e7aa55c0aab36d994bf627c92bd5386ae167e16e
+master date: 2022-10-11 14:24:21 +0200
+---
+ xen/arch/x86/domain.c | 7 ------
+ xen/arch/x86/mm/hap/hap.c | 42 ++++++++++++++++++++-------------
+ xen/arch/x86/mm/shadow/common.c | 12 ++++++++++
+ 3 files changed, 38 insertions(+), 23 deletions(-)
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 2838f976d729..ce6ddcf31397 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -38,7 +38,6 @@
+ #include <xen/livepatch.h>
+ #include <public/sysctl.h>
+ #include <public/hvm/hvm_vcpu.h>
+-#include <asm/altp2m.h>
+ #include <asm/regs.h>
+ #include <asm/mc146818rtc.h>
+ #include <asm/system.h>
+@@ -2358,12 +2357,6 @@ int domain_relinquish_resources(struct domain *d)
+ vpmu_destroy(v);
+ }
+
+- if ( altp2m_active(d) )
+- {
+- for_each_vcpu ( d, v )
+- altp2m_vcpu_disable_ve(v);
+- }
+-
+ if ( is_pv_domain(d) )
+ {
+ for_each_vcpu ( d, v )
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index aef2297450e1..a44fcfd95e1e 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -28,6 +28,7 @@
+ #include <xen/domain_page.h>
+ #include <xen/guest_access.h>
+ #include <xen/keyhandler.h>
++#include <asm/altp2m.h>
+ #include <asm/event.h>
+ #include <asm/page.h>
+ #include <asm/current.h>
+@@ -546,24 +547,8 @@ void hap_final_teardown(struct domain *d)
+ unsigned int i;
+
+ if ( hvm_altp2m_supported() )
+- {
+- d->arch.altp2m_active = 0;
+-
+- if ( d->arch.altp2m_eptp )
+- {
+- free_xenheap_page(d->arch.altp2m_eptp);
+- d->arch.altp2m_eptp = NULL;
+- }
+-
+- if ( d->arch.altp2m_visible_eptp )
+- {
+- free_xenheap_page(d->arch.altp2m_visible_eptp);
+- d->arch.altp2m_visible_eptp = NULL;
+- }
+-
+ for ( i = 0; i < MAX_ALTP2M; i++ )
+ p2m_teardown(d->arch.altp2m_p2m[i], true);
+- }
+
+ /* Destroy nestedp2m's first */
+ for (i = 0; i < MAX_NESTEDP2M; i++) {
+@@ -578,6 +563,8 @@ void hap_final_teardown(struct domain *d)
+ paging_lock(d);
+ hap_set_allocation(d, 0, NULL);
+ ASSERT(d->arch.paging.hap.p2m_pages == 0);
++ ASSERT(d->arch.paging.hap.free_pages == 0);
++ ASSERT(d->arch.paging.hap.total_pages == 0);
+ paging_unlock(d);
+ }
+
+@@ -603,6 +590,7 @@ void hap_vcpu_teardown(struct vcpu *v)
+ void hap_teardown(struct domain *d, bool *preempted)
+ {
+ struct vcpu *v;
++ unsigned int i;
+
+ ASSERT(d->is_dying);
+ ASSERT(d != current->domain);
+@@ -611,6 +599,28 @@ void hap_teardown(struct domain *d, bool *preempted)
+ for_each_vcpu ( d, v )
+ hap_vcpu_teardown(v);
+
++ /* Leave the root pt in case we get further attempts to modify the p2m. */
++ if ( hvm_altp2m_supported() )
++ {
++ if ( altp2m_active(d) )
++ for_each_vcpu ( d, v )
++ altp2m_vcpu_disable_ve(v);
++
++ d->arch.altp2m_active = 0;
++
++ FREE_XENHEAP_PAGE(d->arch.altp2m_eptp);
++ FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp);
++
++ for ( i = 0; i < MAX_ALTP2M; i++ )
++ p2m_teardown(d->arch.altp2m_p2m[i], false);
++ }
++
++ /* Destroy nestedp2m's after altp2m. */
++ for ( i = 0; i < MAX_NESTEDP2M; i++ )
++ p2m_teardown(d->arch.nested_p2m[i], false);
++
++ p2m_teardown(p2m_get_hostp2m(d), false);
++
+ paging_lock(d); /* Keep various asserts happy */
+
+ if ( d->arch.paging.hap.total_pages != 0 )
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index 366956c146aa..680766fd5170 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2891,8 +2891,17 @@ void shadow_teardown(struct domain *d, bool *preempted)
+ for_each_vcpu ( d, v )
+ shadow_vcpu_teardown(v);
+
++ p2m_teardown(p2m_get_hostp2m(d), false);
++
+ paging_lock(d);
+
++ /*
++ * Reclaim all shadow memory so that shadow_set_allocation() doesn't find
++ * in-use pages, as _shadow_prealloc() will no longer try to reclaim pages
++ * because the domain is dying.
++ */
++ shadow_blow_tables(d);
++
+ #if (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC))
+ /* Free the virtual-TLB array attached to each vcpu */
+ for_each_vcpu(d, v)
+@@ -3013,6 +3022,9 @@ void shadow_final_teardown(struct domain *d)
+ d->arch.paging.shadow.total_pages,
+ d->arch.paging.shadow.free_pages,
+ d->arch.paging.shadow.p2m_pages);
++ ASSERT(!d->arch.paging.shadow.total_pages);
++ ASSERT(!d->arch.paging.shadow.free_pages);
++ ASSERT(!d->arch.paging.shadow.p2m_pages);
+ paging_unlock(d);
+ }
+
+--
+2.37.3
+
diff --git a/0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch b/0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch
new file mode 100644
index 0000000..23e10ba
--- /dev/null
+++ b/0050-xen-x86-p2m-Add-preemption-in-p2m_teardown.patch
@@ -0,0 +1,197 @@
+From b03074bb47d10c9373688b3661c7c31da01c21a3 Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Tue, 11 Oct 2022 15:09:12 +0200
+Subject: [PATCH 50/67] xen/x86: p2m: Add preemption in p2m_teardown()
+
+The list p2m->pages contains all the pages used by the P2M. On a large
+instance this list can be quite long, and the time spent calling
+d->arch.paging.free_page() takes more than 1ms for an 80GB guest
+on a Xen running in a nested environment on a c5.metal.
+
+By extrapolation (an 8TB guest has roughly 100 times as many P2M pages),
+it would take > 100ms for an 8TB guest (which is what we currently
+security support). So add some preemption in p2m_teardown()
+and propagate to the callers. Note there are 3 places where
+the preemption is not enabled:
+ - hap_final_teardown()/shadow_final_teardown(): We are
+ preventing updates to the P2M once the domain is dying (so
+ no more pages can be allocated) and most of the P2M pages
+ will be freed in a preemptive manner when relinquishing the
+ resources. So it is fine to disable preemption.
+ - shadow_enable(): This is fine because it will undo the allocation
+ that may have been made by p2m_alloc_table() (so only the root
+ page table).
+
+The preemption is arbitrarily checked every 1024 iterations.
+
+We now need to include <xen/event.h> in p2m-basic in order to
+import the definition for local_events_need_delivery() used by
+general_preempt_check(). Ideally, the inclusion should happen in
+xen/sched.h but it opened a can of worms.
+
+Note that with the current approach, Xen doesn't keep track of whether
+the alt/nested P2Ms have been cleared. So there is some redundant work.
+However, this is not expected to incur too much overhead (the P2M lock
+shouldn't be contended during teardown). So this optimization is
+left outside of the security event.
+
+This is part of CVE-2022-33746 / XSA-410.
+
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+master commit: 8a2111250b424edc49c65c4d41b276766d30635c
+master date: 2022-10-11 14:24:48 +0200
+---
+ xen/arch/x86/mm/hap/hap.c | 22 ++++++++++++++++------
+ xen/arch/x86/mm/p2m.c | 18 +++++++++++++++---
+ xen/arch/x86/mm/shadow/common.c | 12 +++++++++---
+ xen/include/asm-x86/p2m.h | 2 +-
+ 4 files changed, 41 insertions(+), 13 deletions(-)
+
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index a44fcfd95e1e..1f9a157a0c34 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -548,17 +548,17 @@ void hap_final_teardown(struct domain *d)
+
+ if ( hvm_altp2m_supported() )
+ for ( i = 0; i < MAX_ALTP2M; i++ )
+- p2m_teardown(d->arch.altp2m_p2m[i], true);
++ p2m_teardown(d->arch.altp2m_p2m[i], true, NULL);
+
+ /* Destroy nestedp2m's first */
+ for (i = 0; i < MAX_NESTEDP2M; i++) {
+- p2m_teardown(d->arch.nested_p2m[i], true);
++ p2m_teardown(d->arch.nested_p2m[i], true, NULL);
+ }
+
+ if ( d->arch.paging.hap.total_pages != 0 )
+ hap_teardown(d, NULL);
+
+- p2m_teardown(p2m_get_hostp2m(d), true);
++ p2m_teardown(p2m_get_hostp2m(d), true, NULL);
+ /* Free any memory that the p2m teardown released */
+ paging_lock(d);
+ hap_set_allocation(d, 0, NULL);
+@@ -612,14 +612,24 @@ void hap_teardown(struct domain *d, bool *preempted)
+ FREE_XENHEAP_PAGE(d->arch.altp2m_visible_eptp);
+
+ for ( i = 0; i < MAX_ALTP2M; i++ )
+- p2m_teardown(d->arch.altp2m_p2m[i], false);
++ {
++ p2m_teardown(d->arch.altp2m_p2m[i], false, preempted);
++ if ( preempted && *preempted )
++ return;
++ }
+ }
+
+ /* Destroy nestedp2m's after altp2m. */
+ for ( i = 0; i < MAX_NESTEDP2M; i++ )
+- p2m_teardown(d->arch.nested_p2m[i], false);
++ {
++ p2m_teardown(d->arch.nested_p2m[i], false, preempted);
++ if ( preempted && *preempted )
++ return;
++ }
+
+- p2m_teardown(p2m_get_hostp2m(d), false);
++ p2m_teardown(p2m_get_hostp2m(d), false, preempted);
++ if ( preempted && *preempted )
++ return;
+
+ paging_lock(d); /* Keep various asserts happy */
+
+diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
+index 8ba73082c1bf..107f6778a6e1 100644
+--- a/xen/arch/x86/mm/p2m.c
++++ b/xen/arch/x86/mm/p2m.c
+@@ -741,12 +741,13 @@ int p2m_alloc_table(struct p2m_domain *p2m)
+ * hvm fixme: when adding support for pvh non-hardware domains, this path must
+ * cleanup any foreign p2m types (release refcnts on them).
+ */
+-void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
++void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted)
+ /* Return all the p2m pages to Xen.
+ * We know we don't have any extra mappings to these pages */
+ {
+ struct page_info *pg, *root_pg = NULL;
+ struct domain *d;
++ unsigned int i = 0;
+
+ if (p2m == NULL)
+ return;
+@@ -765,8 +766,19 @@ void p2m_teardown(struct p2m_domain *p2m, bool remove_root)
+ }
+
+ while ( (pg = page_list_remove_head(&p2m->pages)) )
+- if ( pg != root_pg )
+- d->arch.paging.free_page(d, pg);
++ {
++ if ( pg == root_pg )
++ continue;
++
++ d->arch.paging.free_page(d, pg);
++
++ /* Arbitrarily check preemption every 1024 iterations */
++ if ( preempted && !(++i % 1024) && general_preempt_check() )
++ {
++ *preempted = true;
++ break;
++ }
++ }
+
+ if ( root_pg )
+ page_list_add(root_pg, &p2m->pages);
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index 680766fd5170..8f7fddcee1e5 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2837,8 +2837,12 @@ int shadow_enable(struct domain *d, u32 mode)
+ out_locked:
+ paging_unlock(d);
+ out_unlocked:
++ /*
++ * This is fine to ignore the preemption here because only the root
++ * will be allocated by p2m_alloc_table().
++ */
+ if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
+- p2m_teardown(p2m, true);
++ p2m_teardown(p2m, true, NULL);
+ if ( rv != 0 && pg != NULL )
+ {
+ pg->count_info &= ~PGC_count_mask;
+@@ -2891,7 +2895,9 @@ void shadow_teardown(struct domain *d, bool *preempted)
+ for_each_vcpu ( d, v )
+ shadow_vcpu_teardown(v);
+
+- p2m_teardown(p2m_get_hostp2m(d), false);
++ p2m_teardown(p2m_get_hostp2m(d), false, preempted);
++ if ( preempted && *preempted )
++ return;
+
+ paging_lock(d);
+
+@@ -3012,7 +3018,7 @@ void shadow_final_teardown(struct domain *d)
+ shadow_teardown(d, NULL);
+
+ /* It is now safe to pull down the p2m map. */
+- p2m_teardown(p2m_get_hostp2m(d), true);
++ p2m_teardown(p2m_get_hostp2m(d), true, NULL);
+ /* Free any shadow memory that the p2m teardown released */
+ paging_lock(d);
+ shadow_set_allocation(d, 0, NULL);
+diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
+index 46eb51d44cf5..edbe4cee2717 100644
+--- a/xen/include/asm-x86/p2m.h
++++ b/xen/include/asm-x86/p2m.h
+@@ -619,7 +619,7 @@ int p2m_init(struct domain *d);
+ int p2m_alloc_table(struct p2m_domain *p2m);
+
+ /* Return all the p2m resources to Xen. */
+-void p2m_teardown(struct p2m_domain *p2m, bool remove_root);
++void p2m_teardown(struct p2m_domain *p2m, bool remove_root, bool *preempted);
+ void p2m_final_teardown(struct domain *d);
+
+ /* Add a page to a domain's p2m table */
+--
+2.37.3
+
diff --git a/0051-libxl-docs-Use-arch-specific-default-paging-memory.patch b/0051-libxl-docs-Use-arch-specific-default-paging-memory.patch
new file mode 100644
index 0000000..f3bded4
--- /dev/null
+++ b/0051-libxl-docs-Use-arch-specific-default-paging-memory.patch
@@ -0,0 +1,147 @@
+From 0c0680d6e7953ca4c91699e60060c732f9ead5c1 Mon Sep 17 00:00:00 2001
+From: Henry Wang <Henry.Wang@arm.com>
+Date: Tue, 11 Oct 2022 15:09:32 +0200
+Subject: [PATCH 51/67] libxl, docs: Use arch-specific default paging memory
+
+The default paging memory (described in the `shadow_memory` entry in the
+xl config) in libxl is used to determine the memory pool size for xl
+guests. Currently this size is only used for x86, and contains a part
+of RAM to shadow the resident processes. Since there are no shadow mode
+guests on Arm, the part of RAM to shadow the resident processes is not
+necessary. Therefore, this commit splits the function
+`libxl_get_required_shadow_memory()` into arch-specific helpers and
+renames the helper to `libxl__arch_get_required_paging_memory()`.
+
+On x86, this helper keeps the original calculation from
+`libxl_get_required_shadow_memory()`, so no functional change is intended.
+
+On Arm, this helper returns 1MB per vcpu plus 4KB per MiB of RAM
+for the P2M map and an additional 512KB.
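+
+For a rough illustration (hypothetical guest size, using the helpers as
+implemented below): a 4-vCPU guest with 4096 MiB of RAM gets
+4 * (256 * 4 + 4096) = 20480 KiB (20 MiB) on Arm, while on x86 the
+per-MiB term is doubled to also cover process shadows, giving 36 MiB.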
+
+Also update the xl.cfg documentation to add Arm documentation
+according to code changes and correct the comment style following Xen
+coding style.
+
+This is part of CVE-2022-33747 / XSA-409.
+
+Suggested-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+master commit: 156a239ea288972425f967ac807b3cb5b5e14874
+master date: 2022-10-11 14:28:37 +0200
+---
+ docs/man/xl.cfg.5.pod.in | 5 +++++
+ tools/libs/light/libxl_arch.h | 4 ++++
+ tools/libs/light/libxl_arm.c | 12 ++++++++++++
+ tools/libs/light/libxl_utils.c | 9 ++-------
+ tools/libs/light/libxl_x86.c | 13 +++++++++++++
+ 5 files changed, 36 insertions(+), 7 deletions(-)
+
+diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
+index 56370a37dbb1..af7fae7c52f9 100644
+--- a/docs/man/xl.cfg.5.pod.in
++++ b/docs/man/xl.cfg.5.pod.in
+@@ -1746,6 +1746,11 @@ are not using hardware assisted paging (i.e. you are using shadow
+ mode) and your guest workload consists of a very large number of
+ similar processes then increasing this value may improve performance.
+
++On Arm, this field is used to determine the size of the guest P2M pages
++pool, and the default value is 1MB per vCPU plus 4KB per MB of RAM for
++the P2M map. Users should adjust this value if bigger P2M pool size is
++needed.
++
+ =back
+
+ =head3 Processor and Platform Features
+diff --git a/tools/libs/light/libxl_arch.h b/tools/libs/light/libxl_arch.h
+index 8527fc5c6c23..6741b7f6f457 100644
+--- a/tools/libs/light/libxl_arch.h
++++ b/tools/libs/light/libxl_arch.h
+@@ -90,6 +90,10 @@ void libxl__arch_update_domain_config(libxl__gc *gc,
+ libxl_domain_config *dst,
+ const libxl_domain_config *src);
+
++_hidden
++unsigned long libxl__arch_get_required_paging_memory(unsigned long maxmem_kb,
++ unsigned int smp_cpus);
++
+ #if defined(__i386__) || defined(__x86_64__)
+
+ #define LAPIC_BASE_ADDRESS 0xfee00000
+diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
+index e2901f13b724..d59b464192c2 100644
+--- a/tools/libs/light/libxl_arm.c
++++ b/tools/libs/light/libxl_arm.c
+@@ -154,6 +154,18 @@ out:
+ return rc;
+ }
+
++unsigned long libxl__arch_get_required_paging_memory(unsigned long maxmem_kb,
++ unsigned int smp_cpus)
++{
++ /*
++ * 256 pages (1MB) per vcpu,
++ * plus 1 page per MiB of RAM for the P2M map,
++ * This is higher than the minimum that Xen would allocate if no value
++ * were given (but the Xen minimum is for safety, not performance).
++ */
++ return 4 * (256 * smp_cpus + maxmem_kb / 1024);
++}
++
+ static struct arch_info {
+ const char *guest_type;
+ const char *timer_compat;
+diff --git a/tools/libs/light/libxl_utils.c b/tools/libs/light/libxl_utils.c
+index 4699c4a0a36f..e276c0ee9cc3 100644
+--- a/tools/libs/light/libxl_utils.c
++++ b/tools/libs/light/libxl_utils.c
+@@ -18,6 +18,7 @@
+ #include <ctype.h>
+
+ #include "libxl_internal.h"
++#include "libxl_arch.h"
+ #include "_paths.h"
+
+ #ifndef LIBXL_HAVE_NONCONST_LIBXL_BASENAME_RETURN_VALUE
+@@ -39,13 +40,7 @@ char *libxl_basename(const char *name)
+
+ unsigned long libxl_get_required_shadow_memory(unsigned long maxmem_kb, unsigned int smp_cpus)
+ {
+- /* 256 pages (1MB) per vcpu,
+- plus 1 page per MiB of RAM for the P2M map,
+- plus 1 page per MiB of RAM to shadow the resident processes.
+- This is higher than the minimum that Xen would allocate if no value
+- were given (but the Xen minimum is for safety, not performance).
+- */
+- return 4 * (256 * smp_cpus + 2 * (maxmem_kb / 1024));
++ return libxl__arch_get_required_paging_memory(maxmem_kb, smp_cpus);
+ }
+
+ char *libxl_domid_to_name(libxl_ctx *ctx, uint32_t domid)
+diff --git a/tools/libs/light/libxl_x86.c b/tools/libs/light/libxl_x86.c
+index 18c3c77ccde3..4d66478fe9dd 100644
+--- a/tools/libs/light/libxl_x86.c
++++ b/tools/libs/light/libxl_x86.c
+@@ -882,6 +882,19 @@ void libxl__arch_update_domain_config(libxl__gc *gc,
+ libxl_defbool_val(src->b_info.arch_x86.msr_relaxed));
+ }
+
++unsigned long libxl__arch_get_required_paging_memory(unsigned long maxmem_kb,
++ unsigned int smp_cpus)
++{
++ /*
++ * 256 pages (1MB) per vcpu,
++ * plus 1 page per MiB of RAM for the P2M map,
++ * plus 1 page per MiB of RAM to shadow the resident processes.
++ * This is higher than the minimum that Xen would allocate if no value
++ * were given (but the Xen minimum is for safety, not performance).
++ */
++ return 4 * (256 * smp_cpus + 2 * (maxmem_kb / 1024));
++}
++
+ /*
+ * Local variables:
+ * mode: C
+--
+2.37.3
+
diff --git a/0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch b/0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch
new file mode 100644
index 0000000..77093a7
--- /dev/null
+++ b/0052-xen-arm-Construct-the-P2M-pages-pool-for-guests.patch
@@ -0,0 +1,189 @@
+From 45336d8f88725aec65ee177b1b09abf6eef1dc8d Mon Sep 17 00:00:00 2001
+From: Henry Wang <Henry.Wang@arm.com>
+Date: Tue, 11 Oct 2022 15:09:58 +0200
+Subject: [PATCH 52/67] xen/arm: Construct the P2M pages pool for guests
+
+This commit constructs the p2m pages pool for guests from the
+data structure and helper perspective.
+
+This is implemented by:
+
+- Adding a `struct paging_domain`, which contains a freelist, a
+counter variable and a spinlock, to `struct arch_domain` to
+track the free p2m pages and the total number of p2m pages in
+the p2m pages pool.
+
+- Adding a helper `p2m_get_allocation` to get the p2m pool size.
+
+- Adding a helper `p2m_set_allocation` to set the p2m pages pool
+size. This helper should be called before allocating memory for
+a guest.
+
+- Adding a helper `p2m_teardown_allocation` to free the p2m pages
+pool. This helper should be called during xl domain destruction (see
+the usage sketch below).
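+
+A minimal usage sketch (hypothetical caller and `nr_pages`; the real
+call sites are introduced by the follow-up patches in this series):
+
+    bool preempted = false;
+    int rc;
+
+    spin_lock(&d->arch.paging.lock);
+    rc = p2m_set_allocation(d, nr_pages, &preempted);
+    spin_unlock(&d->arch.paging.lock);
+    if ( preempted )
+        /* not done yet: arrange to retry, e.g. via a continuation */;
+
+    /* ... later, on domain destruction ... */
+    rc = p2m_teardown_allocation(d); /* returns -ERESTART if preempted */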
+
+This is part of CVE-2022-33747 / XSA-409.
+
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+master commit: 55914f7fc91a468649b8a3ec3f53ae1c4aca6670
+master date: 2022-10-11 14:28:39 +0200
+---
+ xen/arch/arm/p2m.c | 88 ++++++++++++++++++++++++++++++++++++
+ xen/include/asm-arm/domain.h | 10 ++++
+ xen/include/asm-arm/p2m.h | 4 ++
+ 3 files changed, 102 insertions(+)
+
+diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
+index 4ad3e0606e9c..6883d8627702 100644
+--- a/xen/arch/arm/p2m.c
++++ b/xen/arch/arm/p2m.c
+@@ -50,6 +50,92 @@ static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn)
+ return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48));
+ }
+
++/* Return the size of the pool, rounded up to the nearest MB */
++unsigned int p2m_get_allocation(struct domain *d)
++{
++ unsigned long nr_pages = ACCESS_ONCE(d->arch.paging.p2m_total_pages);
++
++ return ROUNDUP(nr_pages, 1 << (20 - PAGE_SHIFT)) >> (20 - PAGE_SHIFT);
++}
++
++/*
++ * Set the pool of pages to the required number of pages.
++ * Returns 0 for success, non-zero for failure.
++ * Call with d->arch.paging.lock held.
++ */
++int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted)
++{
++ struct page_info *pg;
++
++ ASSERT(spin_is_locked(&d->arch.paging.lock));
++
++ for ( ; ; )
++ {
++ if ( d->arch.paging.p2m_total_pages < pages )
++ {
++ /* Need to allocate more memory from domheap */
++ pg = alloc_domheap_page(NULL, 0);
++ if ( pg == NULL )
++ {
++ printk(XENLOG_ERR "Failed to allocate P2M pages.\n");
++ return -ENOMEM;
++ }
++ ACCESS_ONCE(d->arch.paging.p2m_total_pages) =
++ d->arch.paging.p2m_total_pages + 1;
++ page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
++ }
++ else if ( d->arch.paging.p2m_total_pages > pages )
++ {
++ /* Need to return memory to domheap */
++ pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
++ if( pg )
++ {
++ ACCESS_ONCE(d->arch.paging.p2m_total_pages) =
++ d->arch.paging.p2m_total_pages - 1;
++ free_domheap_page(pg);
++ }
++ else
++ {
++ printk(XENLOG_ERR
++ "Failed to free P2M pages, P2M freelist is empty.\n");
++ return -ENOMEM;
++ }
++ }
++ else
++ break;
++
++ /* Check to see if we need to yield and try again */
++ if ( preempted && general_preempt_check() )
++ {
++ *preempted = true;
++ return -ERESTART;
++ }
++ }
++
++ return 0;
++}
++
++int p2m_teardown_allocation(struct domain *d)
++{
++ int ret = 0;
++ bool preempted = false;
++
++ spin_lock(&d->arch.paging.lock);
++ if ( d->arch.paging.p2m_total_pages != 0 )
++ {
++ ret = p2m_set_allocation(d, 0, &preempted);
++ if ( preempted )
++ {
++ spin_unlock(&d->arch.paging.lock);
++ return -ERESTART;
++ }
++ ASSERT(d->arch.paging.p2m_total_pages == 0);
++ }
++ spin_unlock(&d->arch.paging.lock);
++
++ return ret;
++}
++
+ /* Unlock the flush and do a P2M TLB flush if necessary */
+ void p2m_write_unlock(struct p2m_domain *p2m)
+ {
+@@ -1602,7 +1688,9 @@ int p2m_init(struct domain *d)
+ unsigned int cpu;
+
+ rwlock_init(&p2m->lock);
++ spin_lock_init(&d->arch.paging.lock);
+ INIT_PAGE_LIST_HEAD(&p2m->pages);
++ INIT_PAGE_LIST_HEAD(&d->arch.paging.p2m_freelist);
+
+ p2m->vmid = INVALID_VMID;
+
+diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
+index bb0a6adbe00b..1d8935778f3b 100644
+--- a/xen/include/asm-arm/domain.h
++++ b/xen/include/asm-arm/domain.h
+@@ -40,6 +40,14 @@ struct vtimer {
+ uint64_t cval;
+ };
+
++struct paging_domain {
++ spinlock_t lock;
++ /* Free P2M pages from the pre-allocated P2M pool */
++ struct page_list_head p2m_freelist;
++ /* Number of pages from the pre-allocated P2M pool */
++ unsigned long p2m_total_pages;
++};
++
+ struct arch_domain
+ {
+ #ifdef CONFIG_ARM_64
+@@ -51,6 +59,8 @@ struct arch_domain
+
+ struct hvm_domain hvm;
+
++ struct paging_domain paging;
++
+ struct vmmio vmmio;
+
+ /* Continuable domain_relinquish_resources(). */
+diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
+index 3a2d51b35d71..18675b234570 100644
+--- a/xen/include/asm-arm/p2m.h
++++ b/xen/include/asm-arm/p2m.h
+@@ -218,6 +218,10 @@ void p2m_restore_state(struct vcpu *n);
+ /* Print debugging/statistial info about a domain's p2m */
+ void p2m_dump_info(struct domain *d);
+
++unsigned int p2m_get_allocation(struct domain *d);
++int p2m_set_allocation(struct domain *d, unsigned long pages, bool *preempted);
++int p2m_teardown_allocation(struct domain *d);
++
+ static inline void p2m_write_lock(struct p2m_domain *p2m)
+ {
+ write_lock(&p2m->lock);
+--
+2.37.3
+
diff --git a/0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch b/0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch
new file mode 100644
index 0000000..52ce67c
--- /dev/null
+++ b/0053-xen-arm-libxl-Implement-XEN_DOMCTL_shadow_op-for-Arm.patch
@@ -0,0 +1,108 @@
+From c5215044578e88b401a1296ed6302df05c113c5f Mon Sep 17 00:00:00 2001
+From: Henry Wang <Henry.Wang@arm.com>
+Date: Tue, 11 Oct 2022 15:10:16 +0200
+Subject: [PATCH 53/67] xen/arm, libxl: Implement XEN_DOMCTL_shadow_op for Arm
+
+This commit implements the `XEN_DOMCTL_shadow_op` support in Xen
+for Arm. The p2m pages pool size for xl guests is supposed to be
+determined by `XEN_DOMCTL_shadow_op`. Hence, this commit:
+
+- Introduces a function `p2m_domctl` and implements the subops
+`XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION` and
+`XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION` of `XEN_DOMCTL_shadow_op`.
+
+- Adds the `XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION` support in libxl.
+
+This therefore enables setting the shadow memory pool size when
+creating a guest from xl, and getting the shadow memory pool size back
+from Xen.
+
+Note that the `XEN_DOMCTL_shadow_op` added in this commit is only
+a dummy op, and the functionality of setting/getting the p2m memory pool
+size for xl guests will be added in the following commits.
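+
+For example (hypothetical value): once those follow-up commits are in
+place, setting `shadow_memory = 16` in an xl guest config makes libxl
+request a 16 MiB p2m pool for the guest via
+XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION.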
+
+This is part of CVE-2022-33747 / XSA-409.
+
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+master commit: cf2a68d2ffbc3ce95e01449d46180bddb10d24a0
+master date: 2022-10-11 14:28:42 +0200
+---
+ tools/libs/light/libxl_arm.c | 12 ++++++++++++
+ xen/arch/arm/domctl.c | 32 ++++++++++++++++++++++++++++++++
+ 2 files changed, 44 insertions(+)
+
+diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
+index d59b464192c2..d21f614ed788 100644
+--- a/tools/libs/light/libxl_arm.c
++++ b/tools/libs/light/libxl_arm.c
+@@ -131,6 +131,18 @@ int libxl__arch_domain_create(libxl__gc *gc,
+ libxl__domain_build_state *state,
+ uint32_t domid)
+ {
++ libxl_ctx *ctx = libxl__gc_owner(gc);
++ unsigned int shadow_mb = DIV_ROUNDUP(d_config->b_info.shadow_memkb, 1024);
++
++ int r = xc_shadow_control(ctx->xch, domid,
++ XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION,
++ &shadow_mb, 0);
++ if (r) {
++ LOGED(ERROR, domid,
++ "Failed to set %u MiB shadow allocation", shadow_mb);
++ return ERROR_FAIL;
++ }
++
+ return 0;
+ }
+
+diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
+index a8c48b0beaab..a049bc7f3e52 100644
+--- a/xen/arch/arm/domctl.c
++++ b/xen/arch/arm/domctl.c
+@@ -45,11 +45,43 @@ static int handle_vuart_init(struct domain *d,
+ return rc;
+ }
+
++static long p2m_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
++ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
++{
++ if ( unlikely(d == current->domain) )
++ {
++ printk(XENLOG_ERR "Tried to do a p2m domctl op on itself.\n");
++ return -EINVAL;
++ }
++
++ if ( unlikely(d->is_dying) )
++ {
++ printk(XENLOG_ERR "Tried to do a p2m domctl op on dying domain %u\n",
++ d->domain_id);
++ return -EINVAL;
++ }
++
++ switch ( sc->op )
++ {
++ case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
++ return 0;
++ case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
++ return 0;
++ default:
++ {
++ printk(XENLOG_ERR "Bad p2m domctl op %u\n", sc->op);
++ return -EINVAL;
++ }
++ }
++}
++
+ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+ {
+ switch ( domctl->cmd )
+ {
++ case XEN_DOMCTL_shadow_op:
++ return p2m_domctl(d, &domctl->u.shadow_op, u_domctl);
+ case XEN_DOMCTL_cacheflush:
+ {
+ gfn_t s = _gfn(domctl->u.cacheflush.start_pfn);
+--
+2.37.3
+
diff --git a/0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch b/0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch
new file mode 100644
index 0000000..3ef7019
--- /dev/null
+++ b/0054-xen-arm-Allocate-and-free-P2M-pages-from-the-P2M-poo.patch
@@ -0,0 +1,289 @@
+From 7ad38a39f08aadc1578bdb46ccabaad79ed0faee Mon Sep 17 00:00:00 2001
+From: Henry Wang <Henry.Wang@arm.com>
+Date: Tue, 11 Oct 2022 15:10:34 +0200
+Subject: [PATCH 54/67] xen/arm: Allocate and free P2M pages from the P2M pool
+
+This commit sets up/tears down the p2m pages pool for non-privileged Arm
+guests by calling `p2m_set_allocation` and `p2m_teardown_allocation`.
+
+- For dom0, P2M pages should come from the heap directly instead of the
+p2m pool, so that the kernel may take advantage of the extended regions.
+
+- For xl guests, the setting of the p2m pool is called in
+`XEN_DOMCTL_shadow_op` and the p2m pool is destroyed in
+`domain_relinquish_resources`. Note that domctl->u.shadow_op.mb is
+updated with the new size when setting the p2m pool.
+
+- For dom0less domUs, the setting of the p2m pool is called before
+allocating memory during domain creation. Users can specify the p2m
+pool size via the `xen,domain-p2m-mem-mb` dts property (see the example
+below).
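+
+For example (hypothetical value), adding `xen,domain-p2m-mem-mb = <8>;`
+to a domU node in the device tree reserves an 8 MiB p2m pool for that
+dom0less guest.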
+
+To actually allocate/free pages from the p2m pool, this commit adds
+two helper functions, namely `p2m_alloc_page` and `p2m_free_page`, to
+`struct p2m_domain`. By replacing `alloc_domheap_page` and
+`free_domheap_page` with these two helper functions, p2m pages can
+be added to/removed from the p2m pool free list rather than the heap.
+
+Since the page from `p2m_alloc_page` is cleaned, take the opportunity
+to remove the redundant `clean_page` in `p2m_create_table`.
+
+This is part of CVE-2022-33747 / XSA-409.
+
+Signed-off-by: Henry Wang <Henry.Wang@arm.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+master commit: cbea5a1149ca7fd4b7cdbfa3ec2e4f109b601ff7
+master date: 2022-10-11 14:28:44 +0200
+---
+ docs/misc/arm/device-tree/booting.txt | 8 ++++
+ xen/arch/arm/domain.c | 6 +++
+ xen/arch/arm/domain_build.c | 29 ++++++++++++++
+ xen/arch/arm/domctl.c | 23 ++++++++++-
+ xen/arch/arm/p2m.c | 57 +++++++++++++++++++++++++--
+ 5 files changed, 118 insertions(+), 5 deletions(-)
+
+diff --git a/docs/misc/arm/device-tree/booting.txt b/docs/misc/arm/device-tree/booting.txt
+index 5243bc7fd344..470c9491a781 100644
+--- a/docs/misc/arm/device-tree/booting.txt
++++ b/docs/misc/arm/device-tree/booting.txt
+@@ -164,6 +164,14 @@ with the following properties:
+ Both #address-cells and #size-cells need to be specified because
+ both sub-nodes (described shortly) have reg properties.
+
++- xen,domain-p2m-mem-mb
++
++ Optional. A 32-bit integer specifying the amount of megabytes of RAM
++ used for the domain P2M pool. This is in-sync with the shadow_memory
++ option in xl.cfg. Leaving this field empty in device tree will lead to
++ the default size of domain P2M pool, i.e. 1MB per guest vCPU plus 4KB
++ per MB of guest RAM plus 512KB for guest extended regions.
++
+ Under the "xen,domain" compatible node, one or more sub-nodes are present
+ for the DomU kernel and ramdisk.
+
+diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
+index 223ec9694df1..a5ffd952ecd0 100644
+--- a/xen/arch/arm/domain.c
++++ b/xen/arch/arm/domain.c
+@@ -985,6 +985,7 @@ enum {
+ PROG_page,
+ PROG_mapping,
+ PROG_p2m,
++ PROG_p2m_pool,
+ PROG_done,
+ };
+
+@@ -1044,6 +1045,11 @@ int domain_relinquish_resources(struct domain *d)
+ if ( ret )
+ return ret;
+
++ PROGRESS(p2m_pool):
++ ret = p2m_teardown_allocation(d);
++ if( ret )
++ return ret;
++
+ PROGRESS(done):
+ break;
+
+diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
+index 26c13429488d..df0ec84f034c 100644
+--- a/xen/arch/arm/domain_build.c
++++ b/xen/arch/arm/domain_build.c
+@@ -2333,6 +2333,21 @@ static void __init find_gnttab_region(struct domain *d,
+ kinfo->gnttab_start, kinfo->gnttab_start + kinfo->gnttab_size);
+ }
+
++static unsigned long __init domain_p2m_pages(unsigned long maxmem_kb,
++ unsigned int smp_cpus)
++{
++ /*
++ * Keep in sync with libxl__get_required_paging_memory().
++ * 256 pages (1MB) per vcpu, plus 1 page per MiB of RAM for the P2M map,
++ * plus 128 pages to cover extended regions.
++ */
++ unsigned long memkb = 4 * (256 * smp_cpus + (maxmem_kb / 1024) + 128);
++
++ BUILD_BUG_ON(PAGE_SIZE != SZ_4K);
++
++ return DIV_ROUND_UP(memkb, 1024) << (20 - PAGE_SHIFT);
++}
++
+ static int __init construct_domain(struct domain *d, struct kernel_info *kinfo)
+ {
+ unsigned int i;
+@@ -2424,6 +2439,8 @@ static int __init construct_domU(struct domain *d,
+ struct kernel_info kinfo = {};
+ int rc;
+ u64 mem;
++ u32 p2m_mem_mb;
++ unsigned long p2m_pages;
+
+ rc = dt_property_read_u64(node, "memory", &mem);
+ if ( !rc )
+@@ -2433,6 +2450,18 @@ static int __init construct_domU(struct domain *d,
+ }
+ kinfo.unassigned_mem = (paddr_t)mem * SZ_1K;
+
++ rc = dt_property_read_u32(node, "xen,domain-p2m-mem-mb", &p2m_mem_mb);
++ /* If xen,domain-p2m-mem-mb is not specified, use the default value. */
++ p2m_pages = rc ?
++ p2m_mem_mb << (20 - PAGE_SHIFT) :
++ domain_p2m_pages(mem, d->max_vcpus);
++
++ spin_lock(&d->arch.paging.lock);
++ rc = p2m_set_allocation(d, p2m_pages, NULL);
++ spin_unlock(&d->arch.paging.lock);
++ if ( rc != 0 )
++ return rc;
++
+ printk("*** LOADING DOMU cpus=%u memory=%"PRIx64"KB ***\n", d->max_vcpus, mem);
+
+ kinfo.vpl011 = dt_property_read_bool(node, "vpl011");
+diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
+index a049bc7f3e52..4ab5ed4ab24d 100644
+--- a/xen/arch/arm/domctl.c
++++ b/xen/arch/arm/domctl.c
+@@ -48,6 +48,9 @@ static int handle_vuart_init(struct domain *d,
+ static long p2m_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+ {
++ long rc;
++ bool preempted = false;
++
+ if ( unlikely(d == current->domain) )
+ {
+ printk(XENLOG_ERR "Tried to do a p2m domctl op on itself.\n");
+@@ -64,9 +67,27 @@ static long p2m_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
+ switch ( sc->op )
+ {
+ case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
+- return 0;
++ {
++ /* Allow and handle preemption */
++ spin_lock(&d->arch.paging.lock);
++ rc = p2m_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
++ spin_unlock(&d->arch.paging.lock);
++
++ if ( preempted )
++ /* Not finished. Set up to re-run the call. */
++ rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
++ u_domctl);
++ else
++ /* Finished. Return the new allocation. */
++ sc->mb = p2m_get_allocation(d);
++
++ return rc;
++ }
+ case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
++ {
++ sc->mb = p2m_get_allocation(d);
+ return 0;
++ }
+ default:
+ {
+ printk(XENLOG_ERR "Bad p2m domctl op %u\n", sc->op);
+diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
+index 6883d8627702..c1055ff2a745 100644
+--- a/xen/arch/arm/p2m.c
++++ b/xen/arch/arm/p2m.c
+@@ -50,6 +50,54 @@ static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn)
+ return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48));
+ }
+
++static struct page_info *p2m_alloc_page(struct domain *d)
++{
++ struct page_info *pg;
++
++ spin_lock(&d->arch.paging.lock);
++ /*
++ * For hardware domain, there should be no limit in the number of pages that
++ * can be allocated, so that the kernel may take advantage of the extended
++ * regions. Hence, allocate p2m pages for hardware domains from heap.
++ */
++ if ( is_hardware_domain(d) )
++ {
++ pg = alloc_domheap_page(NULL, 0);
++ if ( pg == NULL )
++ {
++ printk(XENLOG_G_ERR "Failed to allocate P2M pages for hwdom.\n");
++ spin_unlock(&d->arch.paging.lock);
++ return NULL;
++ }
++ }
++ else
++ {
++ pg = page_list_remove_head(&d->arch.paging.p2m_freelist);
++ if ( unlikely(!pg) )
++ {
++ spin_unlock(&d->arch.paging.lock);
++ return NULL;
++ }
++ d->arch.paging.p2m_total_pages--;
++ }
++ spin_unlock(&d->arch.paging.lock);
++
++ return pg;
++}
++
++static void p2m_free_page(struct domain *d, struct page_info *pg)
++{
++ spin_lock(&d->arch.paging.lock);
++ if ( is_hardware_domain(d) )
++ free_domheap_page(pg);
++ else
++ {
++ d->arch.paging.p2m_total_pages++;
++ page_list_add_tail(pg, &d->arch.paging.p2m_freelist);
++ }
++ spin_unlock(&d->arch.paging.lock);
++}
++
+ /* Return the size of the pool, rounded up to the nearest MB */
+ unsigned int p2m_get_allocation(struct domain *d)
+ {
+@@ -751,7 +799,7 @@ static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry)
+
+ ASSERT(!p2m_is_valid(*entry));
+
+- page = alloc_domheap_page(NULL, 0);
++ page = p2m_alloc_page(p2m->domain);
+ if ( page == NULL )
+ return -ENOMEM;
+
+@@ -878,7 +926,7 @@ static void p2m_free_entry(struct p2m_domain *p2m,
+ pg = mfn_to_page(mfn);
+
+ page_list_del(pg, &p2m->pages);
+- free_domheap_page(pg);
++ p2m_free_page(p2m->domain, pg);
+ }
+
+ static bool p2m_split_superpage(struct p2m_domain *p2m, lpae_t *entry,
+@@ -902,7 +950,7 @@ static bool p2m_split_superpage(struct p2m_domain *p2m, lpae_t *entry,
+ ASSERT(level < target);
+ ASSERT(p2m_is_superpage(*entry, level));
+
+- page = alloc_domheap_page(NULL, 0);
++ page = p2m_alloc_page(p2m->domain);
+ if ( !page )
+ return false;
+
+@@ -1644,7 +1692,7 @@ int p2m_teardown(struct domain *d)
+
+ while ( (pg = page_list_remove_head(&p2m->pages)) )
+ {
+- free_domheap_page(pg);
++ p2m_free_page(p2m->domain, pg);
+ count++;
+ /* Arbitrarily preempt every 512 iterations */
+ if ( !(count % 512) && hypercall_preempt_check() )
+@@ -1668,6 +1716,7 @@ void p2m_final_teardown(struct domain *d)
+ return;
+
+ ASSERT(page_list_empty(&p2m->pages));
++ ASSERT(page_list_empty(&d->arch.paging.p2m_freelist));
+
+ if ( p2m->root )
+ free_domheap_pages(p2m->root, P2M_ROOT_ORDER);
+--
+2.37.3
+
diff --git a/0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch b/0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch
new file mode 100644
index 0000000..be83ce5
--- /dev/null
+++ b/0055-gnttab-correct-locking-on-transitive-grant-copy-erro.patch
@@ -0,0 +1,66 @@
+From bb43a10fefe494ab747b020fef3e823b63fc566d Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 11 Oct 2022 15:11:01 +0200
+Subject: [PATCH 55/67] gnttab: correct locking on transitive grant copy error
+ path
+
+While the comment next to the lock dropping in preparation of
+recursively calling acquire_grant_for_copy() mistakenly talks about the
+rd == td case (excluded a few lines further up), the same concerns apply
+to the calling of release_grant_for_copy() on a subsequent error path.
+
+This is CVE-2022-33748 / XSA-411.
+
+Fixes: ad48fb963dbf ("gnttab: fix transitive grant handling")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+master commit: 6e3aab858eef614a21a782a3b73acc88e74690ea
+master date: 2022-10-11 14:29:30 +0200
+---
+ xen/common/grant_table.c | 19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index 77bba9806937..0523beb9b734 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -2608,9 +2608,8 @@ acquire_grant_for_copy(
+ trans_domid);
+
+ /*
+- * acquire_grant_for_copy() could take the lock on the
+- * remote table (if rd == td), so we have to drop the lock
+- * here and reacquire.
++ * acquire_grant_for_copy() will take the lock on the remote table,
++ * so we have to drop the lock here and reacquire.
+ */
+ active_entry_release(act);
+ grant_read_unlock(rgt);
+@@ -2647,11 +2646,25 @@ acquire_grant_for_copy(
+ act->trans_gref != trans_gref ||
+ !act->is_sub_page)) )
+ {
++ /*
++ * Like above for acquire_grant_for_copy() we need to drop and then
++ * re-acquire the locks here to prevent lock order inversion issues.
++ * Unlike for acquire_grant_for_copy() we don't need to re-check
++ * anything, as release_grant_for_copy() doesn't depend on the grant
++ * table entry: It only updates internal state and the status flags.
++ */
++ active_entry_release(act);
++ grant_read_unlock(rgt);
++
+ release_grant_for_copy(td, trans_gref, readonly);
+ rcu_unlock_domain(td);
++
++ grant_read_lock(rgt);
++ act = active_entry_acquire(rgt, gref);
+ reduce_status_for_pin(rd, act, status, readonly);
+ active_entry_release(act);
+ grant_read_unlock(rgt);
++
+ put_page(*page);
+ *page = NULL;
+ return ERESTART;
+--
+2.37.3
+
diff --git a/0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch b/0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch
new file mode 100644
index 0000000..c5d2c9c
--- /dev/null
+++ b/0056-tools-libxl-Replace-deprecated-soundhw-on-QEMU-comma.patch
@@ -0,0 +1,112 @@
+From d65ebacb78901b695bc5e8a075ad1ad865a78928 Mon Sep 17 00:00:00 2001
+From: Anthony PERARD <anthony.perard@citrix.com>
+Date: Tue, 11 Oct 2022 15:13:15 +0200
+Subject: [PATCH 56/67] tools/libxl: Replace deprecated -soundhw on QEMU
+ command line
+
+-soundhw is deprecated since 825ff02911c9 ("audio: add soundhw
+deprecation notice"), QEMU v5.1, and is been remove for upcoming v7.1
+by 039a68373c45 ("introduce -audio as a replacement for -soundhw").
+
+Instead we can just add the sound card with "-device", for most options
+that "-soundhw" could handle. "-device" is an option that existed
+before QEMU 1.0, and could already be used to add audio hardware.
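+
+For example, `soundhw="hda"` in the guest config now results in
+`-device intel-hda -device hda-duplex` on the QEMU command line, while
+the other accepted values are passed to QEMU as a single "-device <name>"
+option.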
+
+The list of possible options for libxl's "soundhw" is taken from the
+list in QEMU 7.0.
+
+The list of options for "soundhw" are listed in order of preference in
+the manual. The first three (hda, ac97, es1370) are PCI devices and
+easy to test on Linux, and the last four are ISA devices which doesn't
+seems to work out of the box on linux.
+
+The sound card 'pcspk' isn't listed even though it used to be accepted
+by '-soundhw', because QEMU crashes when trying to add it to a Xen
+domain. Also, it wouldn't work with "-device"; it might need to be
+"-machine pcspk-audiodev=default" instead.
+
+Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
+Reviewed-by: Jason Andryuk <jandryuk@gmail.com>
+master commit: 62ca138c2c052187783aca3957d3f47c4dcfd683
+master date: 2022-08-18 09:25:50 +0200
+---
+ docs/man/xl.cfg.5.pod.in | 6 +++---
+ tools/libs/light/libxl_dm.c | 19 ++++++++++++++++++-
+ tools/libs/light/libxl_types_internal.idl | 10 ++++++++++
+ 3 files changed, 31 insertions(+), 4 deletions(-)
+
+diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
+index af7fae7c52f9..ef9505f91341 100644
+--- a/docs/man/xl.cfg.5.pod.in
++++ b/docs/man/xl.cfg.5.pod.in
+@@ -2523,9 +2523,9 @@ The form serial=DEVICE is also accepted for backwards compatibility.
+
+ =item B<soundhw="DEVICE">
+
+-Select the virtual sound card to expose to the guest. The valid
+-devices are defined by the device model configuration, please see the
+-B<qemu(1)> manpage for details. The default is not to export any sound
++Select the virtual sound card to expose to the guest. The valid devices are
++B<hda>, B<ac97>, B<es1370>, B<adlib>, B<cs4231a>, B<gus>, B<sb16> if there are
++available with the device model QEMU. The default is not to export any sound
+ device.
+
+ =item B<vkb_device=BOOLEAN>
+diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
+index ae5f35e0c3fd..b86e8ccc858f 100644
+--- a/tools/libs/light/libxl_dm.c
++++ b/tools/libs/light/libxl_dm.c
+@@ -1204,6 +1204,7 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
+ uint64_t ram_size;
+ const char *path, *chardev;
+ bool is_stubdom = libxl_defbool_val(b_info->device_model_stubdomain);
++ int rc;
+
+ dm_args = flexarray_make(gc, 16, 1);
+ dm_envs = flexarray_make(gc, 16, 1);
+@@ -1531,7 +1532,23 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
+ }
+ }
+ if (b_info->u.hvm.soundhw) {
+- flexarray_vappend(dm_args, "-soundhw", b_info->u.hvm.soundhw, NULL);
++ libxl__qemu_soundhw soundhw;
++
++ rc = libxl__qemu_soundhw_from_string(b_info->u.hvm.soundhw, &soundhw);
++ if (rc) {
++ LOGD(ERROR, guest_domid, "Unknown soundhw option '%s'", b_info->u.hvm.soundhw);
++ return ERROR_INVAL;
++ }
++
++ switch (soundhw) {
++ case LIBXL__QEMU_SOUNDHW_HDA:
++ flexarray_vappend(dm_args, "-device", "intel-hda",
++ "-device", "hda-duplex", NULL);
++ break;
++ default:
++ flexarray_append_pair(dm_args, "-device",
++ (char*)libxl__qemu_soundhw_to_string(soundhw));
++ }
+ }
+ if (!libxl__acpi_defbool_val(b_info)) {
+ flexarray_append(dm_args, "-no-acpi");
+diff --git a/tools/libs/light/libxl_types_internal.idl b/tools/libs/light/libxl_types_internal.idl
+index 3593e21dbb64..caa08d3229cd 100644
+--- a/tools/libs/light/libxl_types_internal.idl
++++ b/tools/libs/light/libxl_types_internal.idl
+@@ -55,3 +55,13 @@ libxl__device_action = Enumeration("device_action", [
+ (1, "ADD"),
+ (2, "REMOVE"),
+ ])
++
++libxl__qemu_soundhw = Enumeration("qemu_soundhw", [
++ (1, "ac97"),
++ (2, "adlib"),
++ (3, "cs4231a"),
++ (4, "es1370"),
++ (5, "gus"),
++ (6, "hda"),
++ (7, "sb16"),
++ ])
+--
+2.37.3
+
diff --git a/0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch b/0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch
new file mode 100644
index 0000000..9b1cce8
--- /dev/null
+++ b/0057-x86-CPUID-surface-suitable-value-in-EBX-of-XSTATE-su.patch
@@ -0,0 +1,44 @@
+From 7923ea47e578bca30a6e45951a9da09e827ff028 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 11 Oct 2022 15:14:05 +0200
+Subject: [PATCH 57/67] x86/CPUID: surface suitable value in EBX of XSTATE
+ subleaf 1
+
+While the SDM isn't very clear about this, our present behavior makes
+Linux 5.19 unhappy. As of commit 8ad7e8f69695 ("x86/fpu/xsave: Support
+XSAVEC in the kernel") they're using this CPUID output also to size
+the compacted area used by XSAVEC. Getting back zero there isn't really
+liked, yet for PV that's the default on capable hardware: XSAVES isn't
+exposed to PV domains.
+
+Considering that the size reported is that of the compacted save area,
+I view Linux's assumption as appropriate (short of the SDM properly
+considering the case). Therefore we need to populate the field also when
+only XSAVEC is supported for a guest.
+
+Fixes: 460b9a4b3630 ("x86/xsaves: enable xsaves/xrstors for hvm guest")
+Fixes: 8d050ed1097c ("x86: don't expose XSAVES capability to PV guests")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: c3bd0b83ea5b7c0da6542687436042eeea1e7909
+master date: 2022-08-24 14:23:59 +0200
+---
+ xen/arch/x86/cpuid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
+index ee2c4ea03a89..11c95178f110 100644
+--- a/xen/arch/x86/cpuid.c
++++ b/xen/arch/x86/cpuid.c
+@@ -1052,7 +1052,7 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
+ switch ( subleaf )
+ {
+ case 1:
+- if ( p->xstate.xsaves )
++ if ( p->xstate.xsavec || p->xstate.xsaves )
+ {
+ /*
+ * TODO: Figure out what to do for XSS state. VT-x manages
+--
+2.37.3
+
diff --git a/0058-xen-sched-introduce-cpupool_update_node_affinity.patch b/0058-xen-sched-introduce-cpupool_update_node_affinity.patch
new file mode 100644
index 0000000..c15edb8
--- /dev/null
+++ b/0058-xen-sched-introduce-cpupool_update_node_affinity.patch
@@ -0,0 +1,257 @@
+From 735b10844489babf52d3193193285a7311cf2c39 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 11 Oct 2022 15:14:22 +0200
+Subject: [PATCH 58/67] xen/sched: introduce cpupool_update_node_affinity()
+
+For updating the node affinities of all domains in a cpupool add a new
+function cpupool_update_node_affinity().
+
+In order to avoid multiple allocations of cpumasks carve out memory
+allocation and freeing from domain_update_node_affinity() into new
+helpers, which can be used by cpupool_update_node_affinity().
+
+Modify domain_update_node_affinity() to take an additional parameter
+for passing the allocated memory in and to allocate and free the memory
+via the new helpers in case NULL was passed.
+
+This will help later to pre-allocate the cpumasks in order to avoid
+allocations in stop-machine context.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: a83fa1e2b96ace65b45dde6954d67012633a082b
+master date: 2022-09-05 11:42:30 +0100
+---
+ xen/common/sched/core.c | 54 ++++++++++++++++++++++++++------------
+ xen/common/sched/cpupool.c | 39 +++++++++++++++------------
+ xen/common/sched/private.h | 7 +++++
+ xen/include/xen/sched.h | 9 ++++++-
+ 4 files changed, 74 insertions(+), 35 deletions(-)
+
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index f07bd2681fcb..065a83eca912 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -1824,9 +1824,28 @@ int vcpu_affinity_domctl(struct domain *d, uint32_t cmd,
+ return ret;
+ }
+
+-void domain_update_node_affinity(struct domain *d)
++bool alloc_affinity_masks(struct affinity_masks *affinity)
+ {
+- cpumask_var_t dom_cpumask, dom_cpumask_soft;
++ if ( !alloc_cpumask_var(&affinity->hard) )
++ return false;
++ if ( !alloc_cpumask_var(&affinity->soft) )
++ {
++ free_cpumask_var(affinity->hard);
++ return false;
++ }
++
++ return true;
++}
++
++void free_affinity_masks(struct affinity_masks *affinity)
++{
++ free_cpumask_var(affinity->soft);
++ free_cpumask_var(affinity->hard);
++}
++
++void domain_update_node_aff(struct domain *d, struct affinity_masks *affinity)
++{
++ struct affinity_masks masks;
+ cpumask_t *dom_affinity;
+ const cpumask_t *online;
+ struct sched_unit *unit;
+@@ -1836,14 +1855,16 @@ void domain_update_node_affinity(struct domain *d)
+ if ( !d->vcpu || !d->vcpu[0] )
+ return;
+
+- if ( !zalloc_cpumask_var(&dom_cpumask) )
+- return;
+- if ( !zalloc_cpumask_var(&dom_cpumask_soft) )
++ if ( !affinity )
+ {
+- free_cpumask_var(dom_cpumask);
+- return;
++ affinity = &masks;
++ if ( !alloc_affinity_masks(affinity) )
++ return;
+ }
+
++ cpumask_clear(affinity->hard);
++ cpumask_clear(affinity->soft);
++
+ online = cpupool_domain_master_cpumask(d);
+
+ spin_lock(&d->node_affinity_lock);
+@@ -1864,22 +1885,21 @@ void domain_update_node_affinity(struct domain *d)
+ */
+ for_each_sched_unit ( d, unit )
+ {
+- cpumask_or(dom_cpumask, dom_cpumask, unit->cpu_hard_affinity);
+- cpumask_or(dom_cpumask_soft, dom_cpumask_soft,
+- unit->cpu_soft_affinity);
++ cpumask_or(affinity->hard, affinity->hard, unit->cpu_hard_affinity);
++ cpumask_or(affinity->soft, affinity->soft, unit->cpu_soft_affinity);
+ }
+ /* Filter out non-online cpus */
+- cpumask_and(dom_cpumask, dom_cpumask, online);
+- ASSERT(!cpumask_empty(dom_cpumask));
++ cpumask_and(affinity->hard, affinity->hard, online);
++ ASSERT(!cpumask_empty(affinity->hard));
+ /* And compute the intersection between hard, online and soft */
+- cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask);
++ cpumask_and(affinity->soft, affinity->soft, affinity->hard);
+
+ /*
+ * If not empty, the intersection of hard, soft and online is the
+ * narrowest set we want. If empty, we fall back to hard&online.
+ */
+- dom_affinity = cpumask_empty(dom_cpumask_soft) ?
+- dom_cpumask : dom_cpumask_soft;
++ dom_affinity = cpumask_empty(affinity->soft) ? affinity->hard
++ : affinity->soft;
+
+ nodes_clear(d->node_affinity);
+ for_each_cpu ( cpu, dom_affinity )
+@@ -1888,8 +1908,8 @@ void domain_update_node_affinity(struct domain *d)
+
+ spin_unlock(&d->node_affinity_lock);
+
+- free_cpumask_var(dom_cpumask_soft);
+- free_cpumask_var(dom_cpumask);
++ if ( affinity == &masks )
++ free_affinity_masks(affinity);
+ }
+
+ typedef long ret_t;
+diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
+index 8c6e6eb9ccd5..45b6ff99561a 100644
+--- a/xen/common/sched/cpupool.c
++++ b/xen/common/sched/cpupool.c
+@@ -401,6 +401,25 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c)
+ return ret;
+ }
+
++/* Update affinities of all domains in a cpupool. */
++static void cpupool_update_node_affinity(const struct cpupool *c)
++{
++ struct affinity_masks masks;
++ struct domain *d;
++
++ if ( !alloc_affinity_masks(&masks) )
++ return;
++
++ rcu_read_lock(&domlist_read_lock);
++
++ for_each_domain_in_cpupool(d, c)
++ domain_update_node_aff(d, &masks);
++
++ rcu_read_unlock(&domlist_read_lock);
++
++ free_affinity_masks(&masks);
++}
++
+ /*
+ * assign a specific cpu to a cpupool
+ * cpupool_lock must be held
+@@ -408,7 +427,6 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c)
+ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+ {
+ int ret;
+- struct domain *d;
+ const cpumask_t *cpus;
+
+ cpus = sched_get_opt_cpumask(c->gran, cpu);
+@@ -433,12 +451,7 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+
+ rcu_read_unlock(&sched_res_rculock);
+
+- rcu_read_lock(&domlist_read_lock);
+- for_each_domain_in_cpupool(d, c)
+- {
+- domain_update_node_affinity(d);
+- }
+- rcu_read_unlock(&domlist_read_lock);
++ cpupool_update_node_affinity(c);
+
+ return 0;
+ }
+@@ -447,18 +460,14 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
+ {
+ int cpu = cpupool_moving_cpu;
+ const cpumask_t *cpus;
+- struct domain *d;
+ int ret;
+
+ if ( c != cpupool_cpu_moving )
+ return -EADDRNOTAVAIL;
+
+- /*
+- * We need this for scanning the domain list, both in
+- * cpu_disable_scheduler(), and at the bottom of this function.
+- */
+ rcu_read_lock(&domlist_read_lock);
+ ret = cpu_disable_scheduler(cpu);
++ rcu_read_unlock(&domlist_read_lock);
+
+ rcu_read_lock(&sched_res_rculock);
+ cpus = get_sched_res(cpu)->cpus;
+@@ -485,11 +494,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
+ }
+ rcu_read_unlock(&sched_res_rculock);
+
+- for_each_domain_in_cpupool(d, c)
+- {
+- domain_update_node_affinity(d);
+- }
+- rcu_read_unlock(&domlist_read_lock);
++ cpupool_update_node_affinity(c);
+
+ return ret;
+ }
+diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
+index 92d0d4961063..6e036f8c8077 100644
+--- a/xen/common/sched/private.h
++++ b/xen/common/sched/private.h
+@@ -593,6 +593,13 @@ affinity_balance_cpumask(const struct sched_unit *unit, int step,
+ cpumask_copy(mask, unit->cpu_hard_affinity);
+ }
+
++struct affinity_masks {
++ cpumask_var_t hard;
++ cpumask_var_t soft;
++};
++
++bool alloc_affinity_masks(struct affinity_masks *affinity);
++void free_affinity_masks(struct affinity_masks *affinity);
+ void sched_rm_cpu(unsigned int cpu);
+ const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu);
+ void schedule_dump(struct cpupool *c);
+diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
+index 701963f84cb8..4e25627d9685 100644
+--- a/xen/include/xen/sched.h
++++ b/xen/include/xen/sched.h
+@@ -649,8 +649,15 @@ static inline void get_knownalive_domain(struct domain *d)
+ ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
+ }
+
++struct affinity_masks;
++
+ int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity);
+-void domain_update_node_affinity(struct domain *d);
++void domain_update_node_aff(struct domain *d, struct affinity_masks *affinity);
++
++static inline void domain_update_node_affinity(struct domain *d)
++{
++ domain_update_node_aff(d, NULL);
++}
+
+ /*
+ * To be implemented by each architecture, sanity checking the configuration
+--
+2.37.3
+
diff --git a/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch b/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch
new file mode 100644
index 0000000..587eef7
--- /dev/null
+++ b/0059-xen-sched-carve-out-memory-allocation-and-freeing-fr.patch
@@ -0,0 +1,263 @@
+From d638c2085f71f694344b34e70eb1b371c86b00f0 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 11 Oct 2022 15:15:14 +0200
+Subject: [PATCH 59/67] xen/sched: carve out memory allocation and freeing from
+ schedule_cpu_rm()
+
+In order to prepare not allocating or freeing memory from
+schedule_cpu_rm(), move this functionality to dedicated functions.
+
+For now call those functions from schedule_cpu_rm().
+
+No change of behavior expected.
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: d42be6f83480b3ada286dc18444331a816be88a3
+master date: 2022-09-05 11:42:30 +0100
+---
+ xen/common/sched/core.c | 143 ++++++++++++++++++++++---------------
+ xen/common/sched/private.h | 11 +++
+ 2 files changed, 98 insertions(+), 56 deletions(-)
+
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index 065a83eca912..2decb1161a63 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -3221,6 +3221,75 @@ out:
+ return ret;
+ }
+
++/*
++ * Allocate all memory needed for free_cpu_rm_data(), as allocations cannot
++ * be made in stop_machine() context.
++ *
++ * Between alloc_cpu_rm_data() and the real cpu removal action the relevant
++ * contents of struct sched_resource can't change, as the cpu in question is
++ * locked against any other movement to or from cpupools, and the data copied
++ * by alloc_cpu_rm_data() is modified only in case the cpu in question is
++ * being moved from or to a cpupool.
++ */
++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu)
++{
++ struct cpu_rm_data *data;
++ const struct sched_resource *sr;
++ unsigned int idx;
++
++ rcu_read_lock(&sched_res_rculock);
++
++ sr = get_sched_res(cpu);
++ data = xmalloc_flex_struct(struct cpu_rm_data, sr, sr->granularity - 1);
++ if ( !data )
++ goto out;
++
++ data->old_ops = sr->scheduler;
++ data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
++ data->ppriv_old = sr->sched_priv;
++
++ for ( idx = 0; idx < sr->granularity - 1; idx++ )
++ {
++ data->sr[idx] = sched_alloc_res();
++ if ( data->sr[idx] )
++ {
++ data->sr[idx]->sched_unit_idle = sched_alloc_unit_mem();
++ if ( !data->sr[idx]->sched_unit_idle )
++ {
++ sched_res_free(&data->sr[idx]->rcu);
++ data->sr[idx] = NULL;
++ }
++ }
++ if ( !data->sr[idx] )
++ {
++ while ( idx > 0 )
++ sched_res_free(&data->sr[--idx]->rcu);
++ XFREE(data);
++ goto out;
++ }
++
++ data->sr[idx]->curr = data->sr[idx]->sched_unit_idle;
++ data->sr[idx]->scheduler = &sched_idle_ops;
++ data->sr[idx]->granularity = 1;
++
++ /* We want the lock not to change when replacing the resource. */
++ data->sr[idx]->schedule_lock = sr->schedule_lock;
++ }
++
++ out:
++ rcu_read_unlock(&sched_res_rculock);
++
++ return data;
++}
++
++void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu)
++{
++ sched_free_udata(mem->old_ops, mem->vpriv_old);
++ sched_free_pdata(mem->old_ops, mem->ppriv_old, cpu);
++
++ xfree(mem);
++}
++
+ /*
+ * Remove a pCPU from its cpupool. Its scheduler becomes &sched_idle_ops
+ * (the idle scheduler).
+@@ -3229,53 +3298,23 @@ out:
+ */
+ int schedule_cpu_rm(unsigned int cpu)
+ {
+- void *ppriv_old, *vpriv_old;
+- struct sched_resource *sr, **sr_new = NULL;
++ struct sched_resource *sr;
++ struct cpu_rm_data *data;
+ struct sched_unit *unit;
+- struct scheduler *old_ops;
+ spinlock_t *old_lock;
+ unsigned long flags;
+- int idx, ret = -ENOMEM;
++ int idx = 0;
+ unsigned int cpu_iter;
+
++ data = alloc_cpu_rm_data(cpu);
++ if ( !data )
++ return -ENOMEM;
++
+ rcu_read_lock(&sched_res_rculock);
+
+ sr = get_sched_res(cpu);
+- old_ops = sr->scheduler;
+
+- if ( sr->granularity > 1 )
+- {
+- sr_new = xmalloc_array(struct sched_resource *, sr->granularity - 1);
+- if ( !sr_new )
+- goto out;
+- for ( idx = 0; idx < sr->granularity - 1; idx++ )
+- {
+- sr_new[idx] = sched_alloc_res();
+- if ( sr_new[idx] )
+- {
+- sr_new[idx]->sched_unit_idle = sched_alloc_unit_mem();
+- if ( !sr_new[idx]->sched_unit_idle )
+- {
+- sched_res_free(&sr_new[idx]->rcu);
+- sr_new[idx] = NULL;
+- }
+- }
+- if ( !sr_new[idx] )
+- {
+- for ( idx--; idx >= 0; idx-- )
+- sched_res_free(&sr_new[idx]->rcu);
+- goto out;
+- }
+- sr_new[idx]->curr = sr_new[idx]->sched_unit_idle;
+- sr_new[idx]->scheduler = &sched_idle_ops;
+- sr_new[idx]->granularity = 1;
+-
+- /* We want the lock not to change when replacing the resource. */
+- sr_new[idx]->schedule_lock = sr->schedule_lock;
+- }
+- }
+-
+- ret = 0;
++ ASSERT(sr->granularity);
+ ASSERT(sr->cpupool != NULL);
+ ASSERT(cpumask_test_cpu(cpu, &cpupool_free_cpus));
+ ASSERT(!cpumask_test_cpu(cpu, sr->cpupool->cpu_valid));
+@@ -3283,10 +3322,6 @@ int schedule_cpu_rm(unsigned int cpu)
+ /* See comment in schedule_cpu_add() regarding lock switching. */
+ old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
+
+- vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
+- ppriv_old = sr->sched_priv;
+-
+- idx = 0;
+ for_each_cpu ( cpu_iter, sr->cpus )
+ {
+ per_cpu(sched_res_idx, cpu_iter) = 0;
+@@ -3300,27 +3335,27 @@ int schedule_cpu_rm(unsigned int cpu)
+ else
+ {
+ /* Initialize unit. */
+- unit = sr_new[idx]->sched_unit_idle;
+- unit->res = sr_new[idx];
++ unit = data->sr[idx]->sched_unit_idle;
++ unit->res = data->sr[idx];
+ unit->is_running = true;
+ sched_unit_add_vcpu(unit, idle_vcpu[cpu_iter]);
+ sched_domain_insert_unit(unit, idle_vcpu[cpu_iter]->domain);
+
+ /* Adjust cpu masks of resources (old and new). */
+ cpumask_clear_cpu(cpu_iter, sr->cpus);
+- cpumask_set_cpu(cpu_iter, sr_new[idx]->cpus);
++ cpumask_set_cpu(cpu_iter, data->sr[idx]->cpus);
+ cpumask_set_cpu(cpu_iter, &sched_res_mask);
+
+ /* Init timer. */
+- init_timer(&sr_new[idx]->s_timer, s_timer_fn, NULL, cpu_iter);
++ init_timer(&data->sr[idx]->s_timer, s_timer_fn, NULL, cpu_iter);
+
+ /* Last resource initializations and insert resource pointer. */
+- sr_new[idx]->master_cpu = cpu_iter;
+- set_sched_res(cpu_iter, sr_new[idx]);
++ data->sr[idx]->master_cpu = cpu_iter;
++ set_sched_res(cpu_iter, data->sr[idx]);
+
+ /* Last action: set the new lock pointer. */
+ smp_mb();
+- sr_new[idx]->schedule_lock = &sched_free_cpu_lock;
++ data->sr[idx]->schedule_lock = &sched_free_cpu_lock;
+
+ idx++;
+ }
+@@ -3336,16 +3371,12 @@ int schedule_cpu_rm(unsigned int cpu)
+ /* _Not_ pcpu_schedule_unlock(): schedule_lock may have changed! */
+ spin_unlock_irqrestore(old_lock, flags);
+
+- sched_deinit_pdata(old_ops, ppriv_old, cpu);
++ sched_deinit_pdata(data->old_ops, data->ppriv_old, cpu);
+
+- sched_free_udata(old_ops, vpriv_old);
+- sched_free_pdata(old_ops, ppriv_old, cpu);
+-
+-out:
+ rcu_read_unlock(&sched_res_rculock);
+- xfree(sr_new);
++ free_cpu_rm_data(data, cpu);
+
+- return ret;
++ return 0;
+ }
+
+ struct scheduler *scheduler_get_default(void)
+diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
+index 6e036f8c8077..ff3185425219 100644
+--- a/xen/common/sched/private.h
++++ b/xen/common/sched/private.h
+@@ -600,6 +600,15 @@ struct affinity_masks {
+
+ bool alloc_affinity_masks(struct affinity_masks *affinity);
+ void free_affinity_masks(struct affinity_masks *affinity);
++
++/* Memory allocation related data for schedule_cpu_rm(). */
++struct cpu_rm_data {
++ const struct scheduler *old_ops;
++ void *ppriv_old;
++ void *vpriv_old;
++ struct sched_resource *sr[];
++};
++
+ void sched_rm_cpu(unsigned int cpu);
+ const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu);
+ void schedule_dump(struct cpupool *c);
+@@ -608,6 +617,8 @@ struct scheduler *scheduler_alloc(unsigned int sched_id);
+ void scheduler_free(struct scheduler *sched);
+ int cpu_disable_scheduler(unsigned int cpu);
+ int schedule_cpu_add(unsigned int cpu, struct cpupool *c);
++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu);
++void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu);
+ int schedule_cpu_rm(unsigned int cpu);
+ int sched_move_domain(struct domain *d, struct cpupool *c);
+ struct cpupool *cpupool_get_by_id(unsigned int poolid);
+--
+2.37.3
+
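The allocate-then-roll-back idiom introduced above by alloc_cpu_rm_data()/free_cpu_rm_data() can be modelled with a small stand-alone C program. This is only an illustrative sketch: the toy_* names, sizes and the main() driver below are invented for the example and are not part of the Xen tree or of this patch series.

/* Toy model of the alloc-then-rollback idiom used by alloc_cpu_rm_data().
 * All names here are illustrative; they do not exist in the Xen sources.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_res {
    int id;
};

struct toy_rm_data {
    unsigned int nr;
    struct toy_res *sr[];          /* flexible array, as in cpu_rm_data */
};

static struct toy_rm_data *toy_alloc_rm_data(unsigned int nr)
{
    struct toy_rm_data *data = malloc(sizeof(*data) + nr * sizeof(data->sr[0]));
    unsigned int idx;

    if ( !data )
        return NULL;

    data->nr = nr;
    for ( idx = 0; idx < nr; idx++ )
    {
        data->sr[idx] = malloc(sizeof(*data->sr[idx]));
        if ( !data->sr[idx] )
        {
            /* Roll back everything allocated so far, then fail as a whole. */
            while ( idx > 0 )
                free(data->sr[--idx]);
            free(data);
            return NULL;
        }
        data->sr[idx]->id = idx;
    }

    return data;
}

static void toy_free_rm_data(struct toy_rm_data *data)
{
    unsigned int idx;

    for ( idx = 0; idx < data->nr; idx++ )
        free(data->sr[idx]);
    free(data);
}

int main(void)
{
    /* Allocate up front, "use" in a context where allocation is forbidden,
     * free afterwards - mirroring alloc_cpu_rm_data()/free_cpu_rm_data(). */
    struct toy_rm_data *data = toy_alloc_rm_data(3);

    if ( !data )
        return 1;

    printf("prepared %u resources ahead of time\n", data->nr);
    toy_free_rm_data(data);

    return 0;
}

Either the whole set of resources is handed back to the caller, or nothing is, which is the property schedule_cpu_rm() relies on.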
diff --git a/0060-xen-sched-fix-cpu-hotplug.patch b/0060-xen-sched-fix-cpu-hotplug.patch
new file mode 100644
index 0000000..3e158f4
--- /dev/null
+++ b/0060-xen-sched-fix-cpu-hotplug.patch
@@ -0,0 +1,307 @@
+From d17680808b4c8015e31070c971e1ee548170ae34 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 11 Oct 2022 15:15:41 +0200
+Subject: [PATCH 60/67] xen/sched: fix cpu hotplug
+
+CPU unplugging calls schedule_cpu_rm() via stop_machine_run() with
+interrupts disabled, thus any memory allocation or freeing must be
+avoided.
+
+Since commit 5047cd1d5dea ("xen/common: Use enhanced
+ASSERT_ALLOC_CONTEXT in xmalloc()") this restriction is being enforced
+via an assertion, which will now fail.
+
+Fix this by allocating needed memory before entering stop_machine_run()
+and freeing any memory only after having finished stop_machine_run().
+
+Fixes: 1ec410112cdd ("xen/sched: support differing granularity in schedule_cpu_[add/rm]()")
+Reported-by: Gao Ruifeng <ruifeng.gao@intel.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Tested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: d84473689611eed32fd90b27e614f28af767fa3f
+master date: 2022-09-05 11:42:30 +0100
+---
+ xen/common/sched/core.c | 25 +++++++++++---
+ xen/common/sched/cpupool.c | 69 +++++++++++++++++++++++++++++---------
+ xen/common/sched/private.h | 5 +--
+ 3 files changed, 77 insertions(+), 22 deletions(-)
+
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index 2decb1161a63..900aab8f66a7 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -3231,7 +3231,7 @@ out:
+ * by alloc_cpu_rm_data() is modified only in case the cpu in question is
+ * being moved from or to a cpupool.
+ */
+-struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu)
++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc)
+ {
+ struct cpu_rm_data *data;
+ const struct sched_resource *sr;
+@@ -3244,6 +3244,17 @@ struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu)
+ if ( !data )
+ goto out;
+
++ if ( aff_alloc )
++ {
++ if ( !alloc_affinity_masks(&data->affinity) )
++ {
++ XFREE(data);
++ goto out;
++ }
++ }
++ else
++ memset(&data->affinity, 0, sizeof(data->affinity));
++
+ data->old_ops = sr->scheduler;
+ data->vpriv_old = idle_vcpu[cpu]->sched_unit->priv;
+ data->ppriv_old = sr->sched_priv;
+@@ -3264,6 +3275,7 @@ struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu)
+ {
+ while ( idx > 0 )
+ sched_res_free(&data->sr[--idx]->rcu);
++ free_affinity_masks(&data->affinity);
+ XFREE(data);
+ goto out;
+ }
+@@ -3286,6 +3298,7 @@ void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu)
+ {
+ sched_free_udata(mem->old_ops, mem->vpriv_old);
+ sched_free_pdata(mem->old_ops, mem->ppriv_old, cpu);
++ free_affinity_masks(&mem->affinity);
+
+ xfree(mem);
+ }
+@@ -3296,17 +3309,18 @@ void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu)
+ * The cpu is already marked as "free" and not valid any longer for its
+ * cpupool.
+ */
+-int schedule_cpu_rm(unsigned int cpu)
++int schedule_cpu_rm(unsigned int cpu, struct cpu_rm_data *data)
+ {
+ struct sched_resource *sr;
+- struct cpu_rm_data *data;
+ struct sched_unit *unit;
+ spinlock_t *old_lock;
+ unsigned long flags;
+ int idx = 0;
+ unsigned int cpu_iter;
++ bool free_data = !data;
+
+- data = alloc_cpu_rm_data(cpu);
++ if ( !data )
++ data = alloc_cpu_rm_data(cpu, false);
+ if ( !data )
+ return -ENOMEM;
+
+@@ -3374,7 +3388,8 @@ int schedule_cpu_rm(unsigned int cpu)
+ sched_deinit_pdata(data->old_ops, data->ppriv_old, cpu);
+
+ rcu_read_unlock(&sched_res_rculock);
+- free_cpu_rm_data(data, cpu);
++ if ( free_data )
++ free_cpu_rm_data(data, cpu);
+
+ return 0;
+ }
+diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
+index 45b6ff99561a..b5a948639aad 100644
+--- a/xen/common/sched/cpupool.c
++++ b/xen/common/sched/cpupool.c
+@@ -402,22 +402,28 @@ int cpupool_move_domain(struct domain *d, struct cpupool *c)
+ }
+
+ /* Update affinities of all domains in a cpupool. */
+-static void cpupool_update_node_affinity(const struct cpupool *c)
++static void cpupool_update_node_affinity(const struct cpupool *c,
++ struct affinity_masks *masks)
+ {
+- struct affinity_masks masks;
++ struct affinity_masks local_masks;
+ struct domain *d;
+
+- if ( !alloc_affinity_masks(&masks) )
+- return;
++ if ( !masks )
++ {
++ if ( !alloc_affinity_masks(&local_masks) )
++ return;
++ masks = &local_masks;
++ }
+
+ rcu_read_lock(&domlist_read_lock);
+
+ for_each_domain_in_cpupool(d, c)
+- domain_update_node_aff(d, &masks);
++ domain_update_node_aff(d, masks);
+
+ rcu_read_unlock(&domlist_read_lock);
+
+- free_affinity_masks(&masks);
++ if ( masks == &local_masks )
++ free_affinity_masks(masks);
+ }
+
+ /*
+@@ -451,15 +457,17 @@ static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+
+ rcu_read_unlock(&sched_res_rculock);
+
+- cpupool_update_node_affinity(c);
++ cpupool_update_node_affinity(c, NULL);
+
+ return 0;
+ }
+
+-static int cpupool_unassign_cpu_finish(struct cpupool *c)
++static int cpupool_unassign_cpu_finish(struct cpupool *c,
++ struct cpu_rm_data *mem)
+ {
+ int cpu = cpupool_moving_cpu;
+ const cpumask_t *cpus;
++ struct affinity_masks *masks = mem ? &mem->affinity : NULL;
+ int ret;
+
+ if ( c != cpupool_cpu_moving )
+@@ -482,7 +490,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
+ */
+ if ( !ret )
+ {
+- ret = schedule_cpu_rm(cpu);
++ ret = schedule_cpu_rm(cpu, mem);
+ if ( ret )
+ cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
+ else
+@@ -494,7 +502,7 @@ static int cpupool_unassign_cpu_finish(struct cpupool *c)
+ }
+ rcu_read_unlock(&sched_res_rculock);
+
+- cpupool_update_node_affinity(c);
++ cpupool_update_node_affinity(c, masks);
+
+ return ret;
+ }
+@@ -558,7 +566,7 @@ static long cpupool_unassign_cpu_helper(void *info)
+ cpupool_cpu_moving->cpupool_id, cpupool_moving_cpu);
+ spin_lock(&cpupool_lock);
+
+- ret = cpupool_unassign_cpu_finish(c);
++ ret = cpupool_unassign_cpu_finish(c, NULL);
+
+ spin_unlock(&cpupool_lock);
+ debugtrace_printk("cpupool_unassign_cpu ret=%ld\n", ret);
+@@ -701,7 +709,7 @@ static int cpupool_cpu_add(unsigned int cpu)
+ * This function is called in stop_machine context, so we can be sure no
+ * non-idle vcpu is active on the system.
+ */
+-static void cpupool_cpu_remove(unsigned int cpu)
++static void cpupool_cpu_remove(unsigned int cpu, struct cpu_rm_data *mem)
+ {
+ int ret;
+
+@@ -709,7 +717,7 @@ static void cpupool_cpu_remove(unsigned int cpu)
+
+ if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) )
+ {
+- ret = cpupool_unassign_cpu_finish(cpupool0);
++ ret = cpupool_unassign_cpu_finish(cpupool0, mem);
+ BUG_ON(ret);
+ }
+ cpumask_clear_cpu(cpu, &cpupool_free_cpus);
+@@ -775,7 +783,7 @@ static void cpupool_cpu_remove_forced(unsigned int cpu)
+ {
+ ret = cpupool_unassign_cpu_start(c, master_cpu);
+ BUG_ON(ret);
+- ret = cpupool_unassign_cpu_finish(c);
++ ret = cpupool_unassign_cpu_finish(c, NULL);
+ BUG_ON(ret);
+ }
+ }
+@@ -993,12 +1001,24 @@ void dump_runq(unsigned char key)
+ static int cpu_callback(
+ struct notifier_block *nfb, unsigned long action, void *hcpu)
+ {
++ static struct cpu_rm_data *mem;
++
+ unsigned int cpu = (unsigned long)hcpu;
+ int rc = 0;
+
+ switch ( action )
+ {
+ case CPU_DOWN_FAILED:
++ if ( system_state <= SYS_STATE_active )
++ {
++ if ( mem )
++ {
++ free_cpu_rm_data(mem, cpu);
++ mem = NULL;
++ }
++ rc = cpupool_cpu_add(cpu);
++ }
++ break;
+ case CPU_ONLINE:
+ if ( system_state <= SYS_STATE_active )
+ rc = cpupool_cpu_add(cpu);
+@@ -1006,12 +1026,31 @@ static int cpu_callback(
+ case CPU_DOWN_PREPARE:
+ /* Suspend/Resume don't change assignments of cpus to cpupools. */
+ if ( system_state <= SYS_STATE_active )
++ {
+ rc = cpupool_cpu_remove_prologue(cpu);
++ if ( !rc )
++ {
++ ASSERT(!mem);
++ mem = alloc_cpu_rm_data(cpu, true);
++ rc = mem ? 0 : -ENOMEM;
++ }
++ }
+ break;
+ case CPU_DYING:
+ /* Suspend/Resume don't change assignments of cpus to cpupools. */
+ if ( system_state <= SYS_STATE_active )
+- cpupool_cpu_remove(cpu);
++ {
++ ASSERT(mem);
++ cpupool_cpu_remove(cpu, mem);
++ }
++ break;
++ case CPU_DEAD:
++ if ( system_state <= SYS_STATE_active )
++ {
++ ASSERT(mem);
++ free_cpu_rm_data(mem, cpu);
++ mem = NULL;
++ }
+ break;
+ case CPU_RESUME_FAILED:
+ cpupool_cpu_remove_forced(cpu);
+diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
+index ff3185425219..3bab78ccb240 100644
+--- a/xen/common/sched/private.h
++++ b/xen/common/sched/private.h
+@@ -603,6 +603,7 @@ void free_affinity_masks(struct affinity_masks *affinity);
+
+ /* Memory allocation related data for schedule_cpu_rm(). */
+ struct cpu_rm_data {
++ struct affinity_masks affinity;
+ const struct scheduler *old_ops;
+ void *ppriv_old;
+ void *vpriv_old;
+@@ -617,9 +618,9 @@ struct scheduler *scheduler_alloc(unsigned int sched_id);
+ void scheduler_free(struct scheduler *sched);
+ int cpu_disable_scheduler(unsigned int cpu);
+ int schedule_cpu_add(unsigned int cpu, struct cpupool *c);
+-struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu);
++struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc);
+ void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu);
+-int schedule_cpu_rm(unsigned int cpu);
++int schedule_cpu_rm(unsigned int cpu, struct cpu_rm_data *mem);
+ int sched_move_domain(struct domain *d, struct cpupool *c);
+ struct cpupool *cpupool_get_by_id(unsigned int poolid);
+ void cpupool_put(struct cpupool *pool);
+--
+2.37.3
+
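The hotplug flow established by the cpu_callback() changes above — allocate in CPU_DOWN_PREPARE, consume the pre-allocated data in CPU_DYING (stop_machine context, no allocations allowed), release it in CPU_DEAD or CPU_DOWN_FAILED — can be summarised with a small stand-alone model. Everything below (the enum, the fake buffer, the driver) is invented for illustration and is not Xen code.

/* Stand-alone model of the "allocate before stop_machine, free after" flow
 * from the cpu_callback() changes in 0060. Names are illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

enum toy_action { DOWN_PREPARE, DYING, DEAD, DOWN_FAILED };

static void *mem;                 /* mirrors the static cpu_rm_data pointer */

static int toy_cpu_callback(enum toy_action action, unsigned int cpu)
{
    switch ( action )
    {
    case DOWN_PREPARE:
        /* Interrupts still enabled: safe to allocate. */
        assert(!mem);
        mem = malloc(64);
        return mem ? 0 : -1;

    case DYING:
        /* stop_machine context: only use what was allocated earlier. */
        assert(mem);
        printf("cpu%u: removing using pre-allocated buffer %p\n", cpu, mem);
        return 0;

    case DEAD:
    case DOWN_FAILED:
        /* Back in a normal context: freeing is allowed again. */
        if ( mem )
        {
            free(mem);
            mem = NULL;
        }
        return 0;
    }

    return 0;
}

int main(void)
{
    if ( toy_cpu_callback(DOWN_PREPARE, 1) )
        return 1;
    toy_cpu_callback(DYING, 1);
    toy_cpu_callback(DEAD, 1);

    return 0;
}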
diff --git a/0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch b/0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch
new file mode 100644
index 0000000..0f044b2
--- /dev/null
+++ b/0061-Config.mk-correct-PIE-related-option-s-in-EMBEDDED_E.patch
@@ -0,0 +1,58 @@
+From 19cf28b515f21da02df80e68f901ad7650daaa37 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 11 Oct 2022 15:15:55 +0200
+Subject: [PATCH 61/67] Config.mk: correct PIE-related option(s) in
+ EMBEDDED_EXTRA_CFLAGS
+
+I haven't been able to find evidence of "-nopie" ever having been a
+supported compiler option. The correct spelling is "-no-pie".
+Furthermore like "-pie" this is an option which is solely passed to the
+linker. The compiler only recognizes "-fpie" / "-fPIE" / "-fno-pie", and
+it doesn't infer these options from "-pie" / "-no-pie".
+
+Add the compiler recognized form, but for the possible case of the
+variable also being used somewhere for linking keep the linker option as
+well (with corrected spelling).
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+
+Build: Drop -no-pie from EMBEDDED_EXTRA_CFLAGS
+
+This breaks all Clang builds, as demonstrated by Gitlab CI.
+
+Contrary to the description in ecd6b9759919, -no-pie is not even an option
+passed to the linker. GCC's actual behaviour is to inhibit the passing of
+-pie to the linker, as well as selecting different crt0 artefacts to be linked.
+
+EMBEDDED_EXTRA_CFLAGS is not used for $(CC)-doing-linking, and not liable to
+gain such a usecase.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+Tested-by: Stefano Stabellini <sstabellini@kernel.org>
+Fixes: ecd6b9759919 ("Config.mk: correct PIE-related option(s) in EMBEDDED_EXTRA_CFLAGS")
+master commit: ecd6b9759919fa6335b0be1b5fc5cce29a30c4f1
+master date: 2022-09-08 09:25:26 +0200
+master commit: 13a7c0074ac8fb31f6c0485429b7a20a1946cb22
+master date: 2022-09-27 15:40:42 -0700
+---
+ Config.mk | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/Config.mk b/Config.mk
+index 96d89b2f7dfc..9f87608f6602 100644
+--- a/Config.mk
++++ b/Config.mk
+@@ -203,7 +203,7 @@ endif
+ APPEND_LDFLAGS += $(foreach i, $(APPEND_LIB), -L$(i))
+ APPEND_CFLAGS += $(foreach i, $(APPEND_INCLUDES), -I$(i))
+
+-EMBEDDED_EXTRA_CFLAGS := -nopie -fno-stack-protector -fno-stack-protector-all
++EMBEDDED_EXTRA_CFLAGS := -fno-pie -fno-stack-protector -fno-stack-protector-all
+ EMBEDDED_EXTRA_CFLAGS += -fno-exceptions -fno-asynchronous-unwind-tables
+
+ XEN_EXTFILES_URL ?= http://xenbits.xen.org/xen-extfiles
+--
+2.37.3
+
diff --git a/0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch b/0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch
new file mode 100644
index 0000000..65882a9
--- /dev/null
+++ b/0062-tools-xenstore-minor-fix-of-the-migration-stream-doc.patch
@@ -0,0 +1,41 @@
+From 182f8bb503b9dd3db5dd9118dc763d241787c6fc Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 11 Oct 2022 15:16:09 +0200
+Subject: [PATCH 62/67] tools/xenstore: minor fix of the migration stream doc
+
+Drop mentioning the non-existent read-only socket in the migration
+stream description document.
+
+The related record field was removed in commit 8868a0e3f674 ("docs:
+update the xenstore migration stream documentation").
+
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Acked-by: Julien Grall <jgrall@amazon.com>
+master commit: ace1d2eff80d3d66c37ae765dae3e3cb5697e5a4
+master date: 2022-09-08 09:25:58 +0200
+---
+ docs/designs/xenstore-migration.md | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/docs/designs/xenstore-migration.md b/docs/designs/xenstore-migration.md
+index 5f1155273ec3..78530bbb0ef4 100644
+--- a/docs/designs/xenstore-migration.md
++++ b/docs/designs/xenstore-migration.md
+@@ -129,11 +129,9 @@ xenstored state that needs to be restored.
+ | `evtchn-fd` | The file descriptor used to communicate with |
+ | | the event channel driver |
+
+-xenstored will resume in the original process context. Hence `rw-socket-fd` and
+-`ro-socket-fd` simply specify the file descriptors of the sockets. Sockets
+-are not always used, however, and so -1 will be used to denote an unused
+-socket.
+-
++xenstored will resume in the original process context. Hence `rw-socket-fd`
++simply specifies the file descriptor of the socket. Sockets are not always
++used, however, and so -1 will be used to denote an unused socket.
+
+ \pagebreak
+
+--
+2.37.3
+
diff --git a/0063-xen-gnttab-fix-gnttab_acquire_resource.patch b/0063-xen-gnttab-fix-gnttab_acquire_resource.patch
new file mode 100644
index 0000000..0d58157
--- /dev/null
+++ b/0063-xen-gnttab-fix-gnttab_acquire_resource.patch
@@ -0,0 +1,69 @@
+From 3ac64b3751837a117ee3dfb3e2cc27057a83d0f7 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 11 Oct 2022 15:16:53 +0200
+Subject: [PATCH 63/67] xen/gnttab: fix gnttab_acquire_resource()
+
+Commit 9dc46386d89d ("gnttab: work around "may be used uninitialized"
+warning") was wrong, as vaddrs can legitimately be NULL in case
+XENMEM_resource_grant_table_id_status was specified for a grant table
+v1. This would result in crashes in debug builds due to
+ASSERT_UNREACHABLE() triggering.
+
+Check vaddrs only to be NULL in the rc == 0 case.
+
+Expand the tests in tools/tests/resource to tickle this path, and verify that
+using XENMEM_resource_grant_table_id_status on a v1 grant table fails.
+
+Fixes: 9dc46386d89d ("gnttab: work around "may be used uninitialized" warning")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com> # xen
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 52daa6a8483e4fbd6757c9d1b791e23931791608
+master date: 2022-09-09 16:28:38 +0100
+---
+ tools/tests/resource/test-resource.c | 15 +++++++++++++++
+ xen/common/grant_table.c | 2 +-
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/tools/tests/resource/test-resource.c b/tools/tests/resource/test-resource.c
+index 1caaa60e62d9..bf485baff2b4 100644
+--- a/tools/tests/resource/test-resource.c
++++ b/tools/tests/resource/test-resource.c
+@@ -63,6 +63,21 @@ static void test_gnttab(uint32_t domid, unsigned int nr_frames)
+ rc = xenforeignmemory_unmap_resource(fh, res);
+ if ( rc )
+ return fail(" Fail: Unmap %d - %s\n", errno, strerror(errno));
++
++ /*
++ * Verify that an attempt to map the status frames fails, as the domain is
++ * in gnttab v1 mode.
++ */
++ res = xenforeignmemory_map_resource(
++ fh, domid, XENMEM_resource_grant_table,
++ XENMEM_resource_grant_table_id_status, 0, 1,
++ (void **)&gnttab, PROT_READ | PROT_WRITE, 0);
++
++ if ( res )
++ {
++ fail(" Fail: Managed to map gnttab v2 status frames in v1 mode\n");
++ xenforeignmemory_unmap_resource(fh, res);
++ }
+ }
+
+ static void test_domain_configurations(void)
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index 0523beb9b734..01e426c67fb6 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -4138,7 +4138,7 @@ int gnttab_acquire_resource(
+ * on non-error paths, and hence it needs setting to NULL at the top of the
+ * function. Leave some runtime safety.
+ */
+- if ( !vaddrs )
++ if ( !rc && !vaddrs )
+ {
+ ASSERT_UNREACHABLE();
+ rc = -ENODATA;
+--
+2.37.3
+
diff --git a/0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch b/0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch
new file mode 100644
index 0000000..4246b01
--- /dev/null
+++ b/0064-x86-wire-up-VCPUOP_register_vcpu_time_memory_area-fo.patch
@@ -0,0 +1,59 @@
+From 62e534d17cdd838828bfd75d3d845e31524dd336 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Tue, 11 Oct 2022 15:17:12 +0200
+Subject: [PATCH 64/67] x86: wire up VCPUOP_register_vcpu_time_memory_area for
+ 32-bit guests
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Forever since its introduction, VCPUOP_register_vcpu_time_memory_area
+was available only to native domains. Linux, for example, would attempt
+to use it irrespective of guest bitness (including in its so called
+PVHVM mode) as long as it finds XEN_PVCLOCK_TSC_STABLE_BIT set (which we
+set only for clocksource=tsc, which in turn needs engaging via command
+line option).
+
+Fixes: a5d39947cb89 ("Allow guests to register secondary vcpu_time_info")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Roger Pau Monné <roger.pau@citrix.com>
+master commit: b726541d94bd0a80b5864d17a2cd2e6d73a3fe0a
+master date: 2022-09-29 14:47:45 +0200
+---
+ xen/arch/x86/x86_64/domain.c | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+diff --git a/xen/arch/x86/x86_64/domain.c b/xen/arch/x86/x86_64/domain.c
+index c46dccc25a54..d51d99344796 100644
+--- a/xen/arch/x86/x86_64/domain.c
++++ b/xen/arch/x86/x86_64/domain.c
+@@ -54,6 +54,26 @@ arch_compat_vcpu_op(
+ break;
+ }
+
++ case VCPUOP_register_vcpu_time_memory_area:
++ {
++ struct compat_vcpu_register_time_memory_area area = { .addr.p = 0 };
++
++ rc = -EFAULT;
++ if ( copy_from_guest(&area.addr.h, arg, 1) )
++ break;
++
++ if ( area.addr.h.c != area.addr.p ||
++ !compat_handle_okay(area.addr.h, 1) )
++ break;
++
++ rc = 0;
++ guest_from_compat_handle(v->arch.time_info_guest, area.addr.h);
++
++ force_update_vcpu_system_time(v);
++
++ break;
++ }
++
+ case VCPUOP_get_physid:
+ rc = arch_do_vcpu_op(cmd, v, arg);
+ break;
+--
+2.37.3
+
diff --git a/0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch b/0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch
new file mode 100644
index 0000000..df4fb38
--- /dev/null
+++ b/0065-x86-vpmu-Fix-race-condition-in-vpmu_load.patch
@@ -0,0 +1,97 @@
+From 9690bb261d5fa09cb281e1fa124d93db7b84fda5 Mon Sep 17 00:00:00 2001
+From: Tamas K Lengyel <tamas.lengyel@intel.com>
+Date: Tue, 11 Oct 2022 15:17:42 +0200
+Subject: [PATCH 65/67] x86/vpmu: Fix race-condition in vpmu_load
+
+The vPMU code-base attempts to perform an optimization on saving/reloading the
+PMU context by keeping track of which vCPU ran on each pCPU. When a pCPU is
+getting scheduled, it checks whether the previous vCPU is the current one; if
+not, it attempts a call to vpmu_save_force. Unfortunately, if the previous
+vCPU is already getting scheduled to run on another pCPU, its state will
+already be runnable, which results in an ASSERT failure.
+
+Fix this by always performing a pmu context save in vpmu_save when called from
+vpmu_switch_from, and do a vpmu_load when called from vpmu_switch_to.
+
+While this presents a minimal overhead in case the same vCPU is getting
+rescheduled on the same pCPU, the ASSERT failure is avoided and the code is a
+lot easier to reason about.
+
+Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+master commit: defa4e51d20a143bdd4395a075bf0933bb38a9a4
+master date: 2022-09-30 09:53:49 +0200
+---
+ xen/arch/x86/cpu/vpmu.c | 42 ++++-------------------------------------
+ 1 file changed, 4 insertions(+), 38 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
+index fb1b296a6cc1..800eff87dc03 100644
+--- a/xen/arch/x86/cpu/vpmu.c
++++ b/xen/arch/x86/cpu/vpmu.c
+@@ -364,58 +364,24 @@ void vpmu_save(struct vcpu *v)
+ vpmu->last_pcpu = pcpu;
+ per_cpu(last_vcpu, pcpu) = v;
+
++ vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
++
+ if ( vpmu->arch_vpmu_ops )
+ if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
+ vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+
++ vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
++
+ apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+ }
+
+ int vpmu_load(struct vcpu *v, bool_t from_guest)
+ {
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+- int pcpu = smp_processor_id();
+- struct vcpu *prev = NULL;
+
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+ return 0;
+
+- /* First time this VCPU is running here */
+- if ( vpmu->last_pcpu != pcpu )
+- {
+- /*
+- * Get the context from last pcpu that we ran on. Note that if another
+- * VCPU is running there it must have saved this VPCU's context before
+- * startig to run (see below).
+- * There should be no race since remote pcpu will disable interrupts
+- * before saving the context.
+- */
+- if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
+- {
+- on_selected_cpus(cpumask_of(vpmu->last_pcpu),
+- vpmu_save_force, (void *)v, 1);
+- vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+- }
+- }
+-
+- /* Prevent forced context save from remote CPU */
+- local_irq_disable();
+-
+- prev = per_cpu(last_vcpu, pcpu);
+-
+- if ( prev != v && prev )
+- {
+- vpmu = vcpu_vpmu(prev);
+-
+- /* Someone ran here before us */
+- vpmu_save_force(prev);
+- vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
+-
+- vpmu = vcpu_vpmu(v);
+- }
+-
+- local_irq_enable();
+-
+ /* Only when PMU is counting, we load PMU context immediately. */
+ if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
+ (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
+--
+2.37.3
+
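As a rough illustration of the simplification made above — unconditionally saving the PMU context on switch-from and reloading it on switch-to, instead of the lazy cross-pCPU hand-over that raced — here is a tiny stand-alone model. The structs and helpers are invented for the example; they are not the hypervisor's vpmu interface.

/* Toy model of the "always save on switch-from, always load on switch-to"
 * scheme adopted by 0065. Nothing here is real vpmu code.
 */
#include <stdio.h>
#include <string.h>

#define NR_CTRS 4

struct toy_vcpu {
    const char *name;
    unsigned long saved_ctrs[NR_CTRS];   /* per-vCPU saved PMU context */
};

static unsigned long hw_ctrs[NR_CTRS];   /* the pCPU's "real" counters */

static void toy_switch_from(struct toy_vcpu *v)
{
    /* Unconditional save: no need to know which pCPU ran the vCPU last. */
    memcpy(v->saved_ctrs, hw_ctrs, sizeof(hw_ctrs));
}

static void toy_switch_to(struct toy_vcpu *v)
{
    /* Unconditional load: the saved copy is always up to date. */
    memcpy(hw_ctrs, v->saved_ctrs, sizeof(hw_ctrs));
}

int main(void)
{
    struct toy_vcpu a = { .name = "a" }, b = { .name = "b" };

    toy_switch_to(&a);
    hw_ctrs[0] = 42;                     /* vCPU a "counts" something */
    toy_switch_from(&a);

    toy_switch_to(&b);                   /* b sees only its own (zero) context */
    toy_switch_from(&b);

    toy_switch_to(&a);
    printf("a's counter restored: %lu\n", hw_ctrs[0]);

    return 0;
}

The cost is a save/restore even when the same vCPU is rescheduled on the same pCPU, which the commit message accepts in exchange for removing the cross-CPU race.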
diff --git a/0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch b/0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch
new file mode 100644
index 0000000..24b9576
--- /dev/null
+++ b/0066-tools-tests-fix-wrong-backport-of-upstream-commit-52.patch
@@ -0,0 +1,31 @@
+From 0d233924d4b0f676056856096e8761205add3ee8 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Wed, 12 Oct 2022 17:31:44 +0200
+Subject: [PATCH 66/67] tools/tests: fix wrong backport of upstream commit
+ 52daa6a8483e4
+
+The backport of upstream commit 52daa6a8483e4 had a bug, correct it.
+
+Fixes: 3ac64b375183 ("xen/gnttab: fix gnttab_acquire_resource()")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+ tools/tests/resource/test-resource.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/tests/resource/test-resource.c b/tools/tests/resource/test-resource.c
+index bf485baff2b4..51a8f4a000f6 100644
+--- a/tools/tests/resource/test-resource.c
++++ b/tools/tests/resource/test-resource.c
+@@ -71,7 +71,7 @@ static void test_gnttab(uint32_t domid, unsigned int nr_frames)
+ res = xenforeignmemory_map_resource(
+ fh, domid, XENMEM_resource_grant_table,
+ XENMEM_resource_grant_table_id_status, 0, 1,
+- (void **)&gnttab, PROT_READ | PROT_WRITE, 0);
++ &addr, PROT_READ | PROT_WRITE, 0);
+
+ if ( res )
+ {
+--
+2.37.3
+
diff --git a/0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch b/0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch
new file mode 100644
index 0000000..309d486
--- /dev/null
+++ b/0067-libxl-Arm-correct-xc_shadow_control-invocation-to-fi.patch
@@ -0,0 +1,42 @@
+From 816580afdd1730d4f85f64477a242a439af1cdf8 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 12 Oct 2022 17:33:40 +0200
+Subject: [PATCH 67/67] libxl/Arm: correct xc_shadow_control() invocation to
+ fix build
+
+The backport didn't adapt to the earlier function prototype taking more
+(unused here) arguments.
+
+Fixes: c5215044578e ("xen/arm, libxl: Implement XEN_DOMCTL_shadow_op for Arm")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Henry Wang <Henry.Wang@arm.com>
+Acked-by: Anthony PERARD <anthony.perard@citrix.com>
+---
+ tools/libs/light/libxl_arm.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
+index d21f614ed788..ba548befdd25 100644
+--- a/tools/libs/light/libxl_arm.c
++++ b/tools/libs/light/libxl_arm.c
+@@ -132,14 +132,14 @@ int libxl__arch_domain_create(libxl__gc *gc,
+ uint32_t domid)
+ {
+ libxl_ctx *ctx = libxl__gc_owner(gc);
+- unsigned int shadow_mb = DIV_ROUNDUP(d_config->b_info.shadow_memkb, 1024);
++ unsigned long shadow_mb = DIV_ROUNDUP(d_config->b_info.shadow_memkb, 1024);
+
+ int r = xc_shadow_control(ctx->xch, domid,
+ XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION,
+- &shadow_mb, 0);
++ NULL, 0, &shadow_mb, 0, NULL);
+ if (r) {
+ LOGED(ERROR, domid,
+- "Failed to set %u MiB shadow allocation", shadow_mb);
++ "Failed to set %lu MiB shadow allocation", shadow_mb);
+ return ERROR_FAIL;
+ }
+
+--
+2.37.3
+
diff --git a/info.txt b/info.txt
index 7a2843f..a677aa4 100644
--- a/info.txt
+++ b/info.txt
@@ -1,6 +1,6 @@
-Xen upstream patchset #0.1 for 4.15.4-pre
+Xen upstream patchset #1 for 4.15.4-pre
Containing patches from
RELEASE-4.15.3 (feecaf4abf733e83b7a297190819eca7a7f65168)
to
-staging-4.15 (35bf91d30f1a480dcf5bfd99b79384b2b283da7f)
+staging-4.15 (816580afdd1730d4f85f64477a242a439af1cdf8)