summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomáš Mózes <hydrapolic@gmail.com>2023-10-18 20:36:03 +0200
committerTomáš Mózes <hydrapolic@gmail.com>2023-10-18 20:36:03 +0200
commit9a4ce2cf2f3f7ac0a5cb7adf0b6ab6bf3ea3301c (patch)
tree2fe0e12f95912d12b3fc7ea8a9b17fbff71786f1
parentXen 4.16.4-pre-patchset-0 (diff)
downloadxen-upstream-patches-9a4ce2cf2f3f7ac0a5cb7adf0b6ab6bf3ea3301c.tar.gz
xen-upstream-patches-9a4ce2cf2f3f7ac0a5cb7adf0b6ab6bf3ea3301c.tar.bz2
xen-upstream-patches-9a4ce2cf2f3f7ac0a5cb7adf0b6ab6bf3ea3301c.zip
Xen 4.16.6-pre-patchset-04.16.6-pre-patchset-0
Signed-off-by: Tomáš Mózes <hydrapolic@gmail.com>
-rw-r--r--0001-update-Xen-version-to-4.16.4-pre.patch25
-rw-r--r--0001-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch111
-rw-r--r--0002-ioreq_broadcast-accept-partial-broadcast-success.patch34
-rw-r--r--0002-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch48
-rw-r--r--0003-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch74
-rw-r--r--0003-x86-time-prevent-overflow-with-high-frequency-TSCs.patch34
-rw-r--r--0004-x86-S3-Restore-Xen-s-MSR_PAT-value-on-S3-resume.patch36
-rw-r--r--0004-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch85
-rw-r--r--0005-tools-Fix-build-with-recent-QEMU-use-enable-trace-ba.patch50
-rw-r--r--0005-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch83
-rw-r--r--0006-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch106
-rw-r--r--0006-x86-vmx-Calculate-model-specific-LBRs-once-at-start-.patch342
-rw-r--r--0007-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch74
-rw-r--r--0007-x86-vmx-Support-for-CPUs-without-model-specific-LBR.patch83
-rw-r--r--0008-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch109
-rw-r--r--0008-x86-shadow-fix-PAE-check-for-top-level-table-unshado.patch39
-rw-r--r--0009-ns16550-fix-an-incorrect-assignment-to-uart-io_size.patch34
-rw-r--r--0009-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch89
-rw-r--r--0010-libxl-fix-guest-kexec-skip-cpuid-policy.patch72
-rw-r--r--0010-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch91
-rw-r--r--0011-tools-ocaml-xenctrl-Make-domain_getinfolist-tail-rec.patch71
-rw-r--r--0011-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch236
-rw-r--r--0012-tools-ocaml-xenctrl-Use-larger-chunksize-in-domain_g.patch41
-rw-r--r--0012-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch453
-rw-r--r--0013-tools-ocaml-xb-mmap-Use-Data_abstract_val-wrapper.patch75
-rw-r--r--0013-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch65
-rw-r--r--0014-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch186
-rw-r--r--0014-tools-ocaml-xb-Drop-Xs_ring.write.patch62
-rw-r--r--0015-libfsimage-xfs-Remove-dead-code.patch71
-rw-r--r--0015-tools-oxenstored-validate-config-file-before-live-up.patch131
-rw-r--r--0016-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch33
-rw-r--r--0016-tools-ocaml-libs-Don-t-declare-stubs-as-taking-void.patch61
-rw-r--r--0017-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch137
-rw-r--r--0017-tools-ocaml-libs-Allocate-the-correct-amount-of-memo.patch80
-rw-r--r--0018-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch61
-rw-r--r--0018-tools-ocaml-evtchn-Don-t-reference-Custom-objects-wi.patch213
-rw-r--r--0019-tools-ocaml-xc-Fix-binding-for-xc_domain_assign_devi.patch70
-rw-r--r--0019-tools-pygrub-Remove-unnecessary-hypercall.patch60
-rw-r--r--0020-tools-ocaml-xc-Don-t-reference-Abstract_Tag-objects-.patch76
-rw-r--r--0020-tools-pygrub-Small-refactors.patch65
-rw-r--r--0021-tools-ocaml-libs-Fix-memory-resource-leaks-with-caml.patch61
-rw-r--r--0021-tools-pygrub-Open-the-output-files-earlier.patch105
-rw-r--r--0022-tools-libfsimage-Export-a-new-function-to-preload-al.patch126
-rw-r--r--0022-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch120
-rw-r--r--0023-automation-Remove-clang-8-from-Debian-unstable-conta.patch84
-rw-r--r--0023-tools-pygrub-Deprivilege-pygrub.patch307
-rw-r--r--0024-libs-util-Fix-parallel-build-between-flex-bison-and-.patch50
-rw-r--r--0024-libxl-add-support-for-running-bootloader-in-restrict.patch251
-rw-r--r--0025-libxl-limit-bootloader-execution-in-restricted-mode.patch158
-rw-r--r--0025-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch128
-rw-r--r--0026-x86-shskt-Disable-CET-SS-on-parts-susceptible-to-fra.patch191
-rw-r--r--0026-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch104
-rw-r--r--0027-credit2-respect-credit2_runqueue-all-when-arranging-.patch69
-rw-r--r--0027-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch86
-rw-r--r--0028-x86-ucode-AMD-apply-the-patch-early-on-every-logical.patch152
-rw-r--r--0029-x86-perform-mem_sharing-teardown-before-paging-teard.patch111
-rw-r--r--0030-xen-Work-around-Clang-IAS-macro-expansion-bug.patch115
-rw-r--r--0031-xen-Fix-Clang-Wunicode-diagnostic-when-building-asm-.patch83
-rw-r--r--0032-tools-Use-PKG_CONFIG_FILE-instead-of-PKG_CONFIG-vari.patch98
-rw-r--r--0033-libs-guest-Fix-resource-leaks-in-xc_core_arch_map_p2.patch65
-rw-r--r--0034-libs-guest-Fix-leak-on-realloc-failure-in-backup_pte.patch56
-rw-r--r--0035-x86-ucode-AMD-late-load-the-patch-on-every-logical-t.patch90
-rw-r--r--0036-x86-shadow-account-for-log-dirty-mode-when-pre-alloc.patch92
-rw-r--r--0037-x86-HVM-bound-number-of-pinned-cache-attribute-regio.patch50
-rw-r--r--0038-x86-HVM-serialize-pinned-cache-attribute-list-manipu.patch126
-rw-r--r--0039-x86-spec-ctrl-Defer-CR4_PV32_RESTORE-on-the-cstar_en.patch56
-rw-r--r--0040-tools-python-change-s-size-type-for-Python-3.10.patch72
-rw-r--r--0041-tools-xenmon-Fix-xenmon.py-for-with-python3.x.patch54
-rw-r--r--0042-core-parking-fix-build-with-gcc12-and-NR_CPUS-1.patch95
-rw-r--r--0043-x86-altp2m-help-gcc13-to-avoid-it-emitting-a-warning.patch129
-rw-r--r--0044-VT-d-constrain-IGD-check.patch44
-rw-r--r--0045-bunzip-work-around-gcc13-warning.patch42
-rw-r--r--0046-libacpi-fix-PCI-hotplug-AML.patch57
-rw-r--r--0047-AMD-IOMMU-without-XT-x2APIC-needs-to-be-forced-into-.patch42
-rw-r--r--0048-VT-d-fix-iommu-no-igfx-if-the-IOMMU-scope-contains-f.patch44
-rw-r--r--0049-x86-shadow-fix-and-improve-sh_page_has_multiple_shad.patch47
-rw-r--r--0050-x86-nospec-Fix-evaluate_nospec-code-generation-under.patch101
-rw-r--r--0051-x86-shadow-Fix-build-with-no-PG_log_dirty.patch56
-rw-r--r--0052-x86-vmx-Don-t-spuriously-crash-the-domain-when-INIT-.patch51
-rw-r--r--0053-x86-ucode-Fix-error-paths-control_thread_fn.patch56
-rw-r--r--0054-vpci-msix-handle-accesses-adjacent-to-the-MSI-X-tabl.patch543
-rw-r--r--0055-ns16550-correct-name-value-pair-parsing-for-PCI-port.patch59
-rw-r--r--0056-bump-default-SeaBIOS-version-to-1.16.0.patch28
-rw-r--r--0057-CI-Drop-automation-configs.patch87
-rw-r--r--0058-automation-Switch-arm32-cross-builds-to-run-on-arm64.patch87
-rw-r--r--0059-automation-Remove-CentOS-7.2-containers-and-builds.patch145
-rw-r--r--0060-automation-Remove-non-debug-x86_32-build-jobs.patch67
-rw-r--r--0061-CI-Remove-llvm-8-from-the-Debian-Stretch-container.patch103
-rw-r--r--info.txt6
89 files changed, 3377 insertions, 5438 deletions
diff --git a/0001-update-Xen-version-to-4.16.4-pre.patch b/0001-update-Xen-version-to-4.16.4-pre.patch
deleted file mode 100644
index 961358a..0000000
--- a/0001-update-Xen-version-to-4.16.4-pre.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From e3396cd8be5ee99d363a23f30c680e42fb2757bd Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 20 Dec 2022 13:50:16 +0100
-Subject: [PATCH 01/61] update Xen version to 4.16.4-pre
-
----
- xen/Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/Makefile b/xen/Makefile
-index 06dde1e03c..67c5551ffd 100644
---- a/xen/Makefile
-+++ b/xen/Makefile
-@@ -2,7 +2,7 @@
- # All other places this is stored (eg. compile.h) should be autogenerated.
- export XEN_VERSION = 4
- export XEN_SUBVERSION = 16
--export XEN_EXTRAVERSION ?= .3$(XEN_VENDORVERSION)
-+export XEN_EXTRAVERSION ?= .4-pre$(XEN_VENDORVERSION)
- export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
- -include xen-version
-
---
-2.40.0
-
diff --git a/0001-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch b/0001-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch
new file mode 100644
index 0000000..8da66a5
--- /dev/null
+++ b/0001-xen-arm-page-Handle-cache-flush-of-an-element-at-the.patch
@@ -0,0 +1,111 @@
+From d720c2310a7ac8878c01fe9d9fdc13f43cb266b3 Mon Sep 17 00:00:00 2001
+From: Stefano Stabellini <stefano.stabellini@amd.com>
+Date: Tue, 5 Sep 2023 14:34:28 +0200
+Subject: [PATCH 01/27] xen/arm: page: Handle cache flush of an element at the
+ top of the address space
+
+The region that needs to be cleaned/invalidated may be at the top
+of the address space. This means that 'end' (i.e. 'p + size') will
+be 0 and therefore nothing will be cleaned/invalidated as the check
+in the loop will always be false.
+
+On Arm64, we only support up to 48-bit Virtual
+address space. So this is not a concern there. However, for 32-bit,
+the mapcache is using the last 2GB of the address space. Therefore
+we may not clean/invalidate properly some pages. This could lead
+to memory corruption or data leakage (the scrubbed value may
+still sit in the cache when the guest could read directly the memory
+and therefore read the old content).
+
+Rework invalidate_dcache_va_range(), clean_dcache_va_range(),
+clean_and_invalidate_dcache_va_range() to handle a cache flush
+with an element at the top of the address space.
+
+This is CVE-2023-34321 / XSA-437.
+
+Reported-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Acked-by: Bertrand Marquis <bertrand.marquis@arm.com>
+master commit: 9a216e92de9f9011097e4f1fb55ff67ba0a21704
+master date: 2023-09-05 14:30:08 +0200
+---
+ xen/include/asm-arm/page.h | 33 ++++++++++++++++++++-------------
+ 1 file changed, 20 insertions(+), 13 deletions(-)
+
+diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
+index c6f9fb0d4e..eff5883ef8 100644
+--- a/xen/include/asm-arm/page.h
++++ b/xen/include/asm-arm/page.h
+@@ -152,26 +152,25 @@ static inline size_t read_dcache_line_bytes(void)
+
+ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
+ {
+- const void *end = p + size;
+ size_t cacheline_mask = dcache_line_bytes - 1;
+
+ dsb(sy); /* So the CPU issues all writes to the range */
+
+ if ( (uintptr_t)p & cacheline_mask )
+ {
++ size -= dcache_line_bytes - ((uintptr_t)p & cacheline_mask);
+ p = (void *)((uintptr_t)p & ~cacheline_mask);
+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
+ p += dcache_line_bytes;
+ }
+- if ( (uintptr_t)end & cacheline_mask )
+- {
+- end = (void *)((uintptr_t)end & ~cacheline_mask);
+- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (end));
+- }
+
+- for ( ; p < end; p += dcache_line_bytes )
++ for ( ; size >= dcache_line_bytes;
++ p += dcache_line_bytes, size -= dcache_line_bytes )
+ asm volatile (__invalidate_dcache_one(0) : : "r" (p));
+
++ if ( size > 0 )
++ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
++
+ dsb(sy); /* So we know the flushes happen before continuing */
+
+ return 0;
+@@ -179,10 +178,14 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size)
+
+ static inline int clean_dcache_va_range(const void *p, unsigned long size)
+ {
+- const void *end = p + size;
++ size_t cacheline_mask = dcache_line_bytes - 1;
++
+ dsb(sy); /* So the CPU issues all writes to the range */
+- p = (void *)((uintptr_t)p & ~(dcache_line_bytes - 1));
+- for ( ; p < end; p += dcache_line_bytes )
++ size += (uintptr_t)p & cacheline_mask;
++ size = (size + cacheline_mask) & ~cacheline_mask;
++ p = (void *)((uintptr_t)p & ~cacheline_mask);
++ for ( ; size >= dcache_line_bytes;
++ p += dcache_line_bytes, size -= dcache_line_bytes )
+ asm volatile (__clean_dcache_one(0) : : "r" (p));
+ dsb(sy); /* So we know the flushes happen before continuing */
+ /* ARM callers assume that dcache_* functions cannot fail. */
+@@ -192,10 +195,14 @@ static inline int clean_dcache_va_range(const void *p, unsigned long size)
+ static inline int clean_and_invalidate_dcache_va_range
+ (const void *p, unsigned long size)
+ {
+- const void *end = p + size;
++ size_t cacheline_mask = dcache_line_bytes - 1;
++
+ dsb(sy); /* So the CPU issues all writes to the range */
+- p = (void *)((uintptr_t)p & ~(dcache_line_bytes - 1));
+- for ( ; p < end; p += dcache_line_bytes )
++ size += (uintptr_t)p & cacheline_mask;
++ size = (size + cacheline_mask) & ~cacheline_mask;
++ p = (void *)((uintptr_t)p & ~cacheline_mask);
++ for ( ; size >= dcache_line_bytes;
++ p += dcache_line_bytes, size -= dcache_line_bytes )
+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p));
+ dsb(sy); /* So we know the flushes happen before continuing */
+ /* ARM callers assume that dcache_* functions cannot fail. */
+--
+2.42.0
+
diff --git a/0002-ioreq_broadcast-accept-partial-broadcast-success.patch b/0002-ioreq_broadcast-accept-partial-broadcast-success.patch
deleted file mode 100644
index 1b0ae9c..0000000
--- a/0002-ioreq_broadcast-accept-partial-broadcast-success.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From f2edbd79f5d5ce3b633885469852e1215dc0d4b5 Mon Sep 17 00:00:00 2001
-From: Per Bilse <per.bilse@citrix.com>
-Date: Tue, 20 Dec 2022 13:50:47 +0100
-Subject: [PATCH 02/61] ioreq_broadcast(): accept partial broadcast success
-
-Avoid incorrectly triggering an error when a broadcast buffered ioreq
-is not handled by all registered clients, as long as the failure is
-strictly because the client doesn't handle buffered ioreqs.
-
-Signed-off-by: Per Bilse <per.bilse@citrix.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-master commit: a44734df6c24fadbdb001f051cc5580c467caf7d
-master date: 2022-12-07 12:17:30 +0100
----
- xen/common/ioreq.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c
-index 42414b750b..2a8d8de2d5 100644
---- a/xen/common/ioreq.c
-+++ b/xen/common/ioreq.c
-@@ -1322,7 +1322,8 @@ unsigned int ioreq_broadcast(ioreq_t *p, bool buffered)
-
- FOR_EACH_IOREQ_SERVER(d, id, s)
- {
-- if ( !s->enabled )
-+ if ( !s->enabled ||
-+ (buffered && s->bufioreq_handling == HVM_IOREQSRV_BUFIOREQ_OFF) )
- continue;
-
- if ( ioreq_send(s, p, buffered) == IOREQ_STATUS_UNHANDLED )
---
-2.40.0
-
diff --git a/0002-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch b/0002-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch
new file mode 100644
index 0000000..d315be0
--- /dev/null
+++ b/0002-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch
@@ -0,0 +1,48 @@
+From 08539e8315fdae5f5bfd655d53ed35fd2922fe6c Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 23 Aug 2023 09:26:36 +0200
+Subject: [PATCH 02/27] x86/AMD: extend Zenbleed check to models "good" ucode
+ isn't known for
+
+Reportedly the AMD Custom APU 0405 found on SteamDeck, models 0x90 and
+0x91, (quoting the respective Linux commit) is similarly affected. Put
+another instance of our Zen1 vs Zen2 distinction checks in
+amd_check_zenbleed(), forcing use of the chickenbit irrespective of
+ucode version (building upon real hardware never surfacing a version of
+0xffffffff).
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+(cherry picked from commit 145a69c0944ac70cfcf9d247c85dee9e99d9d302)
+---
+ xen/arch/x86/cpu/amd.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
+index 60c6d88edf..a591038757 100644
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -800,10 +800,17 @@ void amd_check_zenbleed(void)
+ case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
+ default:
+ /*
+- * With the Fam17h check above, parts getting here are Zen1.
+- * They're not affected.
++ * With the Fam17h check above, most parts getting here are
++ * Zen1. They're not affected. Assume Zen2 ones making it
++ * here are affected regardless of microcode version.
++ *
++ * Zen1 vs Zen2 isn't a simple model number comparison, so use
++ * STIBP as a heuristic to distinguish.
+ */
+- return;
++ if (!boot_cpu_has(X86_FEATURE_AMD_STIBP))
++ return;
++ good_rev = ~0U;
++ break;
+ }
+
+ rdmsrl(MSR_AMD64_DE_CFG, val);
+--
+2.42.0
+
diff --git a/0003-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch b/0003-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch
new file mode 100644
index 0000000..ecd9c37
--- /dev/null
+++ b/0003-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch
@@ -0,0 +1,74 @@
+From 1e52cdf07cdf52e5d99957c3ecbddf5b1feda963 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 12 Sep 2023 15:06:49 +0100
+Subject: [PATCH 03/27] x86/spec-ctrl: Fix confusion between
+ SPEC_CTRL_EXIT_TO_XEN{,_IST}
+
+c/s 3fffaf9c13e9 ("x86/entry: Avoid using alternatives in NMI/#MC paths")
+dropped the only user, leaving behind the (incorrect) implication that Xen had
+split exit paths.
+
+Delete the unused SPEC_CTRL_EXIT_TO_XEN and rename SPEC_CTRL_EXIT_TO_XEN_IST
+to SPEC_CTRL_EXIT_TO_XEN for consistency.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 1c18d73774533a55ba9d1cbee8bdace03efdb5e7)
+---
+ xen/arch/x86/x86_64/entry.S | 2 +-
+ xen/include/asm-x86/spec_ctrl_asm.h | 10 ++--------
+ 2 files changed, 3 insertions(+), 9 deletions(-)
+
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index db2ea7871e..59f2040787 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -664,7 +664,7 @@ UNLIKELY_START(ne, exit_cr3)
+ UNLIKELY_END(exit_cr3)
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+- SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */
++ SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */
+
+ RESTORE_ALL adj=8
+ iretq
+diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
+index b61a5571ae..f5110616e4 100644
+--- a/xen/include/asm-x86/spec_ctrl_asm.h
++++ b/xen/include/asm-x86/spec_ctrl_asm.h
+@@ -79,7 +79,6 @@
+ * - SPEC_CTRL_ENTRY_FROM_PV
+ * - SPEC_CTRL_ENTRY_FROM_INTR
+ * - SPEC_CTRL_ENTRY_FROM_INTR_IST
+- * - SPEC_CTRL_EXIT_TO_XEN_IST
+ * - SPEC_CTRL_EXIT_TO_XEN
+ * - SPEC_CTRL_EXIT_TO_PV
+ *
+@@ -273,11 +272,6 @@
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \
+ X86_FEATURE_SC_MSR_PV
+
+-/* Use when exiting to Xen context. */
+-#define SPEC_CTRL_EXIT_TO_XEN \
+- ALTERNATIVE "", \
+- DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR_PV
+-
+ /* Use when exiting to PV guest context. */
+ #define SPEC_CTRL_EXIT_TO_PV \
+ ALTERNATIVE "", \
+@@ -344,8 +338,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ UNLIKELY_END(\@_serialise)
+ .endm
+
+-/* Use when exiting to Xen in IST context. */
+-.macro SPEC_CTRL_EXIT_TO_XEN_IST
++/* Use when exiting to Xen context. */
++.macro SPEC_CTRL_EXIT_TO_XEN
+ /*
+ * Requires %rbx=stack_end
+ * Clobbers %rax, %rcx, %rdx
+--
+2.42.0
+
diff --git a/0003-x86-time-prevent-overflow-with-high-frequency-TSCs.patch b/0003-x86-time-prevent-overflow-with-high-frequency-TSCs.patch
deleted file mode 100644
index a031317..0000000
--- a/0003-x86-time-prevent-overflow-with-high-frequency-TSCs.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 65bf12135f618614bbf44626fba1c20ca8d1a127 Mon Sep 17 00:00:00 2001
-From: Neowutran <xen@neowutran.ovh>
-Date: Tue, 20 Dec 2022 13:51:42 +0100
-Subject: [PATCH 03/61] x86/time: prevent overflow with high frequency TSCs
-
-Make sure tsc_khz is promoted to a 64-bit type before multiplying by
-1000 to avoid an 'overflow before widen' bug. Otherwise just above
-4.294GHz the value will overflow. Processors with clocks this high are
-now in production and require this to work correctly.
-
-Signed-off-by: Neowutran <xen@neowutran.ovh>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: ad15a0a8ca2515d8ac58edfc0bc1d3719219cb77
-master date: 2022-12-19 11:34:16 +0100
----
- xen/arch/x86/time.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
-index 1daff92dca..db0b149ec6 100644
---- a/xen/arch/x86/time.c
-+++ b/xen/arch/x86/time.c
-@@ -2490,7 +2490,7 @@ int tsc_set_info(struct domain *d,
- case TSC_MODE_ALWAYS_EMULATE:
- d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
- d->arch.tsc_khz = gtsc_khz ?: cpu_khz;
-- set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000);
-+ set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000UL);
-
- /*
- * In default mode use native TSC if the host has safe TSC and
---
-2.40.0
-
diff --git a/0004-x86-S3-Restore-Xen-s-MSR_PAT-value-on-S3-resume.patch b/0004-x86-S3-Restore-Xen-s-MSR_PAT-value-on-S3-resume.patch
deleted file mode 100644
index 3d1c089..0000000
--- a/0004-x86-S3-Restore-Xen-s-MSR_PAT-value-on-S3-resume.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From 7b1b9849e8a0d7791866d6d21c45993dfe27836c Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 7 Feb 2023 17:03:09 +0100
-Subject: [PATCH 04/61] x86/S3: Restore Xen's MSR_PAT value on S3 resume
-
-There are two paths in the trampoline, and Xen's PAT needs setting up in both,
-not just the boot path.
-
-Fixes: 4304ff420e51 ("x86/S3: Drop {save,restore}_rest_processor_state() completely")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 4d975798e11579fdf405b348543061129e01b0fb
-master date: 2023-01-10 21:21:30 +0000
----
- xen/arch/x86/boot/wakeup.S | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/xen/arch/x86/boot/wakeup.S b/xen/arch/x86/boot/wakeup.S
-index c17d613b61..08447e1934 100644
---- a/xen/arch/x86/boot/wakeup.S
-+++ b/xen/arch/x86/boot/wakeup.S
-@@ -130,6 +130,11 @@ wakeup_32:
- and %edi, %edx
- wrmsr
- 1:
-+ /* Set up PAT before enabling paging. */
-+ mov $XEN_MSR_PAT & 0xffffffff, %eax
-+ mov $XEN_MSR_PAT >> 32, %edx
-+ mov $MSR_IA32_CR_PAT, %ecx
-+ wrmsr
-
- /* Set up EFER (Extended Feature Enable Register). */
- movl $MSR_EFER,%ecx
---
-2.40.0
-
diff --git a/0004-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch b/0004-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch
new file mode 100644
index 0000000..52d055c
--- /dev/null
+++ b/0004-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch
@@ -0,0 +1,85 @@
+From afa5b17f385372226de6b0862f12ab39fda16b5c Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 12 Sep 2023 17:03:16 +0100
+Subject: [PATCH 04/27] x86/spec-ctrl: Fold DO_SPEC_CTRL_EXIT_TO_XEN into its
+ single user
+
+With the SPEC_CTRL_EXIT_TO_XEN{,_IST} confusion fixed, it's now obvious that
+there's only a single EXIT_TO_XEN path. Fold DO_SPEC_CTRL_EXIT_TO_XEN into
+SPEC_CTRL_EXIT_TO_XEN to simplify further fixes.
+
+When merging labels, switch the name to .L\@_skip_sc_msr as "skip" on its own
+is going to be too generic shortly.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 694bb0f280fd08a4377e36e32b84b5062def4de2)
+---
+ xen/include/asm-x86/spec_ctrl_asm.h | 40 ++++++++++++-----------------
+ 1 file changed, 16 insertions(+), 24 deletions(-)
+
+diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
+index f5110616e4..251c30eee5 100644
+--- a/xen/include/asm-x86/spec_ctrl_asm.h
++++ b/xen/include/asm-x86/spec_ctrl_asm.h
+@@ -216,27 +216,6 @@
+ wrmsr
+ .endm
+
+-.macro DO_SPEC_CTRL_EXIT_TO_XEN
+-/*
+- * Requires %rbx=stack_end
+- * Clobbers %rax, %rcx, %rdx
+- *
+- * When returning to Xen context, look to see whether SPEC_CTRL shadowing is
+- * in effect, and reload the shadow value. This covers race conditions which
+- * exist with an NMI/MCE/etc hitting late in the return-to-guest path.
+- */
+- xor %edx, %edx
+-
+- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
+- jz .L\@_skip
+-
+- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
+- mov $MSR_SPEC_CTRL, %ecx
+- wrmsr
+-
+-.L\@_skip:
+-.endm
+-
+ .macro DO_SPEC_CTRL_EXIT_TO_GUEST
+ /*
+ * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo
+@@ -345,11 +324,24 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ * Clobbers %rax, %rcx, %rdx
+ */
+ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
+- jz .L\@_skip
++ jz .L\@_skip_sc_msr
+
+- DO_SPEC_CTRL_EXIT_TO_XEN
++ /*
++ * When returning to Xen context, look to see whether SPEC_CTRL shadowing
++ * is in effect, and reload the shadow value. This covers race conditions
++ * which exist with an NMI/MCE/etc hitting late in the return-to-guest
++ * path.
++ */
++ xor %edx, %edx
+
+-.L\@_skip:
++ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
++ jz .L\@_skip_sc_msr
++
++ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
++ mov $MSR_SPEC_CTRL, %ecx
++ wrmsr
++
++.L\@_skip_sc_msr:
+ .endm
+
+ #endif /* __ASSEMBLY__ */
+--
+2.42.0
+
diff --git a/0005-tools-Fix-build-with-recent-QEMU-use-enable-trace-ba.patch b/0005-tools-Fix-build-with-recent-QEMU-use-enable-trace-ba.patch
deleted file mode 100644
index ff66a43..0000000
--- a/0005-tools-Fix-build-with-recent-QEMU-use-enable-trace-ba.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 998c03b2abfbf17ff96bccad1512de1ea18d0d75 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Tue, 7 Feb 2023 17:03:51 +0100
-Subject: [PATCH 05/61] tools: Fix build with recent QEMU, use
- "--enable-trace-backends"
-
-The configure option "--enable-trace-backend" isn't accepted anymore
-and we should use "--enable-trace-backends" instead which was
-introduced in 2014 and allows multiple backends.
-
-"--enable-trace-backends" was introduced by:
- 5b808275f3bb ("trace: Multi-backend tracing")
-The backward compatible option "--enable-trace-backend" is removed by
- 10229ec3b0ff ("configure: remove backwards-compatibility and obsolete options")
-
-As we already use ./configure options that wouldn't be accepted by
-older version of QEMU's configure, we will simply use the new spelling
-for the option and avoid trying to detect which spelling to use.
-
-We already make use of "--firmwarepath=" which was introduced by
- 3d5eecab4a5a ("Add --firmwarepath to configure")
-which already includes the new spelling for "--enable-trace-backends".
-
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Reviewed-by: Jason Andryuk <jandryuk@gmail.com>
-master commit: e66d450b6e0ffec635639df993ab43ce28b3383f
-master date: 2023-01-11 10:45:29 +0100
----
- tools/Makefile | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/tools/Makefile b/tools/Makefile
-index 757a560be0..9b6b605ec9 100644
---- a/tools/Makefile
-+++ b/tools/Makefile
-@@ -218,9 +218,9 @@ subdir-all-qemu-xen-dir: qemu-xen-dir-find
- mkdir -p qemu-xen-build; \
- cd qemu-xen-build; \
- if $$source/scripts/tracetool.py --check-backend --backend log ; then \
-- enable_trace_backend='--enable-trace-backend=log'; \
-+ enable_trace_backend="--enable-trace-backends=log"; \
- elif $$source/scripts/tracetool.py --check-backend --backend stderr ; then \
-- enable_trace_backend='--enable-trace-backend=stderr'; \
-+ enable_trace_backend='--enable-trace-backends=stderr'; \
- else \
- enable_trace_backend='' ; \
- fi ; \
---
-2.40.0
-
diff --git a/0005-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch b/0005-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch
new file mode 100644
index 0000000..ceca67a
--- /dev/null
+++ b/0005-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch
@@ -0,0 +1,83 @@
+From 353e876a9dd5a93d0bf8819e77613c33db0de97b Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 1 Sep 2023 11:38:44 +0100
+Subject: [PATCH 05/27] x86/spec-ctrl: Turn the remaining
+ SPEC_CTRL_{ENTRY,EXIT}_* into asm macros
+
+These have grown more complex over time, with some already having been
+converted.
+
+Provide full Requires/Clobbers comments, otherwise missing at this level of
+indirection.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 7125429aafb9e3c9c88fc93001fc2300e0ac2cc8)
+---
+ xen/include/asm-x86/spec_ctrl_asm.h | 37 ++++++++++++++++++++++-------
+ 1 file changed, 28 insertions(+), 9 deletions(-)
+
+diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
+index 251c30eee5..94ed5dc880 100644
+--- a/xen/include/asm-x86/spec_ctrl_asm.h
++++ b/xen/include/asm-x86/spec_ctrl_asm.h
+@@ -236,26 +236,45 @@
+ .endm
+
+ /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
+-#define SPEC_CTRL_ENTRY_FROM_PV \
++.macro SPEC_CTRL_ENTRY_FROM_PV
++/*
++ * Requires %rsp=regs/cpuinfo, %rdx=0
++ * Clobbers %rax, %rcx, %rdx
++ */
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \
+- X86_FEATURE_IBPB_ENTRY_PV; \
+- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \
++ X86_FEATURE_IBPB_ENTRY_PV
++
++ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV
++
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \
+ X86_FEATURE_SC_MSR_PV
++.endm
+
+ /* Use in interrupt/exception context. May interrupt Xen or PV context. */
+-#define SPEC_CTRL_ENTRY_FROM_INTR \
++.macro SPEC_CTRL_ENTRY_FROM_INTR
++/*
++ * Requires %rsp=regs, %r14=stack_end, %rdx=0
++ * Clobbers %rax, %rcx, %rdx
++ */
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \
+- X86_FEATURE_IBPB_ENTRY_PV; \
+- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \
++ X86_FEATURE_IBPB_ENTRY_PV
++
++ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV
++
+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \
+ X86_FEATURE_SC_MSR_PV
++.endm
+
+ /* Use when exiting to PV guest context. */
+-#define SPEC_CTRL_EXIT_TO_PV \
+- ALTERNATIVE "", \
+- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \
++.macro SPEC_CTRL_EXIT_TO_PV
++/*
++ * Requires %rax=spec_ctrl, %rsp=regs/info
++ * Clobbers %rcx, %rdx
++ */
++ ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
++
+ DO_SPEC_CTRL_COND_VERW
++.endm
+
+ /*
+ * Use in IST interrupt/exception context. May interrupt Xen or PV context.
+--
+2.42.0
+
diff --git a/0006-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch b/0006-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch
new file mode 100644
index 0000000..1784fe1
--- /dev/null
+++ b/0006-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch
@@ -0,0 +1,106 @@
+From 6cc49c355e952f4ff564c6b817e7eff57c5a02c7 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 30 Aug 2023 20:11:50 +0100
+Subject: [PATCH 06/27] x86/spec-ctrl: Improve all SPEC_CTRL_{ENTER,EXIT}_*
+ comments
+
+... to better explain how they're used.
+
+Doing so highlights that SPEC_CTRL_EXIT_TO_XEN is missing a VERW flush for the
+corner case when e.g. an NMI hits late in an exit-to-guest path.
+
+Leave a TODO, which will be addressed in subsequent patches which arrange for
+VERW flushing to be safe within SPEC_CTRL_EXIT_TO_XEN.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 45f00557350dc7d0756551069803fc49c29184ca)
+---
+ xen/include/asm-x86/spec_ctrl_asm.h | 36 +++++++++++++++++++++++++----
+ 1 file changed, 31 insertions(+), 5 deletions(-)
+
+diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
+index 94ed5dc880..9c397f7cbd 100644
+--- a/xen/include/asm-x86/spec_ctrl_asm.h
++++ b/xen/include/asm-x86/spec_ctrl_asm.h
+@@ -235,7 +235,10 @@
+ wrmsr
+ .endm
+
+-/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
++/*
++ * Used after an entry from PV context: SYSCALL, SYSENTER, INT,
++ * etc. There is always a guest speculation state in context.
++ */
+ .macro SPEC_CTRL_ENTRY_FROM_PV
+ /*
+ * Requires %rsp=regs/cpuinfo, %rdx=0
+@@ -250,7 +253,11 @@
+ X86_FEATURE_SC_MSR_PV
+ .endm
+
+-/* Use in interrupt/exception context. May interrupt Xen or PV context. */
++/*
++ * Used after an exception or maskable interrupt, hitting Xen or PV context.
++ * There will either be a guest speculation context, or (barring fatal
++ * exceptions) a well-formed Xen speculation context.
++ */
+ .macro SPEC_CTRL_ENTRY_FROM_INTR
+ /*
+ * Requires %rsp=regs, %r14=stack_end, %rdx=0
+@@ -265,7 +272,10 @@
+ X86_FEATURE_SC_MSR_PV
+ .endm
+
+-/* Use when exiting to PV guest context. */
++/*
++ * Used when exiting from any entry context, back to PV context. This
++ * includes from an IST entry which moved onto the primary stack.
++ */
+ .macro SPEC_CTRL_EXIT_TO_PV
+ /*
+ * Requires %rax=spec_ctrl, %rsp=regs/info
+@@ -277,7 +287,13 @@
+ .endm
+
+ /*
+- * Use in IST interrupt/exception context. May interrupt Xen or PV context.
++ * Used after an IST entry hitting Xen or PV context. Special care is needed,
++ * because when hitting Xen context, there may not be a well-formed
++ * speculation context. (i.e. it can hit in the middle of
++ * SPEC_CTRL_{ENTRY,EXIT}_* regions.)
++ *
++ * An IST entry which hits PV context moves onto the primary stack and leaves
++ * via SPEC_CTRL_EXIT_TO_PV, *not* SPEC_CTRL_EXIT_TO_XEN.
+ */
+ .macro SPEC_CTRL_ENTRY_FROM_INTR_IST
+ /*
+@@ -336,7 +352,14 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ UNLIKELY_END(\@_serialise)
+ .endm
+
+-/* Use when exiting to Xen context. */
++/*
++ * Use when exiting from any entry context, back to Xen context. This
++ * includes returning to other SPEC_CTRL_{ENTRY,EXIT}_* regions with an
++ * incomplete speculation context.
++ *
++ * Because we might have interrupted Xen beyond SPEC_CTRL_EXIT_TO_$GUEST, we
++ * need to treat this as if it were an EXIT_TO_$GUEST case too.
++ */
+ .macro SPEC_CTRL_EXIT_TO_XEN
+ /*
+ * Requires %rbx=stack_end
+@@ -361,6 +384,9 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ wrmsr
+
+ .L\@_skip_sc_msr:
++
++ /* TODO VERW */
++
+ .endm
+
+ #endif /* __ASSEMBLY__ */
+--
+2.42.0
+
diff --git a/0006-x86-vmx-Calculate-model-specific-LBRs-once-at-start-.patch b/0006-x86-vmx-Calculate-model-specific-LBRs-once-at-start-.patch
deleted file mode 100644
index c010110..0000000
--- a/0006-x86-vmx-Calculate-model-specific-LBRs-once-at-start-.patch
+++ /dev/null
@@ -1,342 +0,0 @@
-From 401e9e33a04c2a9887636ef58490c764543f0538 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 7 Feb 2023 17:04:18 +0100
-Subject: [PATCH 06/61] x86/vmx: Calculate model-specific LBRs once at start of
- day
-
-There is no point repeating this calculation at runtime, especially as it is
-in the fallback path of the WRSMR/RDMSR handlers.
-
-Move the infrastructure higher in vmx.c to avoid forward declarations,
-renaming last_branch_msr_get() to get_model_specific_lbr() to highlight that
-these are model-specific only.
-
-No practical change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: e94af0d58f86c3a914b9cbbf4d9ed3d43b974771
-master date: 2023-01-12 18:42:00 +0000
----
- xen/arch/x86/hvm/vmx/vmx.c | 276 +++++++++++++++++++------------------
- 1 file changed, 139 insertions(+), 137 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index 3f42765313..bc308d9df2 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -394,6 +394,142 @@ void vmx_pi_hooks_deassign(struct domain *d)
- domain_unpause(d);
- }
-
-+static const struct lbr_info {
-+ u32 base, count;
-+} p4_lbr[] = {
-+ { MSR_P4_LER_FROM_LIP, 1 },
-+ { MSR_P4_LER_TO_LIP, 1 },
-+ { MSR_P4_LASTBRANCH_TOS, 1 },
-+ { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-+ { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+}, c2_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_C2_LASTBRANCH_TOS, 1 },
-+ { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
-+ { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+}, nh_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_NHL_LBR_SELECT, 1 },
-+ { MSR_NHL_LASTBRANCH_TOS, 1 },
-+ { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-+ { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+}, sk_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_NHL_LBR_SELECT, 1 },
-+ { MSR_NHL_LASTBRANCH_TOS, 1 },
-+ { MSR_SKL_LASTBRANCH_0_FROM_IP, NUM_MSR_SKL_LASTBRANCH },
-+ { MSR_SKL_LASTBRANCH_0_TO_IP, NUM_MSR_SKL_LASTBRANCH },
-+ { MSR_SKL_LASTBRANCH_0_INFO, NUM_MSR_SKL_LASTBRANCH },
-+ { 0, 0 }
-+}, at_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_C2_LASTBRANCH_TOS, 1 },
-+ { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-+ { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+}, sm_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_SM_LBR_SELECT, 1 },
-+ { MSR_SM_LASTBRANCH_TOS, 1 },
-+ { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-+ { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+}, gm_lbr[] = {
-+ { MSR_IA32_LASTINTFROMIP, 1 },
-+ { MSR_IA32_LASTINTTOIP, 1 },
-+ { MSR_SM_LBR_SELECT, 1 },
-+ { MSR_SM_LASTBRANCH_TOS, 1 },
-+ { MSR_GM_LASTBRANCH_0_FROM_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
-+ { MSR_GM_LASTBRANCH_0_TO_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
-+ { 0, 0 }
-+};
-+static const struct lbr_info *__read_mostly model_specific_lbr;
-+
-+static const struct lbr_info *__init get_model_specific_lbr(void)
-+{
-+ switch ( boot_cpu_data.x86 )
-+ {
-+ case 6:
-+ switch ( boot_cpu_data.x86_model )
-+ {
-+ /* Core2 Duo */
-+ case 0x0f:
-+ /* Enhanced Core */
-+ case 0x17:
-+ /* Xeon 7400 */
-+ case 0x1d:
-+ return c2_lbr;
-+ /* Nehalem */
-+ case 0x1a: case 0x1e: case 0x1f: case 0x2e:
-+ /* Westmere */
-+ case 0x25: case 0x2c: case 0x2f:
-+ /* Sandy Bridge */
-+ case 0x2a: case 0x2d:
-+ /* Ivy Bridge */
-+ case 0x3a: case 0x3e:
-+ /* Haswell */
-+ case 0x3c: case 0x3f: case 0x45: case 0x46:
-+ /* Broadwell */
-+ case 0x3d: case 0x47: case 0x4f: case 0x56:
-+ return nh_lbr;
-+ /* Skylake */
-+ case 0x4e: case 0x5e:
-+ /* Xeon Scalable */
-+ case 0x55:
-+ /* Cannon Lake */
-+ case 0x66:
-+ /* Goldmont Plus */
-+ case 0x7a:
-+ /* Ice Lake */
-+ case 0x6a: case 0x6c: case 0x7d: case 0x7e:
-+ /* Tiger Lake */
-+ case 0x8c: case 0x8d:
-+ /* Tremont */
-+ case 0x86:
-+ /* Kaby Lake */
-+ case 0x8e: case 0x9e:
-+ /* Comet Lake */
-+ case 0xa5: case 0xa6:
-+ return sk_lbr;
-+ /* Atom */
-+ case 0x1c: case 0x26: case 0x27: case 0x35: case 0x36:
-+ return at_lbr;
-+ /* Silvermont */
-+ case 0x37: case 0x4a: case 0x4d: case 0x5a: case 0x5d:
-+ /* Xeon Phi Knights Landing */
-+ case 0x57:
-+ /* Xeon Phi Knights Mill */
-+ case 0x85:
-+ /* Airmont */
-+ case 0x4c:
-+ return sm_lbr;
-+ /* Goldmont */
-+ case 0x5c: case 0x5f:
-+ return gm_lbr;
-+ }
-+ break;
-+
-+ case 15:
-+ switch ( boot_cpu_data.x86_model )
-+ {
-+ /* Pentium4/Xeon with em64t */
-+ case 3: case 4: case 6:
-+ return p4_lbr;
-+ }
-+ break;
-+ }
-+
-+ return NULL;
-+}
-+
- static int vmx_domain_initialise(struct domain *d)
- {
- static const struct arch_csw csw = {
-@@ -2812,6 +2948,7 @@ const struct hvm_function_table * __init start_vmx(void)
- vmx_function_table.get_guest_bndcfgs = vmx_get_guest_bndcfgs;
- }
-
-+ model_specific_lbr = get_model_specific_lbr();
- lbr_tsx_fixup_check();
- ler_to_fixup_check();
-
-@@ -2958,141 +3095,6 @@ static int vmx_cr_access(cr_access_qual_t qual)
- return X86EMUL_OKAY;
- }
-
--static const struct lbr_info {
-- u32 base, count;
--} p4_lbr[] = {
-- { MSR_P4_LER_FROM_LIP, 1 },
-- { MSR_P4_LER_TO_LIP, 1 },
-- { MSR_P4_LASTBRANCH_TOS, 1 },
-- { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-- { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--}, c2_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_C2_LASTBRANCH_TOS, 1 },
-- { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
-- { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_C2_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--}, nh_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_NHL_LBR_SELECT, 1 },
-- { MSR_NHL_LASTBRANCH_TOS, 1 },
-- { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-- { MSR_P4_LASTBRANCH_0_TO_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--}, sk_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_NHL_LBR_SELECT, 1 },
-- { MSR_NHL_LASTBRANCH_TOS, 1 },
-- { MSR_SKL_LASTBRANCH_0_FROM_IP, NUM_MSR_SKL_LASTBRANCH },
-- { MSR_SKL_LASTBRANCH_0_TO_IP, NUM_MSR_SKL_LASTBRANCH },
-- { MSR_SKL_LASTBRANCH_0_INFO, NUM_MSR_SKL_LASTBRANCH },
-- { 0, 0 }
--}, at_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_C2_LASTBRANCH_TOS, 1 },
-- { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-- { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--}, sm_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_SM_LBR_SELECT, 1 },
-- { MSR_SM_LASTBRANCH_TOS, 1 },
-- { MSR_C2_LASTBRANCH_0_FROM_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-- { MSR_C2_LASTBRANCH_0_TO_IP, NUM_MSR_ATOM_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--}, gm_lbr[] = {
-- { MSR_IA32_LASTINTFROMIP, 1 },
-- { MSR_IA32_LASTINTTOIP, 1 },
-- { MSR_SM_LBR_SELECT, 1 },
-- { MSR_SM_LASTBRANCH_TOS, 1 },
-- { MSR_GM_LASTBRANCH_0_FROM_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
-- { MSR_GM_LASTBRANCH_0_TO_IP, NUM_MSR_GM_LASTBRANCH_FROM_TO },
-- { 0, 0 }
--};
--
--static const struct lbr_info *last_branch_msr_get(void)
--{
-- switch ( boot_cpu_data.x86 )
-- {
-- case 6:
-- switch ( boot_cpu_data.x86_model )
-- {
-- /* Core2 Duo */
-- case 0x0f:
-- /* Enhanced Core */
-- case 0x17:
-- /* Xeon 7400 */
-- case 0x1d:
-- return c2_lbr;
-- /* Nehalem */
-- case 0x1a: case 0x1e: case 0x1f: case 0x2e:
-- /* Westmere */
-- case 0x25: case 0x2c: case 0x2f:
-- /* Sandy Bridge */
-- case 0x2a: case 0x2d:
-- /* Ivy Bridge */
-- case 0x3a: case 0x3e:
-- /* Haswell */
-- case 0x3c: case 0x3f: case 0x45: case 0x46:
-- /* Broadwell */
-- case 0x3d: case 0x47: case 0x4f: case 0x56:
-- return nh_lbr;
-- /* Skylake */
-- case 0x4e: case 0x5e:
-- /* Xeon Scalable */
-- case 0x55:
-- /* Cannon Lake */
-- case 0x66:
-- /* Goldmont Plus */
-- case 0x7a:
-- /* Ice Lake */
-- case 0x6a: case 0x6c: case 0x7d: case 0x7e:
-- /* Tiger Lake */
-- case 0x8c: case 0x8d:
-- /* Tremont */
-- case 0x86:
-- /* Kaby Lake */
-- case 0x8e: case 0x9e:
-- /* Comet Lake */
-- case 0xa5: case 0xa6:
-- return sk_lbr;
-- /* Atom */
-- case 0x1c: case 0x26: case 0x27: case 0x35: case 0x36:
-- return at_lbr;
-- /* Silvermont */
-- case 0x37: case 0x4a: case 0x4d: case 0x5a: case 0x5d:
-- /* Xeon Phi Knights Landing */
-- case 0x57:
-- /* Xeon Phi Knights Mill */
-- case 0x85:
-- /* Airmont */
-- case 0x4c:
-- return sm_lbr;
-- /* Goldmont */
-- case 0x5c: case 0x5f:
-- return gm_lbr;
-- }
-- break;
--
-- case 15:
-- switch ( boot_cpu_data.x86_model )
-- {
-- /* Pentium4/Xeon with em64t */
-- case 3: case 4: case 6:
-- return p4_lbr;
-- }
-- break;
-- }
--
-- return NULL;
--}
--
- enum
- {
- LBR_FORMAT_32 = 0x0, /* 32-bit record format */
-@@ -3199,7 +3201,7 @@ static void __init ler_to_fixup_check(void)
-
- static int is_last_branch_msr(u32 ecx)
- {
-- const struct lbr_info *lbr = last_branch_msr_get();
-+ const struct lbr_info *lbr = model_specific_lbr;
-
- if ( lbr == NULL )
- return 0;
-@@ -3536,7 +3538,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
- if ( !(v->arch.hvm.vmx.lbr_flags & LBR_MSRS_INSERTED) &&
- (msr_content & IA32_DEBUGCTLMSR_LBR) )
- {
-- const struct lbr_info *lbr = last_branch_msr_get();
-+ const struct lbr_info *lbr = model_specific_lbr;
-
- if ( unlikely(!lbr) )
- {
---
-2.40.0
-
diff --git a/0007-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch b/0007-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch
new file mode 100644
index 0000000..5d586b1
--- /dev/null
+++ b/0007-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch
@@ -0,0 +1,74 @@
+From 19aca8f32778f289112fc8db2ee547cdf29c81ca Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 13 Sep 2023 13:48:16 +0100
+Subject: [PATCH 07/27] x86/entry: Adjust restore_all_xen to hold stack_end in
+ %r14
+
+All other SPEC_CTRL_{ENTRY,EXIT}_* helpers hold stack_end in %r14. Adjust it
+for consistency.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 7aa28849a1155d856e214e9a80a7e65fffdc3e58)
+---
+ xen/arch/x86/x86_64/entry.S | 8 ++++----
+ xen/include/asm-x86/spec_ctrl_asm.h | 8 ++++----
+ 2 files changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 59f2040787..266c0a0990 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -656,15 +656,15 @@ restore_all_xen:
+ * Check whether we need to switch to the per-CPU page tables, in
+ * case we return to late PV exit code (from an NMI or #MC).
+ */
+- GET_STACK_END(bx)
+- cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx)
++ GET_STACK_END(14)
++ cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14)
+ UNLIKELY_START(ne, exit_cr3)
+- mov STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax
++ mov STACK_CPUINFO_FIELD(pv_cr3)(%r14), %rax
+ mov %rax, %cr3
+ UNLIKELY_END(exit_cr3)
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+- SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */
++ SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */
+
+ RESTORE_ALL adj=8
+ iretq
+diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
+index 9c397f7cbd..3e745813cf 100644
+--- a/xen/include/asm-x86/spec_ctrl_asm.h
++++ b/xen/include/asm-x86/spec_ctrl_asm.h
+@@ -362,10 +362,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ */
+ .macro SPEC_CTRL_EXIT_TO_XEN
+ /*
+- * Requires %rbx=stack_end
++ * Requires %r14=stack_end
+ * Clobbers %rax, %rcx, %rdx
+ */
+- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
++ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
+ jz .L\@_skip_sc_msr
+
+ /*
+@@ -376,10 +376,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ */
+ xor %edx, %edx
+
+- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
++ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
+ jz .L\@_skip_sc_msr
+
+- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
++ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax
+ mov $MSR_SPEC_CTRL, %ecx
+ wrmsr
+
+--
+2.42.0
+
diff --git a/0007-x86-vmx-Support-for-CPUs-without-model-specific-LBR.patch b/0007-x86-vmx-Support-for-CPUs-without-model-specific-LBR.patch
deleted file mode 100644
index fc81a17..0000000
--- a/0007-x86-vmx-Support-for-CPUs-without-model-specific-LBR.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From 9f425039ca50e8cc8db350ec54d8a7cd4175f417 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 7 Feb 2023 17:04:49 +0100
-Subject: [PATCH 07/61] x86/vmx: Support for CPUs without model-specific LBR
-
-Ice Lake (server at least) has both architectural LBR and model-specific LBR.
-Sapphire Rapids does not have model-specific LBR at all. I.e. On SPR and
-later, model_specific_lbr will always be NULL, so we must make changes to
-avoid reliably hitting the domain_crash().
-
-The Arch LBR spec states that CPUs without model-specific LBR implement
-MSR_DBG_CTL.LBR by discarding writes and always returning 0.
-
-Do this for any CPU for which we lack model-specific LBR information.
-
-Adjust the now-stale comment, now that the Arch LBR spec has created a way to
-signal "no model specific LBR" to guests.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: 3edca52ce736297d7fcf293860cd94ef62638052
-master date: 2023-01-12 18:42:00 +0000
----
- xen/arch/x86/hvm/vmx/vmx.c | 31 ++++++++++++++++---------------
- 1 file changed, 16 insertions(+), 15 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index bc308d9df2..094141be9a 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -3518,18 +3518,26 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
- if ( msr_content & rsvd )
- goto gp_fault;
-
-+ /*
-+ * The Arch LBR spec (new in Ice Lake) states that CPUs with no
-+ * model-specific LBRs implement MSR_DBG_CTL.LBR by discarding writes
-+ * and always returning 0.
-+ *
-+ * Use this property in all cases where we don't know any
-+ * model-specific LBR information, as it matches real hardware
-+ * behaviour on post-Ice Lake systems.
-+ */
-+ if ( !model_specific_lbr )
-+ msr_content &= ~IA32_DEBUGCTLMSR_LBR;
-+
- /*
- * When a guest first enables LBR, arrange to save and restore the LBR
- * MSRs and allow the guest direct access.
- *
-- * MSR_DEBUGCTL and LBR has existed almost as long as MSRs have
-- * existed, and there is no architectural way to hide the feature, or
-- * fail the attempt to enable LBR.
-- *
-- * Unknown host LBR MSRs or hitting -ENOSPC with the guest load/save
-- * list are definitely hypervisor bugs, whereas -ENOMEM for allocating
-- * the load/save list is simply unlucky (and shouldn't occur with
-- * sensible management by the toolstack).
-+ * Hitting -ENOSPC with the guest load/save list is definitely a
-+ * hypervisor bug, whereas -ENOMEM for allocating the load/save list
-+ * is simply unlucky (and shouldn't occur with sensible management by
-+ * the toolstack).
- *
- * Either way, there is nothing we can do right now to recover, and
- * the guest won't execute correctly either. Simply crash the domain
-@@ -3540,13 +3548,6 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
- {
- const struct lbr_info *lbr = model_specific_lbr;
-
-- if ( unlikely(!lbr) )
-- {
-- gprintk(XENLOG_ERR, "Unknown Host LBR MSRs\n");
-- domain_crash(v->domain);
-- return X86EMUL_OKAY;
-- }
--
- for ( ; lbr->count; lbr++ )
- {
- unsigned int i;
---
-2.40.0
-
diff --git a/0008-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch b/0008-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch
new file mode 100644
index 0000000..c88a8b1
--- /dev/null
+++ b/0008-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch
@@ -0,0 +1,109 @@
+From 8064cbdbef79e328fad5158beeaf1c45bd0f5bd3 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 13 Sep 2023 12:20:12 +0100
+Subject: [PATCH 08/27] x86/entry: Track the IST-ness of an entry for the exit
+ paths
+
+Use %r12 to hold an ist_exit boolean. This register is zero elsewhere in the
+entry/exit asm, so it only needs setting in the IST path.
+
+As this is subtle and fragile, add check_ist_exit() to be used in debugging
+builds to cross-check that the ist_exit boolean matches the entry vector.
+
+Write check_ist_exit() in C, because it's debug only and the logic is more
+complicated than I care to maintain in asm.
+
+For now, we only need to use this signal in the exit-to-Xen path, but some
+exit-to-guest paths happen in IST context too. Check the correctness in all
+exit paths to avoid the logic bit-rotting.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 21bdc25b05a0f8ab6bc73520a9ca01327360732c)
+
+x86/entry: Partially revert IST-exit checks
+
+The patch adding check_ist_exit() didn't account for the fact that
+reset_stack_and_jump() is not an ABI-preserving boundary. The IST-ness in
+%r12 doesn't survive into the next context, and is a stale value in C.
+
+This shows up in Gitlab CI for the Clang build:
+
+ https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/5112783827
+
+and in OSSTest for GCC 8:
+
+ http://logs.test-lab.xenproject.org/osstest/logs/183045/test-amd64-amd64-xl-qemuu-debianhvm-amd64/serial-pinot0.log
+
+There's no straightforward way to reconstruct the IST-exit-ness on the
+exit-to-guest path after a context switch. For now, we only need IST-exit on
+the return-to-Xen path.
+
+Fixes: 21bdc25b05a0 ("x86/entry: Track the IST-ness of an entry for the exit paths")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 9b57c800b79b96769ea3dcd6468578fa664d19f9)
+---
+ xen/arch/x86/traps.c | 13 +++++++++++++
+ xen/arch/x86/x86_64/entry.S | 13 ++++++++++++-
+ 2 files changed, 25 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
+index 9679bfdb08..f7992ff230 100644
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -2348,6 +2348,19 @@ void asm_domain_crash_synchronous(unsigned long addr)
+ do_softirq();
+ }
+
++#ifdef CONFIG_DEBUG
++void check_ist_exit(const struct cpu_user_regs *regs, bool ist_exit)
++{
++ const unsigned int ist_mask =
++ (1U << X86_EXC_NMI) | (1U << X86_EXC_DB) |
++ (1U << X86_EXC_DF) | (1U << X86_EXC_MC);
++ uint8_t ev = regs->entry_vector;
++ bool is_ist = (ev < TRAP_nr) && ((1U << ev) & ist_mask);
++
++ ASSERT(is_ist == ist_exit);
++}
++#endif
++
+ /*
+ * Local variables:
+ * mode: C
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 266c0a0990..671e3b3fd5 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -650,8 +650,15 @@ ret_from_intr:
+ .section .text.entry, "ax", @progbits
+
+ ALIGN
+-/* No special register assumptions. */
++/* %r12=ist_exit */
+ restore_all_xen:
++
++#ifdef CONFIG_DEBUG
++ mov %rsp, %rdi
++ mov %r12, %rsi
++ call check_ist_exit
++#endif
++
+ /*
+ * Check whether we need to switch to the per-CPU page tables, in
+ * case we return to late PV exit code (from an NMI or #MC).
+@@ -1032,6 +1039,10 @@ handle_ist_exception:
+ INDIRECT_CALL %rdx
+ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+ mov %bl, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14)
++
++ /* This is an IST exit */
++ mov $1, %r12d
++
+ cmpb $TRAP_nmi,UREGS_entry_vector(%rsp)
+ jne ret_from_intr
+
+--
+2.42.0
+
diff --git a/0008-x86-shadow-fix-PAE-check-for-top-level-table-unshado.patch b/0008-x86-shadow-fix-PAE-check-for-top-level-table-unshado.patch
deleted file mode 100644
index ab7862b..0000000
--- a/0008-x86-shadow-fix-PAE-check-for-top-level-table-unshado.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 1550835b381a18fc0e972e5d04925e02fab31553 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 7 Feb 2023 17:05:22 +0100
-Subject: [PATCH 08/61] x86/shadow: fix PAE check for top-level table
- unshadowing
-
-Clearly within the for_each_vcpu() the vCPU of this loop is meant, not
-the (loop invariant) one the fault occurred on.
-
-Fixes: 3d5e6a3ff383 ("x86 hvm: implement HVMOP_pagetable_dying")
-Fixes: ef3b0d8d2c39 ("x86/shadow: shadow_table[] needs only one entry for PV-only configs")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: f8fdceefbb1193ec81667eb40b83bc525cb71204
-master date: 2023-01-20 09:23:42 +0100
----
- xen/arch/x86/mm/shadow/multi.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
-index c07af0bd99..f7acd18a36 100644
---- a/xen/arch/x86/mm/shadow/multi.c
-+++ b/xen/arch/x86/mm/shadow/multi.c
-@@ -2665,10 +2665,10 @@ static int sh_page_fault(struct vcpu *v,
- #if GUEST_PAGING_LEVELS == 3
- unsigned int i;
-
-- for_each_shadow_table(v, i)
-+ for_each_shadow_table(tmp, i)
- {
- mfn_t smfn = pagetable_get_mfn(
-- v->arch.paging.shadow.shadow_table[i]);
-+ tmp->arch.paging.shadow.shadow_table[i]);
-
- if ( mfn_valid(smfn) && (mfn_x(smfn) != 0) )
- {
---
-2.40.0
-
diff --git a/0009-ns16550-fix-an-incorrect-assignment-to-uart-io_size.patch b/0009-ns16550-fix-an-incorrect-assignment-to-uart-io_size.patch
deleted file mode 100644
index 83e46c7..0000000
--- a/0009-ns16550-fix-an-incorrect-assignment-to-uart-io_size.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 0fd9ad2b9c0c9d9c4879a566f1788d3e9cd38ef6 Mon Sep 17 00:00:00 2001
-From: Ayan Kumar Halder <ayan.kumar.halder@amd.com>
-Date: Tue, 7 Feb 2023 17:05:56 +0100
-Subject: [PATCH 09/61] ns16550: fix an incorrect assignment to uart->io_size
-
-uart->io_size represents the size in bytes. Thus, when serial_port.bit_width
-is assigned to it, it should be converted to size in bytes.
-
-Fixes: 17b516196c ("ns16550: add ACPI support for ARM only")
-Reported-by: Jan Beulich <jbeulich@suse.com>
-Signed-off-by: Ayan Kumar Halder <ayan.kumar.halder@amd.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-master commit: 352c89f72ddb67b8d9d4e492203f8c77f85c8df1
-master date: 2023-01-24 16:54:38 +0100
----
- xen/drivers/char/ns16550.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c
-index 2d2bd2a024..5dd4d723f5 100644
---- a/xen/drivers/char/ns16550.c
-+++ b/xen/drivers/char/ns16550.c
-@@ -1780,7 +1780,7 @@ static int __init ns16550_acpi_uart_init(const void *data)
- uart->parity = spcr->parity;
- uart->stop_bits = spcr->stop_bits;
- uart->io_base = spcr->serial_port.address;
-- uart->io_size = spcr->serial_port.bit_width;
-+ uart->io_size = DIV_ROUND_UP(spcr->serial_port.bit_width, BITS_PER_BYTE);
- uart->reg_shift = spcr->serial_port.bit_offset;
- uart->reg_width = spcr->serial_port.access_width;
-
---
-2.40.0
-
diff --git a/0009-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch b/0009-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch
new file mode 100644
index 0000000..4396aa9
--- /dev/null
+++ b/0009-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch
@@ -0,0 +1,89 @@
+From 3e51782ebb088fde39fdcfa30d002baddd1a9e06 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 13 Sep 2023 13:53:33 +0100
+Subject: [PATCH 09/27] x86/spec-ctrl: Issue VERW during IST exit to Xen
+
+There is a corner case where e.g. an NMI hitting an exit-to-guest path after
+SPEC_CTRL_EXIT_TO_* would have run the entire NMI handler *after* the VERW
+flush to scrub potentially sensitive data from uarch buffers.
+
+In order to compensate, issue VERW when exiting to Xen from an IST entry.
+
+SPEC_CTRL_EXIT_TO_XEN already has two reads of spec_ctrl_flags off the stack,
+and we're about to add a third. Load the field into %ebx, and list the
+register as clobbered.
+
+%r12 has been arranged to be the ist_exit signal, so add this as an input
+dependency and use it to identify when to issue a VERW.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 3ee6066bcd737756b0990d417d94eddc0b0d2585)
+---
+ xen/arch/x86/x86_64/entry.S | 2 +-
+ xen/include/asm-x86/spec_ctrl_asm.h | 20 +++++++++++++++-----
+ 2 files changed, 16 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 671e3b3fd5..88ff5c150f 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -671,7 +671,7 @@ UNLIKELY_START(ne, exit_cr3)
+ UNLIKELY_END(exit_cr3)
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+- SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */
++ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */
+
+ RESTORE_ALL adj=8
+ iretq
+diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
+index 3e745813cf..8a816b8cf6 100644
+--- a/xen/include/asm-x86/spec_ctrl_asm.h
++++ b/xen/include/asm-x86/spec_ctrl_asm.h
+@@ -362,10 +362,12 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ */
+ .macro SPEC_CTRL_EXIT_TO_XEN
+ /*
+- * Requires %r14=stack_end
+- * Clobbers %rax, %rcx, %rdx
++ * Requires %r12=ist_exit, %r14=stack_end
++ * Clobbers %rax, %rbx, %rcx, %rdx
+ */
+- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
++ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx
++
++ testb $SCF_ist_sc_msr, %bl
+ jz .L\@_skip_sc_msr
+
+ /*
+@@ -376,7 +378,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ */
+ xor %edx, %edx
+
+- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
++ testb $SCF_use_shadow, %bl
+ jz .L\@_skip_sc_msr
+
+ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax
+@@ -385,8 +387,16 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+
+ .L\@_skip_sc_msr:
+
+- /* TODO VERW */
++ test %r12, %r12
++ jz .L\@_skip_ist_exit
++
++ /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */
++ testb $SCF_verw, %bl
++ jz .L\@_skip_verw
++ verw STACK_CPUINFO_FIELD(verw_sel)(%r14)
++.L\@_skip_verw:
+
++.L\@_skip_ist_exit:
+ .endm
+
+ #endif /* __ASSEMBLY__ */
+--
+2.42.0
+
diff --git a/0010-libxl-fix-guest-kexec-skip-cpuid-policy.patch b/0010-libxl-fix-guest-kexec-skip-cpuid-policy.patch
deleted file mode 100644
index 6150286..0000000
--- a/0010-libxl-fix-guest-kexec-skip-cpuid-policy.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 6e081438bf8ef616d0123aab7a743476d8114ef6 Mon Sep 17 00:00:00 2001
-From: Jason Andryuk <jandryuk@gmail.com>
-Date: Tue, 7 Feb 2023 17:06:47 +0100
-Subject: [PATCH 10/61] libxl: fix guest kexec - skip cpuid policy
-
-When a domain performs a kexec (soft reset), libxl__build_pre() is
-called with the existing domid. Calling libxl__cpuid_legacy() on the
-existing domain fails since the cpuid policy has already been set, and
-the guest isn't rebuilt and doesn't kexec.
-
-xc: error: Failed to set d1's policy (err leaf 0xffffffff, subleaf 0xffffffff, msr 0xffffffff) (17 = File exists): Internal error
-libxl: error: libxl_cpuid.c:494:libxl__cpuid_legacy: Domain 1:Failed to apply CPUID policy: File exists
-libxl: error: libxl_create.c:1641:domcreate_rebuild_done: Domain 1:cannot (re-)build domain: -3
-libxl: error: libxl_xshelp.c:201:libxl__xs_read_mandatory: xenstore read failed: `/libxl/1/type': No such file or directory
-libxl: warning: libxl_dom.c:49:libxl__domain_type: unable to get domain type for domid=1, assuming HVM
-
-During a soft_reset, skip calling libxl__cpuid_legacy() to avoid the
-issue. Before commit 34990446ca91, the libxl__cpuid_legacy() failure
-would have been ignored, so kexec would continue.
-
-Fixes: 34990446ca91 ("libxl: don't ignore the return value from xc_cpuid_apply_policy")
-Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: 1e454c2b5b1172e0fc7457e411ebaba61db8fc87
-master date: 2023-01-26 10:58:23 +0100
----
- tools/libs/light/libxl_create.c | 2 ++
- tools/libs/light/libxl_dom.c | 2 +-
- tools/libs/light/libxl_internal.h | 1 +
- 3 files changed, 4 insertions(+), 1 deletion(-)
-
-diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
-index 885675591f..2e6357a9d7 100644
---- a/tools/libs/light/libxl_create.c
-+++ b/tools/libs/light/libxl_create.c
-@@ -2176,6 +2176,8 @@ static int do_domain_soft_reset(libxl_ctx *ctx,
- aop_console_how);
- cdcs->domid_out = &domid_out;
-
-+ state->soft_reset = true;
-+
- dom_path = libxl__xs_get_dompath(gc, domid);
- if (!dom_path) {
- LOGD(ERROR, domid, "failed to read domain path");
-diff --git a/tools/libs/light/libxl_dom.c b/tools/libs/light/libxl_dom.c
-index 73fccd9243..a2bd2395fa 100644
---- a/tools/libs/light/libxl_dom.c
-+++ b/tools/libs/light/libxl_dom.c
-@@ -384,7 +384,7 @@ int libxl__build_pre(libxl__gc *gc, uint32_t domid,
- /* Construct a CPUID policy, but only for brand new domains. Domains
- * being migrated-in/restored have CPUID handled during the
- * static_data_done() callback. */
-- if (!state->restore)
-+ if (!state->restore && !state->soft_reset)
- rc = libxl__cpuid_legacy(ctx, domid, false, info);
-
- out:
-diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
-index 0b4671318c..ee6a251700 100644
---- a/tools/libs/light/libxl_internal.h
-+++ b/tools/libs/light/libxl_internal.h
-@@ -1407,6 +1407,7 @@ typedef struct {
- /* Whether this domain is being migrated/restored, or booting fresh. Only
- * applicable to the primary domain, not support domains (e.g. stub QEMU). */
- bool restore;
-+ bool soft_reset;
- } libxl__domain_build_state;
-
- _hidden void libxl__domain_build_state_init(libxl__domain_build_state *s);
---
-2.40.0
-
diff --git a/0010-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch b/0010-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch
new file mode 100644
index 0000000..c9cf0df
--- /dev/null
+++ b/0010-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch
@@ -0,0 +1,91 @@
+From a5857f1eca17a609119ae928c9fa73bb0996ddd9 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Fri, 15 Sep 2023 12:13:51 +0100
+Subject: [PATCH 10/27] x86/amd: Introduce is_zen{1,2}_uarch() predicates
+
+We already have 3 cases using STIBP as a Zen1/2 heuristic, and are about to
+introduce a 4th. Wrap the heuristic into a pair of predicates rather than
+opencoding it, and the explanation of the heuristic, at each usage site.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit de1d265001397f308c5c3c5d3ffc30e7ef8c0705)
+---
+ xen/arch/x86/cpu/amd.c | 18 ++++--------------
+ xen/include/asm-x86/amd.h | 11 +++++++++++
+ 2 files changed, 15 insertions(+), 14 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
+index a591038757..b71d891901 100644
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -746,15 +746,13 @@ void amd_init_ssbd(const struct cpuinfo_x86 *c)
+ * non-branch instructions to be ignored. It is to be set unilaterally in
+ * newer microcode.
+ *
+- * This chickenbit is something unrelated on Zen1, and Zen1 vs Zen2 isn't a
+- * simple model number comparison, so use STIBP as a heuristic to separate the
+- * two uarches in Fam17h(AMD)/18h(Hygon).
++ * This chickenbit is something unrelated on Zen1.
+ */
+ void amd_init_spectral_chicken(void)
+ {
+ uint64_t val, chickenbit = 1 << 1;
+
+- if (cpu_has_hypervisor || !boot_cpu_has(X86_FEATURE_AMD_STIBP))
++ if (cpu_has_hypervisor || !is_zen2_uarch())
+ return;
+
+ if (rdmsr_safe(MSR_AMD64_DE_CFG2, val) == 0 && !(val & chickenbit))
+@@ -803,11 +801,8 @@ void amd_check_zenbleed(void)
+ * With the Fam17h check above, most parts getting here are
+ * Zen1. They're not affected. Assume Zen2 ones making it
+ * here are affected regardless of microcode version.
+- *
+- * Zen1 vs Zen2 isn't a simple model number comparison, so use
+- * STIBP as a heuristic to distinguish.
+ */
+- if (!boot_cpu_has(X86_FEATURE_AMD_STIBP))
++ if (is_zen1_uarch())
+ return;
+ good_rev = ~0U;
+ break;
+@@ -1168,12 +1163,7 @@ static int __init zen2_c6_errata_check(void)
+ */
+ s_time_t delta;
+
+- /*
+- * Zen1 vs Zen2 isn't a simple model number comparison, so use STIBP as
+- * a heuristic to separate the two uarches in Fam17h.
+- */
+- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 ||
+- !boot_cpu_has(X86_FEATURE_AMD_STIBP))
++ if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch())
+ return 0;
+
+ /*
+diff --git a/xen/include/asm-x86/amd.h b/xen/include/asm-x86/amd.h
+index a82382e6bf..7fe1e19217 100644
+--- a/xen/include/asm-x86/amd.h
++++ b/xen/include/asm-x86/amd.h
+@@ -140,6 +140,17 @@
+ AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \
+ AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf))
+
++/*
++ * The Zen1 and Zen2 microarchitectures are implemented by AMD (Fam17h) and
++ * Hygon (Fam18h) but without simple model number rules. Instead, use STIBP
++ * as a heuristic that distinguishes the two.
++ *
++ * The caller is required to perform the appropriate vendor/family checks
++ * first.
++ */
++#define is_zen1_uarch() (!boot_cpu_has(X86_FEATURE_AMD_STIBP))
++#define is_zen2_uarch() boot_cpu_has(X86_FEATURE_AMD_STIBP)
++
+ struct cpuinfo_x86;
+ int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...);
+
+--
+2.42.0
+
diff --git a/0011-tools-ocaml-xenctrl-Make-domain_getinfolist-tail-rec.patch b/0011-tools-ocaml-xenctrl-Make-domain_getinfolist-tail-rec.patch
deleted file mode 100644
index 1d4455f..0000000
--- a/0011-tools-ocaml-xenctrl-Make-domain_getinfolist-tail-rec.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From c6a3d14df051bae0323af539e34cf5a65fba1112 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Tue, 1 Nov 2022 17:59:16 +0000
-Subject: [PATCH 11/61] tools/ocaml/xenctrl: Make domain_getinfolist tail
- recursive
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-domain_getinfolist() is quadratic with the number of domains, because of the
-behaviour of the underlying hypercall. xenopsd was further observed to be
-wasting excessive quantites of time manipulating the list of already-obtained
-domains.
-
-Implement a tail recursive `rev_concat` equivalent to `concat |> rev`, and use
-it instead of calling `@` multiple times.
-
-An incidental benefit is that the list of domains will now be in domid order,
-instead of having pairs of 2 domains changing direction every time.
-
-In a scalability testing scenario with ~1000 VMs, a combination of this and
-the subsequent change takes xenopsd's wallclock time in domain_getinfolist()
-down from 88% to 0.02%
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Tested-by: Pau Ruiz Safont <pau.safont@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit c3b6be714c64aa62b56d0bce96f4b6a10b5c2078)
----
- tools/ocaml/libs/xc/xenctrl.ml | 23 +++++++++++++++++------
- 1 file changed, 17 insertions(+), 6 deletions(-)
-
-diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
-index 7503031d8f..f10b686215 100644
---- a/tools/ocaml/libs/xc/xenctrl.ml
-+++ b/tools/ocaml/libs/xc/xenctrl.ml
-@@ -212,14 +212,25 @@ external domain_shutdown: handle -> domid -> shutdown_reason -> unit
- external _domain_getinfolist: handle -> domid -> int -> domaininfo list
- = "stub_xc_domain_getinfolist"
-
-+let rev_append_fold acc e = List.rev_append e acc
-+
-+(**
-+ * [rev_concat lst] is equivalent to [lst |> List.concat |> List.rev]
-+ * except it is tail recursive, whereas [List.concat] isn't.
-+ * Example:
-+ * rev_concat [[10;9;8];[7;6];[5]]] = [5; 6; 7; 8; 9; 10]
-+ *)
-+let rev_concat lst = List.fold_left rev_append_fold [] lst
-+
- let domain_getinfolist handle first_domain =
- let nb = 2 in
-- let last_domid l = (List.hd l).domid + 1 in
-- let rec __getlist from =
-- let l = _domain_getinfolist handle from nb in
-- (if List.length l = nb then __getlist (last_domid l) else []) @ l
-- in
-- List.rev (__getlist first_domain)
-+ let rec __getlist lst from =
-+ (* _domain_getinfolist returns domains in reverse order, largest first *)
-+ match _domain_getinfolist handle from nb with
-+ | [] -> rev_concat lst
-+ | (hd :: _) as l -> __getlist (l :: lst) (hd.domid + 1)
-+ in
-+ __getlist [] first_domain
-
- external domain_getinfo: handle -> domid -> domaininfo= "stub_xc_domain_getinfo"
-
---
-2.40.0
-
diff --git a/0011-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch b/0011-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch
new file mode 100644
index 0000000..cce4fa7
--- /dev/null
+++ b/0011-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch
@@ -0,0 +1,236 @@
+From de751c3d906d17b2e25ee429f81b17a689c7c6c0 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Wed, 30 Aug 2023 20:24:25 +0100
+Subject: [PATCH 11/27] x86/spec-ctrl: Mitigate the Zen1 DIV leakage
+
+In the Zen1 microarchitecture, there is one divider in the pipeline which
+services uops from both threads. In the case of #DE, the latched result from
+the previous DIV to execute will be forwarded speculatively.
+
+This is an interesting covert channel that allows two threads to communicate
+without any system calls. It also allows userspace to obtain the result of
+the most recent DIV instruction executed (even speculatively) in the core,
+which can be from a higher privilege context.
+
+Scrub the result from the divider by executing a non-faulting divide. This
+needs performing on the exit-to-guest paths, and ist_exit-to-Xen.
+
+Using alternatives in IST context is believed safe now that it's done in NMI
+context.
+
+This is XSA-439 / CVE-2023-20588.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit b5926c6ecf05c28ee99c6248c42d691ccbf0c315)
+---
+ docs/misc/xen-command-line.pandoc | 6 +++-
+ xen/arch/x86/hvm/svm/entry.S | 1 +
+ xen/arch/x86/spec_ctrl.c | 49 ++++++++++++++++++++++++++++-
+ xen/include/asm-x86/cpufeatures.h | 2 +-
+ xen/include/asm-x86/spec_ctrl_asm.h | 17 ++++++++++
+ 5 files changed, 72 insertions(+), 3 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index a37a3890d1..a7a1362bac 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2263,7 +2263,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+ > {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
+ > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
+ > eager-fpu,l1d-flush,branch-harden,srb-lock,
+-> unpriv-mmio,gds-mit}=<bool> ]`
++> unpriv-mmio,gds-mit,div-scrub}=<bool> ]`
+
+ Controls for speculative execution sidechannel mitigations. By default, Xen
+ will pick the most appropriate mitigations based on compiled in support,
+@@ -2383,6 +2383,10 @@ has elected not to lock the configuration, Xen will use GDS_CTRL to mitigate
+ GDS with. Otherwise, Xen will mitigate by disabling AVX, which blocks the use
+ of the AVX2 Gather instructions.
+
++On all hardware, the `div-scrub=` option can be used to force or prevent Xen
++from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate
++DIV-leakage on hardware believed to be vulnerable.
++
+ ### sync_console
+ > `= <boolean>`
+
+diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
+index 0ff4008060..ad5ca50c12 100644
+--- a/xen/arch/x86/hvm/svm/entry.S
++++ b/xen/arch/x86/hvm/svm/entry.S
+@@ -72,6 +72,7 @@ __UNLIKELY_END(nsvm_hap)
+ 1: /* No Spectre v1 concerns. Execution will hit VMRUN imminently. */
+ .endm
+ ALTERNATIVE "", svm_vmentry_spec_ctrl, X86_FEATURE_SC_MSR_HVM
++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
+
+ pop %r15
+ pop %r14
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 082445179d..6e82a126a3 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -22,6 +22,7 @@
+ #include <xen/param.h>
+ #include <xen/warning.h>
+
++#include <asm/amd.h>
+ #include <asm/hvm/svm/svm.h>
+ #include <asm/microcode.h>
+ #include <asm/msr.h>
+@@ -78,6 +79,7 @@ static int8_t __initdata opt_srb_lock = -1;
+ static bool __initdata opt_unpriv_mmio;
+ static bool __read_mostly opt_fb_clear_mmio;
+ static int8_t __initdata opt_gds_mit = -1;
++static int8_t __initdata opt_div_scrub = -1;
+
+ static int __init parse_spec_ctrl(const char *s)
+ {
+@@ -132,6 +134,7 @@ static int __init parse_spec_ctrl(const char *s)
+ opt_srb_lock = 0;
+ opt_unpriv_mmio = false;
+ opt_gds_mit = 0;
++ opt_div_scrub = 0;
+ }
+ else if ( val > 0 )
+ rc = -EINVAL;
+@@ -284,6 +287,8 @@ static int __init parse_spec_ctrl(const char *s)
+ opt_unpriv_mmio = val;
+ else if ( (val = parse_boolean("gds-mit", s, ss)) >= 0 )
+ opt_gds_mit = val;
++ else if ( (val = parse_boolean("div-scrub", s, ss)) >= 0 )
++ opt_div_scrub = val;
+ else
+ rc = -EINVAL;
+
+@@ -484,7 +489,7 @@ static void __init print_details(enum ind_thunk thunk)
+ "\n");
+
+ /* Settings for Xen's protection, irrespective of guests. */
+- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s\n",
++ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
+ thunk == THUNK_NONE ? "N/A" :
+ thunk == THUNK_RETPOLINE ? "RETPOLINE" :
+ thunk == THUNK_LFENCE ? "LFENCE" :
+@@ -509,6 +514,7 @@ static void __init print_details(enum ind_thunk thunk)
+ opt_l1d_flush ? " L1D_FLUSH" : "",
+ opt_md_clear_pv || opt_md_clear_hvm ||
+ opt_fb_clear_mmio ? " VERW" : "",
++ opt_div_scrub ? " DIV" : "",
+ opt_branch_harden ? " BRANCH_HARDEN" : "");
+
+ /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
+@@ -962,6 +968,45 @@ static void __init srso_calculations(bool hw_smt_enabled)
+ setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
+ }
+
++/*
++ * The Div leakage issue is specific to the AMD Zen1 microarchitecure.
++ *
++ * However, there's no $FOO_NO bit defined, so if we're virtualised we have no
++ * hope of spotting the case where we might move to vulnerable hardware. We
++ * also can't make any useful conclusion about SMT-ness.
++ *
++ * Don't check the hypervisor bit, so at least we do the safe thing when
++ * booting on something that looks like a Zen1 CPU.
++ */
++static bool __init has_div_vuln(void)
++{
++ if ( !(boot_cpu_data.x86_vendor &
++ (X86_VENDOR_AMD | X86_VENDOR_HYGON)) )
++ return false;
++
++ if ( boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18 )
++ return false;
++
++ return is_zen1_uarch();
++}
++
++static void __init div_calculations(bool hw_smt_enabled)
++{
++ bool cpu_bug_div = has_div_vuln();
++
++ if ( opt_div_scrub == -1 )
++ opt_div_scrub = cpu_bug_div;
++
++ if ( opt_div_scrub )
++ setup_force_cpu_cap(X86_FEATURE_SC_DIV);
++
++ if ( opt_smt == -1 && !cpu_has_hypervisor && cpu_bug_div && hw_smt_enabled )
++ warning_add(
++ "Booted on leaky-DIV hardware with SMT/Hyperthreading\n"
++ "enabled. Please assess your configuration and choose an\n"
++ "explicit 'smt=<bool>' setting. See XSA-439.\n");
++}
++
+ static void __init ibpb_calculations(void)
+ {
+ bool def_ibpb_entry = false;
+@@ -1716,6 +1761,8 @@ void __init init_speculation_mitigations(void)
+
+ ibpb_calculations();
+
++ div_calculations(hw_smt_enabled);
++
+ /* Check whether Eager FPU should be enabled by default. */
+ if ( opt_eager_fpu == -1 )
+ opt_eager_fpu = should_use_eager_fpu();
+diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
+index ccf9d7287c..70b93b6b44 100644
+--- a/xen/include/asm-x86/cpufeatures.h
++++ b/xen/include/asm-x86/cpufeatures.h
+@@ -35,7 +35,7 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM
+ XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */
+ XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */
+ XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */
+-/* Bits 23 unused. */
++XEN_CPUFEATURE(SC_DIV, X86_SYNTH(23)) /* DIV scrub needed */
+ XEN_CPUFEATURE(SC_RSB_IDLE, X86_SYNTH(24)) /* RSB overwrite needed for idle. */
+ XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */
+ XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */
+diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
+index 8a816b8cf6..0e69971f66 100644
+--- a/xen/include/asm-x86/spec_ctrl_asm.h
++++ b/xen/include/asm-x86/spec_ctrl_asm.h
+@@ -182,6 +182,19 @@
+ .L\@_verw_skip:
+ .endm
+
++.macro DO_SPEC_CTRL_DIV
++/*
++ * Requires nothing
++ * Clobbers %rax
++ *
++ * Issue a DIV for its flushing side effect (Zen1 uarch specific). Any
++ * non-faulting DIV will do; a byte DIV has least latency, and doesn't clobber
++ * %rdx.
++ */
++ mov $1, %eax
++ div %al
++.endm
++
+ .macro DO_SPEC_CTRL_ENTRY maybexen:req
+ /*
+ * Requires %rsp=regs (also cpuinfo if !maybexen)
+@@ -284,6 +297,8 @@
+ ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
+
+ DO_SPEC_CTRL_COND_VERW
++
++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
+ .endm
+
+ /*
+@@ -396,6 +411,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ verw STACK_CPUINFO_FIELD(verw_sel)(%r14)
+ .L\@_skip_verw:
+
++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
++
+ .L\@_skip_ist_exit:
+ .endm
+
+--
+2.42.0
+
diff --git a/0012-tools-ocaml-xenctrl-Use-larger-chunksize-in-domain_g.patch b/0012-tools-ocaml-xenctrl-Use-larger-chunksize-in-domain_g.patch
deleted file mode 100644
index fc352ad..0000000
--- a/0012-tools-ocaml-xenctrl-Use-larger-chunksize-in-domain_g.patch
+++ /dev/null
@@ -1,41 +0,0 @@
-From 8c66a2d88a9f17e5b5099fcb83231b7a1169ca25 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Tue, 1 Nov 2022 17:59:17 +0000
-Subject: [PATCH 12/61] tools/ocaml/xenctrl: Use larger chunksize in
- domain_getinfolist
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-domain_getinfolist() is quadratic with the number of domains, because of the
-behaviour of the underlying hypercall. Nevertheless, getting domain info in
-blocks of 1024 is far more efficient than blocks of 2.
-
-In a scalability testing scenario with ~1000 VMs, a combination of this and
-the previous change takes xenopsd's wallclock time in domain_getinfolist()
-down from 88% to 0.02%
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Tested-by: Pau Ruiz Safont <pau.safont@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 95db09b1b154fb72fad861815ceae1f3fa49fc4e)
----
- tools/ocaml/libs/xc/xenctrl.ml | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
-index f10b686215..b40c70d33f 100644
---- a/tools/ocaml/libs/xc/xenctrl.ml
-+++ b/tools/ocaml/libs/xc/xenctrl.ml
-@@ -223,7 +223,7 @@ let rev_append_fold acc e = List.rev_append e acc
- let rev_concat lst = List.fold_left rev_append_fold [] lst
-
- let domain_getinfolist handle first_domain =
-- let nb = 2 in
-+ let nb = 1024 in
- let rec __getlist lst from =
- (* _domain_getinfolist returns domains in reverse order, largest first *)
- match _domain_getinfolist handle from nb with
---
-2.40.0
-
diff --git a/0012-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch b/0012-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch
new file mode 100644
index 0000000..b374a11
--- /dev/null
+++ b/0012-x86-shadow-defer-releasing-of-PV-s-top-level-shadow-.patch
@@ -0,0 +1,453 @@
+From c450a4bc11e97eabe97dcefe06f510d7acea8d6d Mon Sep 17 00:00:00 2001
+From: Jan Beulich <JBeulich@suse.com>
+Date: Wed, 20 Sep 2023 10:34:24 +0100
+Subject: [PATCH 12/27] x86/shadow: defer releasing of PV's top-level shadow
+ reference
+
+sh_set_toplevel_shadow() re-pinning the top-level shadow we may be
+running on is not enough (and at the same time unnecessary when the
+shadow isn't what we're running on): That shadow becomes eligible for
+blowing away (from e.g. shadow_prealloc()) immediately after the
+paging lock was dropped. Yet it needs to remain valid until the actual
+page table switch occurred.
+
+Propagate up the call chain the shadow entry that needs releasing
+eventually, and carry out the release immediately after switching page
+tables. Handle update_cr3() failures by switching to idle pagetables.
+Note that various further uses of update_cr3() are HVM-only or only act
+on paused vCPU-s, in which case sh_set_toplevel_shadow() will not defer
+releasing of the reference.
+
+While changing the update_cr3() hook, also convert the "do_locking"
+parameter to boolean.
+
+This is CVE-2023-34322 / XSA-438.
+
+Reported-by: Tim Deegan <tim@xen.org>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@cloud.com>
+(cherry picked from commit fb0ff49fe9f784bfee0370c2a3c5f20e39d7a1cb)
+---
+ xen/arch/x86/mm.c | 27 ++++++++++++----
+ xen/arch/x86/mm/hap/hap.c | 4 ++-
+ xen/arch/x86/mm/shadow/common.c | 55 +++++++++++++++++++++-----------
+ xen/arch/x86/mm/shadow/multi.c | 34 +++++++++++++-------
+ xen/arch/x86/mm/shadow/none.c | 3 +-
+ xen/arch/x86/mm/shadow/private.h | 14 ++++----
+ xen/arch/x86/pv/domain.c | 25 +++++++++++++--
+ xen/include/asm-x86/mm.h | 2 +-
+ xen/include/asm-x86/paging.h | 6 ++--
+ xen/include/asm-x86/shadow.h | 8 +++++
+ 10 files changed, 125 insertions(+), 53 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index a9b159891b..ea024c1450 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -565,15 +565,12 @@ void write_ptbase(struct vcpu *v)
+ *
+ * Update ref counts to shadow tables appropriately.
+ */
+-void update_cr3(struct vcpu *v)
++pagetable_t update_cr3(struct vcpu *v)
+ {
+ mfn_t cr3_mfn;
+
+ if ( paging_mode_enabled(v->domain) )
+- {
+- paging_update_cr3(v, false);
+- return;
+- }
++ return paging_update_cr3(v, false);
+
+ if ( !(v->arch.flags & TF_kernel_mode) )
+ cr3_mfn = pagetable_get_mfn(v->arch.guest_table_user);
+@@ -581,6 +578,8 @@ void update_cr3(struct vcpu *v)
+ cr3_mfn = pagetable_get_mfn(v->arch.guest_table);
+
+ make_cr3(v, cr3_mfn);
++
++ return pagetable_null();
+ }
+
+ static inline void set_tlbflush_timestamp(struct page_info *page)
+@@ -3254,6 +3253,7 @@ int new_guest_cr3(mfn_t mfn)
+ struct domain *d = curr->domain;
+ int rc;
+ mfn_t old_base_mfn;
++ pagetable_t old_shadow;
+
+ if ( is_pv_32bit_domain(d) )
+ {
+@@ -3321,9 +3321,22 @@ int new_guest_cr3(mfn_t mfn)
+ if ( !VM_ASSIST(d, m2p_strict) )
+ fill_ro_mpt(mfn);
+ curr->arch.guest_table = pagetable_from_mfn(mfn);
+- update_cr3(curr);
++ old_shadow = update_cr3(curr);
++
++ /*
++ * In shadow mode update_cr3() can fail, in which case here we're still
++ * running on the prior top-level shadow (which we're about to release).
++ * Switch to the idle page tables in such an event; the guest will have
++ * been crashed already.
++ */
++ if ( likely(!mfn_eq(pagetable_get_mfn(old_shadow),
++ maddr_to_mfn(curr->arch.cr3 & ~X86_CR3_NOFLUSH))) )
++ write_ptbase(curr);
++ else
++ write_ptbase(idle_vcpu[curr->processor]);
+
+- write_ptbase(curr);
++ if ( !pagetable_is_null(old_shadow) )
++ shadow_put_top_level(d, old_shadow);
+
+ if ( likely(mfn_x(old_base_mfn) != 0) )
+ {
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index 1f9a157a0c..fa479d3d97 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -728,10 +728,12 @@ static bool_t hap_invlpg(struct vcpu *v, unsigned long linear)
+ return 1;
+ }
+
+-static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush)
++static pagetable_t hap_update_cr3(struct vcpu *v, bool do_locking, bool noflush)
+ {
+ v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3];
+ hvm_update_guest_cr3(v, noflush);
++
++ return pagetable_null();
+ }
+
+ /*
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index c14a269935..242b93537f 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2574,13 +2574,13 @@ void shadow_update_paging_modes(struct vcpu *v)
+ }
+
+ /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
+-void sh_set_toplevel_shadow(struct vcpu *v,
+- unsigned int slot,
+- mfn_t gmfn,
+- unsigned int root_type,
+- mfn_t (*make_shadow)(struct vcpu *v,
+- mfn_t gmfn,
+- uint32_t shadow_type))
++pagetable_t sh_set_toplevel_shadow(struct vcpu *v,
++ unsigned int slot,
++ mfn_t gmfn,
++ unsigned int root_type,
++ mfn_t (*make_shadow)(struct vcpu *v,
++ mfn_t gmfn,
++ uint32_t shadow_type))
+ {
+ mfn_t smfn;
+ pagetable_t old_entry, new_entry;
+@@ -2637,20 +2637,37 @@ void sh_set_toplevel_shadow(struct vcpu *v,
+ mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry)));
+ v->arch.paging.shadow.shadow_table[slot] = new_entry;
+
+- /* Decrement the refcount of the old contents of this slot */
+- if ( !pagetable_is_null(old_entry) )
++ /*
++ * Decrement the refcount of the old contents of this slot, unless
++ * we're still running on that shadow - in that case it'll need holding
++ * on to until the actual page table switch did occur.
++ */
++ if ( !pagetable_is_null(old_entry) && (v != current || !is_pv_domain(d)) )
+ {
+- mfn_t old_smfn = pagetable_get_mfn(old_entry);
+- /* Need to repin the old toplevel shadow if it's been unpinned
+- * by shadow_prealloc(): in PV mode we're still running on this
+- * shadow and it's not safe to free it yet. */
+- if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(d, old_smfn) )
+- {
+- printk(XENLOG_G_ERR "can't re-pin %"PRI_mfn"\n", mfn_x(old_smfn));
+- domain_crash(d);
+- }
+- sh_put_ref(d, old_smfn, 0);
++ sh_put_ref(d, pagetable_get_mfn(old_entry), 0);
++ old_entry = pagetable_null();
+ }
++
++ /*
++ * 2- and 3-level shadow mode is used for HVM only. Therefore we never run
++ * on such a shadow, so only call sites requesting an L4 shadow need to pay
++ * attention to the returned value.
++ */
++ ASSERT(pagetable_is_null(old_entry) || root_type == SH_type_l4_64_shadow);
++
++ return old_entry;
++}
++
++/*
++ * Helper invoked when releasing of a top-level shadow's reference was
++ * deferred in sh_set_toplevel_shadow() above.
++ */
++void shadow_put_top_level(struct domain *d, pagetable_t old_entry)
++{
++ ASSERT(!pagetable_is_null(old_entry));
++ paging_lock(d);
++ sh_put_ref(d, pagetable_get_mfn(old_entry), 0);
++ paging_unlock(d);
+ }
+
+ /**************************************************************************/
+diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
+index f7acd18a36..cf3ded70e7 100644
+--- a/xen/arch/x86/mm/shadow/multi.c
++++ b/xen/arch/x86/mm/shadow/multi.c
+@@ -3219,8 +3219,8 @@ sh_detach_old_tables(struct vcpu *v)
+ }
+ }
+
+-static void
+-sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
++static pagetable_t
++sh_update_cr3(struct vcpu *v, bool do_locking, bool noflush)
+ /* Updates vcpu->arch.cr3 after the guest has changed CR3.
+ * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
+ * if appropriate).
+@@ -3234,6 +3234,7 @@ sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ {
+ struct domain *d = v->domain;
+ mfn_t gmfn;
++ pagetable_t old_entry = pagetable_null();
+ #if GUEST_PAGING_LEVELS == 3 && defined(CONFIG_HVM)
+ const guest_l3e_t *gl3e;
+ unsigned int i, guest_idx;
+@@ -3243,7 +3244,7 @@ sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ if ( !is_hvm_domain(d) && !v->is_initialised )
+ {
+ ASSERT(v->arch.cr3 == 0);
+- return;
++ return old_entry;
+ }
+
+ if ( do_locking ) paging_lock(v->domain);
+@@ -3316,11 +3317,12 @@ sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ #if GUEST_PAGING_LEVELS == 4
+ if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 )
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
+- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, sh_make_shadow);
++ old_entry = sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow,
++ sh_make_shadow);
+ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
+ {
+ ASSERT(d->is_dying || d->is_shutting_down);
+- return;
++ return old_entry;
+ }
+ if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) )
+ {
+@@ -3366,24 +3368,30 @@ sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+ gl2gfn = guest_l3e_get_gfn(gl3e[i]);
+ gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt);
+ if ( p2m_is_ram(p2mt) )
+- sh_set_toplevel_shadow(v, i, gl2mfn, SH_type_l2_shadow,
+- sh_make_shadow);
++ old_entry = sh_set_toplevel_shadow(v, i, gl2mfn,
++ SH_type_l2_shadow,
++ sh_make_shadow);
+ else
+- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
+- sh_make_shadow);
++ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
++ sh_make_shadow);
+ }
+ else
+- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, sh_make_shadow);
++ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0,
++ sh_make_shadow);
++
++ ASSERT(pagetable_is_null(old_entry));
+ }
+ }
+ #elif GUEST_PAGING_LEVELS == 2
+ if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 )
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
+- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, sh_make_shadow);
++ old_entry = sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow,
++ sh_make_shadow);
++ ASSERT(pagetable_is_null(old_entry));
+ if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) )
+ {
+ ASSERT(d->is_dying || d->is_shutting_down);
+- return;
++ return old_entry;
+ }
+ #else
+ #error This should never happen
+@@ -3477,6 +3485,8 @@ sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
+
+ /* Release the lock, if we took it (otherwise it's the caller's problem) */
+ if ( do_locking ) paging_unlock(v->domain);
++
++ return old_entry;
+ }
+
+
+diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c
+index 79889b926a..2a5fd409b2 100644
+--- a/xen/arch/x86/mm/shadow/none.c
++++ b/xen/arch/x86/mm/shadow/none.c
+@@ -52,9 +52,10 @@ static unsigned long _gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
+ }
+ #endif
+
+-static void _update_cr3(struct vcpu *v, int do_locking, bool noflush)
++static pagetable_t _update_cr3(struct vcpu *v, bool do_locking, bool noflush)
+ {
+ ASSERT_UNREACHABLE();
++ return pagetable_null();
+ }
+
+ static void _update_paging_modes(struct vcpu *v)
+diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
+index 762214f73c..876745a401 100644
+--- a/xen/arch/x86/mm/shadow/private.h
++++ b/xen/arch/x86/mm/shadow/private.h
+@@ -391,13 +391,13 @@ mfn_t shadow_alloc(struct domain *d,
+ void shadow_free(struct domain *d, mfn_t smfn);
+
+ /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */
+-void sh_set_toplevel_shadow(struct vcpu *v,
+- unsigned int slot,
+- mfn_t gmfn,
+- unsigned int root_type,
+- mfn_t (*make_shadow)(struct vcpu *v,
+- mfn_t gmfn,
+- uint32_t shadow_type));
++pagetable_t sh_set_toplevel_shadow(struct vcpu *v,
++ unsigned int slot,
++ mfn_t gmfn,
++ unsigned int root_type,
++ mfn_t (*make_shadow)(struct vcpu *v,
++ mfn_t gmfn,
++ uint32_t shadow_type));
+
+ /* Update the shadows in response to a pagetable write from Xen */
+ int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size);
+diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
+index 7fe2c04aac..91f53214fd 100644
+--- a/xen/arch/x86/pv/domain.c
++++ b/xen/arch/x86/pv/domain.c
+@@ -424,10 +424,13 @@ bool __init xpti_pcid_enabled(void)
+
+ static void _toggle_guest_pt(struct vcpu *v)
+ {
++ bool guest_update;
++ pagetable_t old_shadow;
+ unsigned long cr3;
+
+ v->arch.flags ^= TF_kernel_mode;
+- update_cr3(v);
++ guest_update = v->arch.flags & TF_kernel_mode;
++ old_shadow = update_cr3(v);
+
+ /*
+ * Don't flush user global mappings from the TLB. Don't tick TLB clock.
+@@ -436,13 +439,31 @@ static void _toggle_guest_pt(struct vcpu *v)
+ * TLB flush (for just the incoming PCID), as the top level page table may
+ * have changed behind our backs. To be on the safe side, suppress the
+ * no-flush unconditionally in this case.
++ *
++ * Furthermore in shadow mode update_cr3() can fail, in which case here
++ * we're still running on the prior top-level shadow (which we're about
++ * to release). Switch to the idle page tables in such an event; the
++ * guest will have been crashed already.
+ */
+ cr3 = v->arch.cr3;
+ if ( shadow_mode_enabled(v->domain) )
++ {
+ cr3 &= ~X86_CR3_NOFLUSH;
++
++ if ( unlikely(mfn_eq(pagetable_get_mfn(old_shadow),
++ maddr_to_mfn(cr3))) )
++ {
++ cr3 = idle_vcpu[v->processor]->arch.cr3;
++ /* Also suppress runstate/time area updates below. */
++ guest_update = false;
++ }
++ }
+ write_cr3(cr3);
+
+- if ( !(v->arch.flags & TF_kernel_mode) )
++ if ( !pagetable_is_null(old_shadow) )
++ shadow_put_top_level(v->domain, old_shadow);
++
++ if ( !guest_update )
+ return;
+
+ if ( v->arch.pv.need_update_runstate_area && update_runstate_area(v) )
+diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
+index e1bcea57a8..cccef852b4 100644
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -564,7 +564,7 @@ void audit_domains(void);
+ #endif
+
+ void make_cr3(struct vcpu *v, mfn_t mfn);
+-void update_cr3(struct vcpu *v);
++pagetable_t update_cr3(struct vcpu *v);
+ int vcpu_destroy_pagetables(struct vcpu *);
+ void *do_page_walk(struct vcpu *v, unsigned long addr);
+
+diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
+index 43abaa5bd1..fceb208d36 100644
+--- a/xen/include/asm-x86/paging.h
++++ b/xen/include/asm-x86/paging.h
+@@ -138,7 +138,7 @@ struct paging_mode {
+ paddr_t ga, uint32_t *pfec,
+ unsigned int *page_order);
+ #endif
+- void (*update_cr3 )(struct vcpu *v, int do_locking,
++ pagetable_t (*update_cr3 )(struct vcpu *v, bool do_locking,
+ bool noflush);
+ void (*update_paging_modes )(struct vcpu *v);
+ bool (*flush_tlb )(bool (*flush_vcpu)(void *ctxt,
+@@ -315,9 +315,9 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
+ /* Update all the things that are derived from the guest's CR3.
+ * Called when the guest changes CR3; the caller can then use v->arch.cr3
+ * as the value to load into the host CR3 to schedule this vcpu */
+-static inline void paging_update_cr3(struct vcpu *v, bool noflush)
++static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush)
+ {
+- paging_get_hostmode(v)->update_cr3(v, 1, noflush);
++ return paging_get_hostmode(v)->update_cr3(v, 1, noflush);
+ }
+
+ /* Update all the things that are derived from the guest's CR0/CR3/CR4.
+diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
+index e25f9604d8..302ae97fc6 100644
+--- a/xen/include/asm-x86/shadow.h
++++ b/xen/include/asm-x86/shadow.h
+@@ -97,6 +97,9 @@ void shadow_blow_tables_per_domain(struct domain *d);
+ int shadow_set_allocation(struct domain *d, unsigned int pages,
+ bool *preempted);
+
++/* Helper to invoke for deferred releasing of a top-level shadow's reference. */
++void shadow_put_top_level(struct domain *d, pagetable_t old);
++
+ #else /* !CONFIG_SHADOW_PAGING */
+
+ #define shadow_vcpu_teardown(v) ASSERT(is_pv_vcpu(v))
+@@ -118,6 +121,11 @@ static inline void shadow_prepare_page_type_change(struct domain *d,
+
+ static inline void shadow_blow_tables_per_domain(struct domain *d) {}
+
++static inline void shadow_put_top_level(struct domain *d, pagetable_t old)
++{
++ ASSERT_UNREACHABLE();
++}
++
+ static inline int shadow_domctl(struct domain *d,
+ struct xen_domctl_shadow_op *sc,
+ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
+--
+2.42.0
+
diff --git a/0013-tools-ocaml-xb-mmap-Use-Data_abstract_val-wrapper.patch b/0013-tools-ocaml-xb-mmap-Use-Data_abstract_val-wrapper.patch
deleted file mode 100644
index a999dd8..0000000
--- a/0013-tools-ocaml-xb-mmap-Use-Data_abstract_val-wrapper.patch
+++ /dev/null
@@ -1,75 +0,0 @@
-From 049d16c8ce900dfc8f4b657849aeb82b95ed857c Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Fri, 16 Dec 2022 18:25:10 +0000
-Subject: [PATCH 13/61] tools/ocaml/xb,mmap: Use Data_abstract_val wrapper
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This is not strictly necessary since it is essentially a no-op currently: a
-cast to void * and value *, even in OCaml 5.0.
-
-However it does make it clearer that what we have here is not a regular OCaml
-value, but one allocated with Abstract_tag or Custom_tag, and follows the
-example from the manual more closely:
-https://v2.ocaml.org/manual/intfc.html#ss:c-outside-head
-
-It also makes it clearer that these modules have been reviewed for
-compat with OCaml 5.0.
-
-We cannot use OCaml finalizers here, because we want exact control over when
-to unmap these pages from remote domains.
-
-No functional change.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit d2ccc637111d6dbcf808aaffeec7a46f0b1e1c81)
----
- tools/ocaml/libs/mmap/mmap_stubs.h | 4 ++++
- tools/ocaml/libs/mmap/xenmmap_stubs.c | 2 +-
- tools/ocaml/libs/xb/xs_ring_stubs.c | 2 +-
- 3 files changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/libs/mmap/mmap_stubs.h b/tools/ocaml/libs/mmap/mmap_stubs.h
-index 65e4239890..f4784e4715 100644
---- a/tools/ocaml/libs/mmap/mmap_stubs.h
-+++ b/tools/ocaml/libs/mmap/mmap_stubs.h
-@@ -30,4 +30,8 @@ struct mmap_interface
- int len;
- };
-
-+#ifndef Data_abstract_val
-+#define Data_abstract_val(x) ((void *)Op_val(x))
-+#endif
-+
- #endif
-diff --git a/tools/ocaml/libs/mmap/xenmmap_stubs.c b/tools/ocaml/libs/mmap/xenmmap_stubs.c
-index e2ce088e25..e03951d781 100644
---- a/tools/ocaml/libs/mmap/xenmmap_stubs.c
-+++ b/tools/ocaml/libs/mmap/xenmmap_stubs.c
-@@ -28,7 +28,7 @@
- #include <caml/fail.h>
- #include <caml/callback.h>
-
--#define Intf_val(a) ((struct mmap_interface *) a)
-+#define Intf_val(a) ((struct mmap_interface *)Data_abstract_val(a))
-
- static int mmap_interface_init(struct mmap_interface *intf,
- int fd, int pflag, int mflag,
-diff --git a/tools/ocaml/libs/xb/xs_ring_stubs.c b/tools/ocaml/libs/xb/xs_ring_stubs.c
-index 7a91fdee75..1f58524535 100644
---- a/tools/ocaml/libs/xb/xs_ring_stubs.c
-+++ b/tools/ocaml/libs/xb/xs_ring_stubs.c
-@@ -35,7 +35,7 @@
- #include <sys/mman.h>
- #include "mmap_stubs.h"
-
--#define GET_C_STRUCT(a) ((struct mmap_interface *) a)
-+#define GET_C_STRUCT(a) ((struct mmap_interface *)Data_abstract_val(a))
-
- /*
- * Bytes_val has been introduced by Ocaml 4.06.1. So define our own version
---
-2.40.0
-
diff --git a/0013-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch b/0013-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch
new file mode 100644
index 0000000..963818d
--- /dev/null
+++ b/0013-tools-xenstored-domain_entry_fix-Handle-conflicting-.patch
@@ -0,0 +1,65 @@
+From 3382512b9f5e0d8cf37709d7cb47389d2ce8e624 Mon Sep 17 00:00:00 2001
+From: Julien Grall <jgrall@amazon.com>
+Date: Fri, 22 Sep 2023 11:32:16 +0100
+Subject: [PATCH 13/27] tools/xenstored: domain_entry_fix(): Handle conflicting
+ transaction
+
+The function domain_entry_fix() will be initially called to check if the
+quota is correct before attempt to commit any nodes. So it would be
+possible that accounting is temporarily negative. This is the case
+in the following sequence:
+
+ 1) Create 50 nodes
+ 2) Start two transactions
+ 3) Delete all the nodes in each transaction
+ 4) Commit the two transactions
+
+Because the first transaction will have succeeded and updated the
+accounting, there is no guarantee that 'd->nbentry + num' will still
+be above 0. So the assert() would be triggered.
+The assert() was introduced in dbef1f748289 ("tools/xenstore: simplify
+and fix per domain node accounting") with the assumption that the
+value can't be negative. As this is not true revert to the original
+check but restricted to the path where we don't update. Take the
+opportunity to explain the rationale behind the check.
+
+This is CVE-2023-34323 / XSA-440.
+
+Fixes: dbef1f748289 ("tools/xenstore: simplify and fix per domain node accounting")
+Signed-off-by: Julien Grall <jgrall@amazon.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+(cherry picked from commit c4e05c97f57d236040d1da5c1fbf6e3699dc86ea)
+---
+ tools/xenstore/xenstored_domain.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
+index ddd49eddfa..a3475284ea 100644
+--- a/tools/xenstore/xenstored_domain.c
++++ b/tools/xenstore/xenstored_domain.c
+@@ -1062,10 +1062,20 @@ int domain_entry_fix(unsigned int domid, int num, bool update)
+ }
+
+ cnt = d->nbentry + num;
+- assert(cnt >= 0);
+
+- if (update)
++ if (update) {
++ assert(cnt >= 0);
+ d->nbentry = cnt;
++ } else if (cnt < 0) {
++ /*
++ * In a transaction when a node is being added/removed AND
++ * the same node has been added/removed outside the
++ * transaction in parallel, the result value may be negative.
++ * This is no problem, as the transaction will fail due to
++ * the resulting conflict. So override 'cnt'.
++ */
++ cnt = 0;
++ }
+
+ return domid_is_unprivileged(domid) ? cnt : 0;
+ }
+--
+2.42.0
+
diff --git a/0014-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch b/0014-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch
new file mode 100644
index 0000000..9642714
--- /dev/null
+++ b/0014-iommu-amd-vi-flush-IOMMU-TLB-when-flushing-the-DTE.patch
@@ -0,0 +1,186 @@
+From 35217b78048e91a0f4d0f14b31a474cc59ec1388 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau@citrix.com>
+Date: Tue, 13 Jun 2023 15:01:05 +0200
+Subject: [PATCH 14/27] iommu/amd-vi: flush IOMMU TLB when flushing the DTE
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The caching invalidation guidelines from the AMD-Vi specification (48882—Rev
+3.07-PUB—Oct 2022) seem to be misleading on some hardware, as devices will
+malfunction (see stale DMA mappings) if some fields of the DTE are updated but
+the IOMMU TLB is not flushed. This has been observed in practice on AMD
+systems. Due to the lack of guidance from the currently published
+specification this patch aims to increase the flushing done in order to prevent
+device malfunction.
+
+In order to fix, issue an INVALIDATE_IOMMU_PAGES command from
+amd_iommu_flush_device(), flushing all the address space. Note this requires
+callers to be adjusted in order to pass the DomID on the DTE previous to the
+modification.
+
+Some call sites don't provide a valid DomID to amd_iommu_flush_device() in
+order to avoid the flush. That's because the device had address translations
+disabled and hence the previous DomID on the DTE is not valid. Note the
+current logic relies on the entity disabling address translations to also flush
+the TLB of the in use DomID.
+
+Device I/O TLB flushing when ATS are enabled is not covered by the current
+change, as ATS usage is not security supported.
+
+This is XSA-442 / CVE-2023-34326
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 5fc98b97084a46884acef9320e643faf40d42212)
+---
+ xen/drivers/passthrough/amd/iommu.h | 3 ++-
+ xen/drivers/passthrough/amd/iommu_cmd.c | 10 +++++++++-
+ xen/drivers/passthrough/amd/iommu_guest.c | 5 +++--
+ xen/drivers/passthrough/amd/iommu_init.c | 6 +++++-
+ xen/drivers/passthrough/amd/pci_amd_iommu.c | 14 ++++++++++----
+ 5 files changed, 29 insertions(+), 9 deletions(-)
+
+diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h
+index 3c702eb517..6dd24593a0 100644
+--- a/xen/drivers/passthrough/amd/iommu.h
++++ b/xen/drivers/passthrough/amd/iommu.h
+@@ -280,7 +280,8 @@ void amd_iommu_flush_pages(struct domain *d, unsigned long dfn,
+ unsigned int order);
+ void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
+ uint64_t gaddr, unsigned int order);
+-void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf);
++void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf,
++ domid_t domid);
+ void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf);
+ void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
+
+diff --git a/xen/drivers/passthrough/amd/iommu_cmd.c b/xen/drivers/passthrough/amd/iommu_cmd.c
+index 809d93b89f..41a32c757b 100644
+--- a/xen/drivers/passthrough/amd/iommu_cmd.c
++++ b/xen/drivers/passthrough/amd/iommu_cmd.c
+@@ -362,10 +362,18 @@ void amd_iommu_flush_pages(struct domain *d,
+ _amd_iommu_flush_pages(d, __dfn_to_daddr(dfn), order);
+ }
+
+-void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf)
++void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf,
++ domid_t domid)
+ {
+ invalidate_dev_table_entry(iommu, bdf);
+ flush_command_buffer(iommu, 0);
++
++ /* Also invalidate IOMMU TLB entries when flushing the DTE. */
++ if ( domid != DOMID_INVALID )
++ {
++ invalidate_iommu_pages(iommu, INV_IOMMU_ALL_PAGES_ADDRESS, domid, 0);
++ flush_command_buffer(iommu, 0);
++ }
+ }
+
+ void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf)
+diff --git a/xen/drivers/passthrough/amd/iommu_guest.c b/xen/drivers/passthrough/amd/iommu_guest.c
+index 85828490ff..38c7b4d979 100644
+--- a/xen/drivers/passthrough/amd/iommu_guest.c
++++ b/xen/drivers/passthrough/amd/iommu_guest.c
+@@ -385,7 +385,7 @@ static int do_completion_wait(struct domain *d, cmd_entry_t *cmd)
+
+ static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd)
+ {
+- uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id;
++ uint16_t gbdf, mbdf, req_id, gdom_id, hdom_id, prev_domid;
+ struct amd_iommu_dte *gdte, *mdte, *dte_base;
+ struct amd_iommu *iommu = NULL;
+ struct guest_iommu *g_iommu;
+@@ -445,13 +445,14 @@ static int do_invalidate_dte(struct domain *d, cmd_entry_t *cmd)
+ req_id = get_dma_requestor_id(iommu->seg, mbdf);
+ dte_base = iommu->dev_table.buffer;
+ mdte = &dte_base[req_id];
++ prev_domid = mdte->domain_id;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ dte_set_gcr3_table(mdte, hdom_id, gcr3_mfn << PAGE_SHIFT, gv, glx);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+- amd_iommu_flush_device(iommu, req_id);
++ amd_iommu_flush_device(iommu, req_id, prev_domid);
+
+ return 0;
+ }
+diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
+index ca791d4e54..7dfe4b15dc 100644
+--- a/xen/drivers/passthrough/amd/iommu_init.c
++++ b/xen/drivers/passthrough/amd/iommu_init.c
+@@ -1556,7 +1556,11 @@ static int _invalidate_all_devices(
+ req_id = ivrs_mappings[bdf].dte_requestor_id;
+ if ( iommu )
+ {
+- amd_iommu_flush_device(iommu, req_id);
++ /*
++ * IOMMU TLB flush performed separately (see
++ * invalidate_all_domain_pages()).
++ */
++ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID);
+ amd_iommu_flush_intremap(iommu, req_id);
+ }
+ }
+diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
+index e5e0f00402..7b6dbf546a 100644
+--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
++++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
+@@ -192,10 +192,13 @@ static int __must_check amd_iommu_setup_domain_device(
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+- amd_iommu_flush_device(iommu, req_id);
++ /* DTE didn't have DMA translations enabled, do not flush the TLB. */
++ amd_iommu_flush_device(iommu, req_id, DOMID_INVALID);
+ }
+ else if ( dte->pt_root != mfn_x(page_to_mfn(root_pg)) )
+ {
++ domid_t prev_domid = dte->domain_id;
++
+ /*
+ * Strictly speaking if the device is the only one with this requestor
+ * ID, it could be allowed to be re-assigned regardless of unity map
+@@ -252,7 +255,7 @@ static int __must_check amd_iommu_setup_domain_device(
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+- amd_iommu_flush_device(iommu, req_id);
++ amd_iommu_flush_device(iommu, req_id, prev_domid);
+ }
+ else
+ spin_unlock_irqrestore(&iommu->lock, flags);
+@@ -421,6 +424,8 @@ static void amd_iommu_disable_domain_device(const struct domain *domain,
+ spin_lock_irqsave(&iommu->lock, flags);
+ if ( dte->tv || dte->v )
+ {
++ domid_t prev_domid = dte->domain_id;
++
+ /* See the comment in amd_iommu_setup_device_table(). */
+ dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_ABORTED;
+ smp_wmb();
+@@ -439,7 +444,7 @@ static void amd_iommu_disable_domain_device(const struct domain *domain,
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+- amd_iommu_flush_device(iommu, req_id);
++ amd_iommu_flush_device(iommu, req_id, prev_domid);
+
+ AMD_IOMMU_DEBUG("Disable: device id = %#x, "
+ "domain = %d, paging mode = %d\n",
+@@ -611,7 +616,8 @@ static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev)
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+- amd_iommu_flush_device(iommu, bdf);
++ /* DTE didn't have DMA translations enabled, do not flush the TLB. */
++ amd_iommu_flush_device(iommu, bdf, DOMID_INVALID);
+ }
+
+ if ( amd_iommu_reserve_domain_unity_map(
+--
+2.42.0
+
diff --git a/0014-tools-ocaml-xb-Drop-Xs_ring.write.patch b/0014-tools-ocaml-xb-Drop-Xs_ring.write.patch
deleted file mode 100644
index 813f041..0000000
--- a/0014-tools-ocaml-xb-Drop-Xs_ring.write.patch
+++ /dev/null
@@ -1,62 +0,0 @@
-From f7c4fab9b50af74d0e1170fbf35367ced48d8209 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Fri, 16 Dec 2022 18:25:20 +0000
-Subject: [PATCH 14/61] tools/ocaml/xb: Drop Xs_ring.write
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This function is unusued (only Xs_ring.write_substring is used), and the
-bytes/string conversion here is backwards: the C stub implements the bytes
-version and then we use a Bytes.unsafe_of_string to convert a string into
-bytes.
-
-However the operation here really is read-only: we read from the string and
-write it to the ring, so the C stub should implement the read-only string
-version, and if needed we could use Bytes.unsafe_to_string to be able to send
-'bytes'. However that is not necessary as the 'bytes' version is dropped above.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 01f139215e678c2dc7d4bb3f9f2777069bb1b091)
----
- tools/ocaml/libs/xb/xs_ring.ml | 5 +----
- tools/ocaml/libs/xb/xs_ring_stubs.c | 2 +-
- 2 files changed, 2 insertions(+), 5 deletions(-)
-
-diff --git a/tools/ocaml/libs/xb/xs_ring.ml b/tools/ocaml/libs/xb/xs_ring.ml
-index db7f86bd27..dd5e014a33 100644
---- a/tools/ocaml/libs/xb/xs_ring.ml
-+++ b/tools/ocaml/libs/xb/xs_ring.ml
-@@ -25,14 +25,11 @@ module Server_features = Set.Make(struct
- end)
-
- external read: Xenmmap.mmap_interface -> bytes -> int -> int = "ml_interface_read"
--external write: Xenmmap.mmap_interface -> bytes -> int -> int = "ml_interface_write"
-+external write_substring: Xenmmap.mmap_interface -> string -> int -> int = "ml_interface_write"
-
- external _internal_set_server_features: Xenmmap.mmap_interface -> int -> unit = "ml_interface_set_server_features" [@@noalloc]
- external _internal_get_server_features: Xenmmap.mmap_interface -> int = "ml_interface_get_server_features" [@@noalloc]
-
--let write_substring mmap buff len =
-- write mmap (Bytes.unsafe_of_string buff) len
--
- let get_server_features mmap =
- (* NB only one feature currently defined above *)
- let x = _internal_get_server_features mmap in
-diff --git a/tools/ocaml/libs/xb/xs_ring_stubs.c b/tools/ocaml/libs/xb/xs_ring_stubs.c
-index 1f58524535..1243c63f03 100644
---- a/tools/ocaml/libs/xb/xs_ring_stubs.c
-+++ b/tools/ocaml/libs/xb/xs_ring_stubs.c
-@@ -112,7 +112,7 @@ CAMLprim value ml_interface_write(value ml_interface,
- CAMLlocal1(ml_result);
-
- struct mmap_interface *interface = GET_C_STRUCT(ml_interface);
-- const unsigned char *buffer = Bytes_val(ml_buffer);
-+ const char *buffer = String_val(ml_buffer);
- int len = Int_val(ml_len);
- int result;
-
---
-2.40.0
-
diff --git a/0015-libfsimage-xfs-Remove-dead-code.patch b/0015-libfsimage-xfs-Remove-dead-code.patch
new file mode 100644
index 0000000..93b6e2d
--- /dev/null
+++ b/0015-libfsimage-xfs-Remove-dead-code.patch
@@ -0,0 +1,71 @@
+From d51a2a1843b612b03f764703159a0946fe026750 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Thu, 14 Sep 2023 13:22:50 +0100
+Subject: [PATCH 15/27] libfsimage/xfs: Remove dead code
+
+xfs_info.agnolog (and related code) and XFS_INO_AGBNO_BITS are dead code
+that serve no purpose.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 37fc1e6c1c5c63aafd9cfd76a37728d5baea7d71)
+---
+ tools/libfsimage/xfs/fsys_xfs.c | 18 ------------------
+ 1 file changed, 18 deletions(-)
+
+diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
+index d735a88e55..2800699f59 100644
+--- a/tools/libfsimage/xfs/fsys_xfs.c
++++ b/tools/libfsimage/xfs/fsys_xfs.c
+@@ -37,7 +37,6 @@ struct xfs_info {
+ int blklog;
+ int inopblog;
+ int agblklog;
+- int agnolog;
+ unsigned int nextents;
+ xfs_daddr_t next;
+ xfs_daddr_t daddr;
+@@ -65,9 +64,7 @@ static struct xfs_info xfs;
+
+ #define XFS_INO_MASK(k) ((xfs_uint32_t)((1ULL << (k)) - 1))
+ #define XFS_INO_OFFSET_BITS xfs.inopblog
+-#define XFS_INO_AGBNO_BITS xfs.agblklog
+ #define XFS_INO_AGINO_BITS (xfs.agblklog + xfs.inopblog)
+-#define XFS_INO_AGNO_BITS xfs.agnolog
+
+ static inline xfs_agblock_t
+ agino2agbno (xfs_agino_t agino)
+@@ -149,20 +146,6 @@ xt_len (xfs_bmbt_rec_32_t *r)
+ return le32(r->l3) & mask32lo(21);
+ }
+
+-static inline int
+-xfs_highbit32(xfs_uint32_t v)
+-{
+- int i;
+-
+- if (--v) {
+- for (i = 0; i < 31; i++, v >>= 1) {
+- if (v == 0)
+- return i;
+- }
+- }
+- return 0;
+-}
+-
+ static int
+ isinxt (xfs_fileoff_t key, xfs_fileoff_t offset, xfs_filblks_t len)
+ {
+@@ -472,7 +455,6 @@ xfs_mount (fsi_file_t *ffi, const char *options)
+
+ xfs.inopblog = super.sb_inopblog;
+ xfs.agblklog = super.sb_agblklog;
+- xfs.agnolog = xfs_highbit32 (le32(super.sb_agcount));
+
+ xfs.btnode_ptr0_off =
+ ((xfs.bsize - sizeof(xfs_btree_block_t)) /
+--
+2.42.0
+
diff --git a/0015-tools-oxenstored-validate-config-file-before-live-up.patch b/0015-tools-oxenstored-validate-config-file-before-live-up.patch
deleted file mode 100644
index f65fbd6..0000000
--- a/0015-tools-oxenstored-validate-config-file-before-live-up.patch
+++ /dev/null
@@ -1,131 +0,0 @@
-From fd1c70442d3aa962be4d041d5f8fce9d2fa72ce1 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
-Date: Tue, 11 May 2021 15:56:50 +0000
-Subject: [PATCH 15/61] tools/oxenstored: validate config file before live
- update
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The configuration file can contain typos or various errors that could prevent
-live update from succeeding (e.g. a flag only valid on a different version).
-Unknown entries in the config file would be ignored on startup normally,
-add a strict --config-test that live-update can use to check that the config file
-is valid *for the new binary*.
-
-For compatibility with running old code during live update recognize
---live --help as an equivalent to --config-test.
-
-Signed-off-by: Edwin Török <edvin.torok@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit e6f07052ce4a0f0b7d4dc522d87465efb2d9ee86)
----
- tools/ocaml/xenstored/parse_arg.ml | 26 ++++++++++++++++++++++++++
- tools/ocaml/xenstored/xenstored.ml | 11 +++++++++--
- 2 files changed, 35 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/xenstored/parse_arg.ml b/tools/ocaml/xenstored/parse_arg.ml
-index 7c0478e76a..5e4ca6f1f7 100644
---- a/tools/ocaml/xenstored/parse_arg.ml
-+++ b/tools/ocaml/xenstored/parse_arg.ml
-@@ -26,8 +26,14 @@ type config =
- restart: bool;
- live_reload: bool;
- disable_socket: bool;
-+ config_test: bool;
- }
-
-+let get_config_filename config_file =
-+ match config_file with
-+ | Some name -> name
-+ | None -> Define.default_config_dir ^ "/oxenstored.conf"
-+
- let do_argv =
- let pidfile = ref "" and tracefile = ref "" (* old xenstored compatibility *)
- and domain_init = ref true
-@@ -38,6 +44,8 @@ let do_argv =
- and restart = ref false
- and live_reload = ref false
- and disable_socket = ref false
-+ and config_test = ref false
-+ and help = ref false
- in
-
- let speclist =
-@@ -55,10 +63,27 @@ let do_argv =
- ("-T", Arg.Set_string tracefile, ""); (* for compatibility *)
- ("--restart", Arg.Set restart, "Read database on starting");
- ("--live", Arg.Set live_reload, "Read live dump on startup");
-+ ("--config-test", Arg.Set config_test, "Test validity of config file");
- ("--disable-socket", Arg.Unit (fun () -> disable_socket := true), "Disable socket");
-+ ("--help", Arg.Set help, "Display this list of options")
- ] in
- let usage_msg = "usage : xenstored [--config-file <filename>] [--no-domain-init] [--help] [--no-fork] [--reraise-top-level] [--restart] [--disable-socket]" in
- Arg.parse speclist (fun _ -> ()) usage_msg;
-+ let () =
-+ if !help then begin
-+ if !live_reload then
-+ (*
-+ * Transform --live --help into --config-test for backward compat with
-+ * running code during live update.
-+ * Caller will validate config and exit
-+ *)
-+ config_test := true
-+ else begin
-+ Arg.usage_string speclist usage_msg |> print_endline;
-+ exit 0
-+ end
-+ end
-+ in
- {
- domain_init = !domain_init;
- activate_access_log = !activate_access_log;
-@@ -70,4 +95,5 @@ let do_argv =
- restart = !restart;
- live_reload = !live_reload;
- disable_socket = !disable_socket;
-+ config_test = !config_test;
- }
-diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
-index 4d5851c5cb..e2638a5af2 100644
---- a/tools/ocaml/xenstored/xenstored.ml
-+++ b/tools/ocaml/xenstored/xenstored.ml
-@@ -88,7 +88,7 @@ let default_pidfile = Paths.xen_run_dir ^ "/xenstored.pid"
-
- let ring_scan_interval = ref 20
-
--let parse_config filename =
-+let parse_config ?(strict=false) filename =
- let pidfile = ref default_pidfile in
- let options = [
- ("merge-activate", Config.Set_bool Transaction.do_coalesce);
-@@ -129,11 +129,12 @@ let parse_config filename =
- ("xenstored-port", Config.Set_string Domains.xenstored_port); ] in
- begin try Config.read filename options (fun _ _ -> raise Not_found)
- with
-- | Config.Error err -> List.iter (fun (k, e) ->
-+ | Config.Error err as e -> List.iter (fun (k, e) ->
- match e with
- | "unknown key" -> eprintf "config: unknown key %s\n" k
- | _ -> eprintf "config: %s: %s\n" k e
- ) err;
-+ if strict then raise e
- | Sys_error m -> eprintf "error: config: %s\n" m;
- end;
- !pidfile
-@@ -358,6 +359,12 @@ let tweak_gc () =
- let () =
- Printexc.set_uncaught_exception_handler Logging.fallback_exception_handler;
- let cf = do_argv in
-+ if cf.config_test then begin
-+ let path = config_filename cf in
-+ let _pidfile:string = parse_config ~strict:true path in
-+ Printf.printf "Configuration valid at %s\n%!" path;
-+ exit 0
-+ end;
- let pidfile =
- if Sys.file_exists (config_filename cf) then
- parse_config (config_filename cf)
---
-2.40.0
-
diff --git a/0016-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch b/0016-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch
new file mode 100644
index 0000000..a6b4527
--- /dev/null
+++ b/0016-libfsimage-xfs-Amend-mask32lo-to-allow-the-value-32.patch
@@ -0,0 +1,33 @@
+From 7d520b8d4ec7495f1ef1e4343a4f705a363e0c9c Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Thu, 14 Sep 2023 13:22:51 +0100
+Subject: [PATCH 16/27] libfsimage/xfs: Amend mask32lo() to allow the value 32
+
+agblklog could plausibly be 32, but that would overflow this shift.
+Perform the shift as ULL and cast to u32 at the end instead.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit ddc45e4eea946bb373a4b4a60c84bf9339cf413b)
+---
+ tools/libfsimage/xfs/fsys_xfs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
+index 2800699f59..4720bb4505 100644
+--- a/tools/libfsimage/xfs/fsys_xfs.c
++++ b/tools/libfsimage/xfs/fsys_xfs.c
+@@ -60,7 +60,7 @@ static struct xfs_info xfs;
+ #define inode ((xfs_dinode_t *)((char *)FSYS_BUF + 8192))
+ #define icore (inode->di_core)
+
+-#define mask32lo(n) (((xfs_uint32_t)1 << (n)) - 1)
++#define mask32lo(n) ((xfs_uint32_t)((1ull << (n)) - 1))
+
+ #define XFS_INO_MASK(k) ((xfs_uint32_t)((1ULL << (k)) - 1))
+ #define XFS_INO_OFFSET_BITS xfs.inopblog
+--
+2.42.0
+
diff --git a/0016-tools-ocaml-libs-Don-t-declare-stubs-as-taking-void.patch b/0016-tools-ocaml-libs-Don-t-declare-stubs-as-taking-void.patch
deleted file mode 100644
index a64d657..0000000
--- a/0016-tools-ocaml-libs-Don-t-declare-stubs-as-taking-void.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From 552e5f28d411c1a1a92f2fd3592a76e74f47610b Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
-Date: Thu, 12 Jan 2023 11:28:29 +0000
-Subject: [PATCH 16/61] tools/ocaml/libs: Don't declare stubs as taking void
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-There is no such thing as an Ocaml function (C stub or otherwise) taking no
-parameters. In the absence of any other parameters, unit is still passed.
-
-This doesn't explode with any ABI we care about, but would malfunction for an
-ABI environment such as stdcall.
-
-Fixes: c3afd398ba7f ("ocaml: Add XS bindings.")
-Fixes: 8b7ce06a2d34 ("ocaml: Add XC bindings.")
-Signed-off-by: Edwin Török <edwin.torok@cloud.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit ff8b560be80b9211c303d74df7e4b3921d2bb8ca)
----
- tools/ocaml/libs/xb/xenbus_stubs.c | 5 ++---
- tools/ocaml/libs/xc/xenctrl_stubs.c | 4 ++--
- 2 files changed, 4 insertions(+), 5 deletions(-)
-
-diff --git a/tools/ocaml/libs/xb/xenbus_stubs.c b/tools/ocaml/libs/xb/xenbus_stubs.c
-index 3065181a55..97116b0782 100644
---- a/tools/ocaml/libs/xb/xenbus_stubs.c
-+++ b/tools/ocaml/libs/xb/xenbus_stubs.c
-@@ -30,10 +30,9 @@
- #include <xenctrl.h>
- #include <xen/io/xs_wire.h>
-
--CAMLprim value stub_header_size(void)
-+CAMLprim value stub_header_size(value unit)
- {
-- CAMLparam0();
-- CAMLreturn(Val_int(sizeof(struct xsd_sockmsg)));
-+ return Val_int(sizeof(struct xsd_sockmsg));
- }
-
- CAMLprim value stub_header_of_string(value s)
-diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
-index 5b4fe72c8d..434fc0345b 100644
---- a/tools/ocaml/libs/xc/xenctrl_stubs.c
-+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
-@@ -67,9 +67,9 @@ static void Noreturn failwith_xc(xc_interface *xch)
- caml_raise_with_string(*caml_named_value("xc.error"), error_str);
- }
-
--CAMLprim value stub_xc_interface_open(void)
-+CAMLprim value stub_xc_interface_open(value unit)
- {
-- CAMLparam0();
-+ CAMLparam1(unit);
- xc_interface *xch;
-
- /* Don't assert XC_OPENFLAG_NON_REENTRANT because these bindings
---
-2.40.0
-
diff --git a/0017-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch b/0017-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch
new file mode 100644
index 0000000..4694ea1
--- /dev/null
+++ b/0017-libfsimage-xfs-Sanity-check-the-superblock-during-mo.patch
@@ -0,0 +1,137 @@
+From 2de503f8fd0d07401e92abed1097ceb5fd1801f6 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Thu, 14 Sep 2023 13:22:52 +0100
+Subject: [PATCH 17/27] libfsimage/xfs: Sanity-check the superblock during
+ mounts
+
+Sanity-check the XFS superblock for wellformedness at the mount handler.
+This forces pygrub to abort parsing a potentially malformed filesystem and
+ensures the invariants assumed throughout the rest of the code hold.
+
+Also, derive parameters from previously sanitized parameters where possible
+(rather than reading them off the superblock)
+
+The code doesn't try to avoid overflowing the end of the disk, because
+that's an unlikely and benign error. Parameters used in calculations of
+xfs_daddr_t (like the root inode index) aren't in critical need of being
+sanitized.
+
+The sanitization of agblklog is basically checking that no obvious
+overflows happen on agblklog, and then ensuring agblocks is contained in
+the range (2^(sb_agblklog-1), 2^sb_agblklog].
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 620500dd1baf33347dfde5e7fde7cf7fe347da5c)
+---
+ tools/libfsimage/xfs/fsys_xfs.c | 48 ++++++++++++++++++++++++++-------
+ tools/libfsimage/xfs/xfs.h | 12 +++++++++
+ 2 files changed, 50 insertions(+), 10 deletions(-)
+
+diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
+index 4720bb4505..e4eb7e1ee2 100644
+--- a/tools/libfsimage/xfs/fsys_xfs.c
++++ b/tools/libfsimage/xfs/fsys_xfs.c
+@@ -17,6 +17,7 @@
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
++#include <stdbool.h>
+ #include <xenfsimage_grub.h>
+ #include "xfs.h"
+
+@@ -433,29 +434,56 @@ first_dentry (fsi_file_t *ffi, xfs_ino_t *ino)
+ return next_dentry (ffi, ino);
+ }
+
++static bool
++xfs_sb_is_invalid (const xfs_sb_t *super)
++{
++ return (le32(super->sb_magicnum) != XFS_SB_MAGIC)
++ || ((le16(super->sb_versionnum) & XFS_SB_VERSION_NUMBITS) !=
++ XFS_SB_VERSION_4)
++ || (super->sb_inodelog < XFS_SB_INODELOG_MIN)
++ || (super->sb_inodelog > XFS_SB_INODELOG_MAX)
++ || (super->sb_blocklog < XFS_SB_BLOCKLOG_MIN)
++ || (super->sb_blocklog > XFS_SB_BLOCKLOG_MAX)
++ || (super->sb_blocklog < super->sb_inodelog)
++ || (super->sb_agblklog > XFS_SB_AGBLKLOG_MAX)
++ || ((1ull << super->sb_agblklog) < le32(super->sb_agblocks))
++ || (((1ull << super->sb_agblklog) >> 1) >=
++ le32(super->sb_agblocks))
++ || ((super->sb_blocklog + super->sb_dirblklog) >=
++ XFS_SB_DIRBLK_NUMBITS);
++}
++
+ static int
+ xfs_mount (fsi_file_t *ffi, const char *options)
+ {
+ xfs_sb_t super;
+
+ if (!devread (ffi, 0, 0, sizeof(super), (char *)&super)
+- || (le32(super.sb_magicnum) != XFS_SB_MAGIC)
+- || ((le16(super.sb_versionnum)
+- & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4) ) {
++ || xfs_sb_is_invalid(&super)) {
+ return 0;
+ }
+
+- xfs.bsize = le32 (super.sb_blocksize);
+- xfs.blklog = super.sb_blocklog;
+- xfs.bdlog = xfs.blklog - SECTOR_BITS;
++ /*
++ * Not sanitized. It's exclusively used to generate disk addresses,
++ * so it's not important from a security standpoint.
++ */
+ xfs.rootino = le64 (super.sb_rootino);
+- xfs.isize = le16 (super.sb_inodesize);
+- xfs.agblocks = le32 (super.sb_agblocks);
+- xfs.dirbsize = xfs.bsize << super.sb_dirblklog;
+
+- xfs.inopblog = super.sb_inopblog;
++ /*
++ * Sanitized to be consistent with each other, only used to
++ * generate disk addresses, so it's safe
++ */
++ xfs.agblocks = le32 (super.sb_agblocks);
+ xfs.agblklog = super.sb_agblklog;
+
++ /* Derived from sanitized parameters */
++ xfs.bsize = 1 << super.sb_blocklog;
++ xfs.blklog = super.sb_blocklog;
++ xfs.bdlog = super.sb_blocklog - SECTOR_BITS;
++ xfs.isize = 1 << super.sb_inodelog;
++ xfs.dirbsize = 1 << (super.sb_blocklog + super.sb_dirblklog);
++ xfs.inopblog = super.sb_blocklog - super.sb_inodelog;
++
+ xfs.btnode_ptr0_off =
+ ((xfs.bsize - sizeof(xfs_btree_block_t)) /
+ (sizeof (xfs_bmbt_key_t) + sizeof (xfs_bmbt_ptr_t)))
+diff --git a/tools/libfsimage/xfs/xfs.h b/tools/libfsimage/xfs/xfs.h
+index 40699281e4..b87e37d3d7 100644
+--- a/tools/libfsimage/xfs/xfs.h
++++ b/tools/libfsimage/xfs/xfs.h
+@@ -134,6 +134,18 @@ typedef struct xfs_sb
+ xfs_uint8_t sb_dummy[7]; /* padding */
+ } xfs_sb_t;
+
++/* Bound taken from xfs.c in GRUB2. It doesn't exist in the spec */
++#define XFS_SB_DIRBLK_NUMBITS 27
++/* Implied by the XFS specification. The minimum block size is 512 octets */
++#define XFS_SB_BLOCKLOG_MIN 9
++/* Implied by the XFS specification. The maximum block size is 65536 octets */
++#define XFS_SB_BLOCKLOG_MAX 16
++/* Implied by the XFS specification. The minimum inode size is 256 octets */
++#define XFS_SB_INODELOG_MIN 8
++/* Implied by the XFS specification. The maximum inode size is 2048 octets */
++#define XFS_SB_INODELOG_MAX 11
++/* High bound for sb_agblklog */
++#define XFS_SB_AGBLKLOG_MAX 32
+
+ /* those are from xfs_btree.h */
+
+--
+2.42.0
+
diff --git a/0017-tools-ocaml-libs-Allocate-the-correct-amount-of-memo.patch b/0017-tools-ocaml-libs-Allocate-the-correct-amount-of-memo.patch
deleted file mode 100644
index 9fa8d08..0000000
--- a/0017-tools-ocaml-libs-Allocate-the-correct-amount-of-memo.patch
+++ /dev/null
@@ -1,80 +0,0 @@
-From 6d66fb984cc768406158353cabf9a55652b0dea7 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 31 Jan 2023 10:59:42 +0000
-Subject: [PATCH 17/61] tools/ocaml/libs: Allocate the correct amount of memory
- for Abstract_tag
-
-caml_alloc() takes units of Wsize (word size), not bytes. As a consequence,
-we're allocating 4 or 8 times too much memory.
-
-Ocaml has a helper, Wsize_bsize(), but it truncates cases which aren't an
-exact multiple. Use a BUILD_BUG_ON() to cover the potential for truncation,
-as there's no rounding-up form of the helper.
-
-Fixes: 8b7ce06a2d34 ("ocaml: Add XC bindings.")
-Fixes: d3e649277a13 ("ocaml: add mmap bindings implementation.")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 36eb2de31b6ecb8787698fb1a701bd708c8971b2)
----
- tools/ocaml/libs/mmap/Makefile | 2 ++
- tools/ocaml/libs/mmap/xenmmap_stubs.c | 6 +++++-
- tools/ocaml/libs/xc/xenctrl_stubs.c | 5 ++++-
- 3 files changed, 11 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/libs/mmap/Makefile b/tools/ocaml/libs/mmap/Makefile
-index df45819df5..a3bd75e33a 100644
---- a/tools/ocaml/libs/mmap/Makefile
-+++ b/tools/ocaml/libs/mmap/Makefile
-@@ -2,6 +2,8 @@ TOPLEVEL=$(CURDIR)/../..
- XEN_ROOT=$(TOPLEVEL)/../..
- include $(TOPLEVEL)/common.make
-
-+CFLAGS += $(CFLAGS_xeninclude)
-+
- OBJS = xenmmap
- INTF = $(foreach obj, $(OBJS),$(obj).cmi)
- LIBS = xenmmap.cma xenmmap.cmxa
-diff --git a/tools/ocaml/libs/mmap/xenmmap_stubs.c b/tools/ocaml/libs/mmap/xenmmap_stubs.c
-index e03951d781..d623ad390e 100644
---- a/tools/ocaml/libs/mmap/xenmmap_stubs.c
-+++ b/tools/ocaml/libs/mmap/xenmmap_stubs.c
-@@ -21,6 +21,8 @@
- #include <errno.h>
- #include "mmap_stubs.h"
-
-+#include <xen-tools/libs.h>
-+
- #include <caml/mlvalues.h>
- #include <caml/memory.h>
- #include <caml/alloc.h>
-@@ -59,7 +61,9 @@ CAMLprim value stub_mmap_init(value fd, value pflag, value mflag,
- default: caml_invalid_argument("maptype");
- }
-
-- result = caml_alloc(sizeof(struct mmap_interface), Abstract_tag);
-+ BUILD_BUG_ON((sizeof(struct mmap_interface) % sizeof(value)) != 0);
-+ result = caml_alloc(Wsize_bsize(sizeof(struct mmap_interface)),
-+ Abstract_tag);
-
- if (mmap_interface_init(Intf_val(result), Int_val(fd),
- c_pflag, c_mflag,
-diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
-index 434fc0345b..ec64341a9a 100644
---- a/tools/ocaml/libs/xc/xenctrl_stubs.c
-+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
-@@ -940,7 +940,10 @@ CAMLprim value stub_map_foreign_range(value xch, value dom,
- uint32_t c_dom;
- unsigned long c_mfn;
-
-- result = caml_alloc(sizeof(struct mmap_interface), Abstract_tag);
-+ BUILD_BUG_ON((sizeof(struct mmap_interface) % sizeof(value)) != 0);
-+ result = caml_alloc(Wsize_bsize(sizeof(struct mmap_interface)),
-+ Abstract_tag);
-+
- intf = (struct mmap_interface *) result;
-
- intf->len = Int_val(size);
---
-2.40.0
-
diff --git a/0018-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch b/0018-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch
new file mode 100644
index 0000000..2f2d7ca
--- /dev/null
+++ b/0018-libfsimage-xfs-Add-compile-time-check-to-libfsimage.patch
@@ -0,0 +1,61 @@
+From 766126159ee963cdc16ba9cb2b0ca54b98bc148f Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Thu, 14 Sep 2023 13:22:53 +0100
+Subject: [PATCH 18/27] libfsimage/xfs: Add compile-time check to libfsimage
+
+Adds the common tools include folder to the -I compile flags
+of libfsimage. This allows us to use:
+ xen-tools/common-macros.h:BUILD_BUG_ON()
+
+With it, statically assert a sanitized "blocklog - SECTOR_BITS" cannot
+underflow.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 7d85c70431593550e32022e3a19a37f306f49e00)
+---
+ tools/libfsimage/Rules.mk | 2 +-
+ tools/libfsimage/xfs/fsys_xfs.c | 4 +++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/tools/libfsimage/Rules.mk b/tools/libfsimage/Rules.mk
+index bb6d42abb4..80598fb70a 100644
+--- a/tools/libfsimage/Rules.mk
++++ b/tools/libfsimage/Rules.mk
+@@ -1,6 +1,6 @@
+ include $(XEN_ROOT)/tools/Rules.mk
+
+-CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ -DFSIMAGE_FSDIR=\"$(FSDIR)\"
++CFLAGS += -Wno-unknown-pragmas -I$(XEN_ROOT)/tools/libfsimage/common/ $(CFLAGS_xeninclude) -DFSIMAGE_FSDIR=\"$(FSDIR)\"
+ CFLAGS += -Werror -D_GNU_SOURCE
+ LDFLAGS += -L../common/
+
+diff --git a/tools/libfsimage/xfs/fsys_xfs.c b/tools/libfsimage/xfs/fsys_xfs.c
+index e4eb7e1ee2..4a8dd6f239 100644
+--- a/tools/libfsimage/xfs/fsys_xfs.c
++++ b/tools/libfsimage/xfs/fsys_xfs.c
+@@ -19,6 +19,7 @@
+
+ #include <stdbool.h>
+ #include <xenfsimage_grub.h>
++#include <xen-tools/libs.h>
+ #include "xfs.h"
+
+ #define MAX_LINK_COUNT 8
+@@ -477,9 +478,10 @@ xfs_mount (fsi_file_t *ffi, const char *options)
+ xfs.agblklog = super.sb_agblklog;
+
+ /* Derived from sanitized parameters */
++ BUILD_BUG_ON(XFS_SB_BLOCKLOG_MIN < SECTOR_BITS);
++ xfs.bdlog = super.sb_blocklog - SECTOR_BITS;
+ xfs.bsize = 1 << super.sb_blocklog;
+ xfs.blklog = super.sb_blocklog;
+- xfs.bdlog = super.sb_blocklog - SECTOR_BITS;
+ xfs.isize = 1 << super.sb_inodelog;
+ xfs.dirbsize = 1 << (super.sb_blocklog + super.sb_dirblklog);
+ xfs.inopblog = super.sb_blocklog - super.sb_inodelog;
+--
+2.42.0
+
diff --git a/0018-tools-ocaml-evtchn-Don-t-reference-Custom-objects-wi.patch b/0018-tools-ocaml-evtchn-Don-t-reference-Custom-objects-wi.patch
deleted file mode 100644
index 8e1c860..0000000
--- a/0018-tools-ocaml-evtchn-Don-t-reference-Custom-objects-wi.patch
+++ /dev/null
@@ -1,213 +0,0 @@
-From e18faeb91e620624106b94c8821f8c9574eddb17 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
-Date: Thu, 12 Jan 2023 17:48:29 +0000
-Subject: [PATCH 18/61] tools/ocaml/evtchn: Don't reference Custom objects with
- the GC lock released
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The modification to the _H() macro for Ocaml 5 support introduced a subtle
-bug. From the manual:
-
- https://ocaml.org/manual/intfc.html#ss:parallel-execution-long-running-c-code
-
-"After caml_release_runtime_system() was called and until
-caml_acquire_runtime_system() is called, the C code must not access any OCaml
-data, nor call any function of the run-time system, nor call back into OCaml
-code."
-
-Previously, the value was a naked C pointer, so dereferencing it wasn't
-"accessing any Ocaml data", but the fix to avoid naked C pointers added a
-layer of indirection through an Ocaml Custom object, meaning that the common
-pattern of using _H() in a blocking section is unsafe.
-
-In order to fix:
-
- * Drop the _H() macro and replace it with a static inline xce_of_val().
- * Opencode the assignment into Data_custom_val() in the two constructors.
- * Rename "value xce" parameters to "value xce_val" so we can consistently
- have "xenevtchn_handle *xce" on the stack, and obtain the pointer with the
- GC lock still held.
-
-Fixes: 22d5affdf0ce ("tools/ocaml/evtchn: OCaml 5 support, fix potential resource leak")
-Signed-off-by: Edwin Török <edwin.torok@cloud.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 2636d8ff7a670c4d2485757dbe966e36c259a960)
----
- tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 60 +++++++++++--------
- 1 file changed, 35 insertions(+), 25 deletions(-)
-
-diff --git a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-index aa8a69cc1e..d7881ca95f 100644
---- a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-+++ b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-@@ -33,11 +33,14 @@
- #include <caml/fail.h>
- #include <caml/signals.h>
-
--#define _H(__h) (*((xenevtchn_handle **)Data_custom_val(__h)))
-+static inline xenevtchn_handle *xce_of_val(value v)
-+{
-+ return *(xenevtchn_handle **)Data_custom_val(v);
-+}
-
- static void stub_evtchn_finalize(value v)
- {
-- xenevtchn_close(_H(v));
-+ xenevtchn_close(xce_of_val(v));
- }
-
- static struct custom_operations xenevtchn_ops = {
-@@ -68,7 +71,7 @@ CAMLprim value stub_eventchn_init(value cloexec)
- caml_failwith("open failed");
-
- result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
-- _H(result) = xce;
-+ *(xenevtchn_handle **)Data_custom_val(result) = xce;
-
- CAMLreturn(result);
- }
-@@ -87,18 +90,19 @@ CAMLprim value stub_eventchn_fdopen(value fdval)
- caml_failwith("evtchn fdopen failed");
-
- result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
-- _H(result) = xce;
-+ *(xenevtchn_handle **)Data_custom_val(result) = xce;
-
- CAMLreturn(result);
- }
-
--CAMLprim value stub_eventchn_fd(value xce)
-+CAMLprim value stub_eventchn_fd(value xce_val)
- {
-- CAMLparam1(xce);
-+ CAMLparam1(xce_val);
- CAMLlocal1(result);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- int fd;
-
-- fd = xenevtchn_fd(_H(xce));
-+ fd = xenevtchn_fd(xce);
- if (fd == -1)
- caml_failwith("evtchn fd failed");
-
-@@ -107,13 +111,14 @@ CAMLprim value stub_eventchn_fd(value xce)
- CAMLreturn(result);
- }
-
--CAMLprim value stub_eventchn_notify(value xce, value port)
-+CAMLprim value stub_eventchn_notify(value xce_val, value port)
- {
-- CAMLparam2(xce, port);
-+ CAMLparam2(xce_val, port);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- int rc;
-
- caml_enter_blocking_section();
-- rc = xenevtchn_notify(_H(xce), Int_val(port));
-+ rc = xenevtchn_notify(xce, Int_val(port));
- caml_leave_blocking_section();
-
- if (rc == -1)
-@@ -122,15 +127,16 @@ CAMLprim value stub_eventchn_notify(value xce, value port)
- CAMLreturn(Val_unit);
- }
-
--CAMLprim value stub_eventchn_bind_interdomain(value xce, value domid,
-+CAMLprim value stub_eventchn_bind_interdomain(value xce_val, value domid,
- value remote_port)
- {
-- CAMLparam3(xce, domid, remote_port);
-+ CAMLparam3(xce_val, domid, remote_port);
- CAMLlocal1(port);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- xenevtchn_port_or_error_t rc;
-
- caml_enter_blocking_section();
-- rc = xenevtchn_bind_interdomain(_H(xce), Int_val(domid), Int_val(remote_port));
-+ rc = xenevtchn_bind_interdomain(xce, Int_val(domid), Int_val(remote_port));
- caml_leave_blocking_section();
-
- if (rc == -1)
-@@ -140,14 +146,15 @@ CAMLprim value stub_eventchn_bind_interdomain(value xce, value domid,
- CAMLreturn(port);
- }
-
--CAMLprim value stub_eventchn_bind_virq(value xce, value virq_type)
-+CAMLprim value stub_eventchn_bind_virq(value xce_val, value virq_type)
- {
-- CAMLparam2(xce, virq_type);
-+ CAMLparam2(xce_val, virq_type);
- CAMLlocal1(port);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- xenevtchn_port_or_error_t rc;
-
- caml_enter_blocking_section();
-- rc = xenevtchn_bind_virq(_H(xce), Int_val(virq_type));
-+ rc = xenevtchn_bind_virq(xce, Int_val(virq_type));
- caml_leave_blocking_section();
-
- if (rc == -1)
-@@ -157,13 +164,14 @@ CAMLprim value stub_eventchn_bind_virq(value xce, value virq_type)
- CAMLreturn(port);
- }
-
--CAMLprim value stub_eventchn_unbind(value xce, value port)
-+CAMLprim value stub_eventchn_unbind(value xce_val, value port)
- {
-- CAMLparam2(xce, port);
-+ CAMLparam2(xce_val, port);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- int rc;
-
- caml_enter_blocking_section();
-- rc = xenevtchn_unbind(_H(xce), Int_val(port));
-+ rc = xenevtchn_unbind(xce, Int_val(port));
- caml_leave_blocking_section();
-
- if (rc == -1)
-@@ -172,14 +180,15 @@ CAMLprim value stub_eventchn_unbind(value xce, value port)
- CAMLreturn(Val_unit);
- }
-
--CAMLprim value stub_eventchn_pending(value xce)
-+CAMLprim value stub_eventchn_pending(value xce_val)
- {
-- CAMLparam1(xce);
-+ CAMLparam1(xce_val);
- CAMLlocal1(result);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- xenevtchn_port_or_error_t port;
-
- caml_enter_blocking_section();
-- port = xenevtchn_pending(_H(xce));
-+ port = xenevtchn_pending(xce);
- caml_leave_blocking_section();
-
- if (port == -1)
-@@ -189,16 +198,17 @@ CAMLprim value stub_eventchn_pending(value xce)
- CAMLreturn(result);
- }
-
--CAMLprim value stub_eventchn_unmask(value xce, value _port)
-+CAMLprim value stub_eventchn_unmask(value xce_val, value _port)
- {
-- CAMLparam2(xce, _port);
-+ CAMLparam2(xce_val, _port);
-+ xenevtchn_handle *xce = xce_of_val(xce_val);
- evtchn_port_t port;
- int rc;
-
- port = Int_val(_port);
-
- caml_enter_blocking_section();
-- rc = xenevtchn_unmask(_H(xce), port);
-+ rc = xenevtchn_unmask(xce, port);
- caml_leave_blocking_section();
-
- if (rc)
---
-2.40.0
-
diff --git a/0019-tools-ocaml-xc-Fix-binding-for-xc_domain_assign_devi.patch b/0019-tools-ocaml-xc-Fix-binding-for-xc_domain_assign_devi.patch
deleted file mode 100644
index 5571446..0000000
--- a/0019-tools-ocaml-xc-Fix-binding-for-xc_domain_assign_devi.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 854013084e2c6267af7787df8b35d85646f79a54 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
-Date: Thu, 12 Jan 2023 11:38:38 +0000
-Subject: [PATCH 19/61] tools/ocaml/xc: Fix binding for
- xc_domain_assign_device()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The patch adding this binding was plain broken, and unreviewed. It modified
-the C stub to add a 4th parameter without an equivalent adjustment in the
-Ocaml side of the bindings.
-
-In 64bit builds, this causes us to dereference whatever dead value is in %rcx
-when trying to interpret the rflags parameter.
-
-This has gone unnoticed because Xapi doesn't use this binding (it has its
-own), but unbreak the binding by passing RDM_RELAXED unconditionally for
-now (matching the libxl default behaviour).
-
-Fixes: 9b34056cb4 ("tools: extend xc_assign_device() to support rdm reservation policy")
-Signed-off-by: Edwin Török <edwin.torok@cloud.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 4250683842104f02996428f93927a035c8e19266)
----
- tools/ocaml/libs/xc/xenctrl_stubs.c | 17 +++++------------
- 1 file changed, 5 insertions(+), 12 deletions(-)
-
-diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
-index ec64341a9a..e2efcbe182 100644
---- a/tools/ocaml/libs/xc/xenctrl_stubs.c
-+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
-@@ -1123,17 +1123,12 @@ CAMLprim value stub_xc_domain_test_assign_device(value xch, value domid, value d
- CAMLreturn(Val_bool(ret == 0));
- }
-
--static int domain_assign_device_rdm_flag_table[] = {
-- XEN_DOMCTL_DEV_RDM_RELAXED,
--};
--
--CAMLprim value stub_xc_domain_assign_device(value xch, value domid, value desc,
-- value rflag)
-+CAMLprim value stub_xc_domain_assign_device(value xch, value domid, value desc)
- {
-- CAMLparam4(xch, domid, desc, rflag);
-+ CAMLparam3(xch, domid, desc);
- int ret;
- int domain, bus, dev, func;
-- uint32_t sbdf, flag;
-+ uint32_t sbdf;
-
- domain = Int_val(Field(desc, 0));
- bus = Int_val(Field(desc, 1));
-@@ -1141,10 +1136,8 @@ CAMLprim value stub_xc_domain_assign_device(value xch, value domid, value desc,
- func = Int_val(Field(desc, 3));
- sbdf = encode_sbdf(domain, bus, dev, func);
-
-- ret = Int_val(Field(rflag, 0));
-- flag = domain_assign_device_rdm_flag_table[ret];
--
-- ret = xc_assign_device(_H(xch), _D(domid), sbdf, flag);
-+ ret = xc_assign_device(_H(xch), _D(domid), sbdf,
-+ XEN_DOMCTL_DEV_RDM_RELAXED);
-
- if (ret < 0)
- failwith_xc(_H(xch));
---
-2.40.0
-
diff --git a/0019-tools-pygrub-Remove-unnecessary-hypercall.patch b/0019-tools-pygrub-Remove-unnecessary-hypercall.patch
new file mode 100644
index 0000000..f345a6f
--- /dev/null
+++ b/0019-tools-pygrub-Remove-unnecessary-hypercall.patch
@@ -0,0 +1,60 @@
+From 3d760a3bb9b55e5dd45534cac3cdb561a57f2ee0 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Mon, 25 Sep 2023 18:32:21 +0100
+Subject: [PATCH 19/27] tools/pygrub: Remove unnecessary hypercall
+
+There's a hypercall being issued in order to determine whether PV64 is
+supported, but since Xen 4.3 that's strictly true so it's not required.
+
+Plus, this way we can avoid mapping the privcmd interface altogether in the
+depriv pygrub.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+(cherry picked from commit f4b504c6170c446e61055cbd388ae4e832a9deca)
+---
+ tools/pygrub/src/pygrub | 12 +-----------
+ 1 file changed, 1 insertion(+), 11 deletions(-)
+
+diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
+index ce7ab0eb8c..ce4e07d3e8 100755
+--- a/tools/pygrub/src/pygrub
++++ b/tools/pygrub/src/pygrub
+@@ -18,7 +18,6 @@ import os, sys, string, struct, tempfile, re, traceback, stat, errno
+ import copy
+ import logging
+ import platform
+-import xen.lowlevel.xc
+
+ import curses, _curses, curses.textpad, curses.ascii
+ import getopt
+@@ -668,14 +667,6 @@ def run_grub(file, entry, fs, cfg_args):
+
+ return grubcfg
+
+-def supports64bitPVguest():
+- xc = xen.lowlevel.xc.xc()
+- caps = xc.xeninfo()['xen_caps'].split(" ")
+- for cap in caps:
+- if cap == "xen-3.0-x86_64":
+- return True
+- return False
+-
+ # If nothing has been specified, look for a Solaris domU. If found, perform the
+ # necessary tweaks.
+ def sniff_solaris(fs, cfg):
+@@ -684,8 +675,7 @@ def sniff_solaris(fs, cfg):
+ return cfg
+
+ if not cfg["kernel"]:
+- if supports64bitPVguest() and \
+- fs.file_exists("/platform/i86xpv/kernel/amd64/unix"):
++ if fs.file_exists("/platform/i86xpv/kernel/amd64/unix"):
+ cfg["kernel"] = "/platform/i86xpv/kernel/amd64/unix"
+ cfg["ramdisk"] = "/platform/i86pc/amd64/boot_archive"
+ elif fs.file_exists("/platform/i86xpv/kernel/unix"):
+--
+2.42.0
+
diff --git a/0020-tools-ocaml-xc-Don-t-reference-Abstract_Tag-objects-.patch b/0020-tools-ocaml-xc-Don-t-reference-Abstract_Tag-objects-.patch
deleted file mode 100644
index a829d36..0000000
--- a/0020-tools-ocaml-xc-Don-t-reference-Abstract_Tag-objects-.patch
+++ /dev/null
@@ -1,76 +0,0 @@
-From 1fdff77e26290ae1ed40e8253959d12a0c4b3d3f Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 31 Jan 2023 17:19:30 +0000
-Subject: [PATCH 20/61] tools/ocaml/xc: Don't reference Abstract_Tag objects
- with the GC lock released
-
-The intf->{addr,len} references in the xc_map_foreign_range() call are unsafe.
-From the manual:
-
- https://ocaml.org/manual/intfc.html#ss:parallel-execution-long-running-c-code
-
-"After caml_release_runtime_system() was called and until
-caml_acquire_runtime_system() is called, the C code must not access any OCaml
-data, nor call any function of the run-time system, nor call back into OCaml
-code."
-
-More than what the manual says, the intf pointer is (potentially) invalidated
-by caml_enter_blocking_section() if another thread happens to perform garbage
-collection at just the right (wrong) moment.
-
-Rewrite the logic. There's no need to stash data in the Ocaml object until
-the success path at the very end.
-
-Fixes: 8b7ce06a2d34 ("ocaml: Add XC bindings.")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit 9e7c74e6f9fd2e44df1212643b80af9032b45b07)
----
- tools/ocaml/libs/xc/xenctrl_stubs.c | 23 +++++++++++------------
- 1 file changed, 11 insertions(+), 12 deletions(-)
-
-diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
-index e2efcbe182..0a0fe45c54 100644
---- a/tools/ocaml/libs/xc/xenctrl_stubs.c
-+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
-@@ -937,26 +937,25 @@ CAMLprim value stub_map_foreign_range(value xch, value dom,
- CAMLparam4(xch, dom, size, mfn);
- CAMLlocal1(result);
- struct mmap_interface *intf;
-- uint32_t c_dom;
-- unsigned long c_mfn;
-+ unsigned long c_mfn = Nativeint_val(mfn);
-+ int len = Int_val(size);
-+ void *ptr;
-
- BUILD_BUG_ON((sizeof(struct mmap_interface) % sizeof(value)) != 0);
- result = caml_alloc(Wsize_bsize(sizeof(struct mmap_interface)),
- Abstract_tag);
-
-- intf = (struct mmap_interface *) result;
--
-- intf->len = Int_val(size);
--
-- c_dom = _D(dom);
-- c_mfn = Nativeint_val(mfn);
- caml_enter_blocking_section();
-- intf->addr = xc_map_foreign_range(_H(xch), c_dom,
-- intf->len, PROT_READ|PROT_WRITE,
-- c_mfn);
-+ ptr = xc_map_foreign_range(_H(xch), _D(dom), len,
-+ PROT_READ|PROT_WRITE, c_mfn);
- caml_leave_blocking_section();
-- if (!intf->addr)
-+
-+ if (!ptr)
- caml_failwith("xc_map_foreign_range error");
-+
-+ intf = Data_abstract_val(result);
-+ *intf = (struct mmap_interface){ ptr, len };
-+
- CAMLreturn(result);
- }
-
---
-2.40.0
-
diff --git a/0020-tools-pygrub-Small-refactors.patch b/0020-tools-pygrub-Small-refactors.patch
new file mode 100644
index 0000000..5b24800
--- /dev/null
+++ b/0020-tools-pygrub-Small-refactors.patch
@@ -0,0 +1,65 @@
+From 4f46a077fde520dcdc466da611d7abd124f260f8 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Mon, 25 Sep 2023 18:32:22 +0100
+Subject: [PATCH 20/27] tools/pygrub: Small refactors
+
+Small tidy up to ensure output_directory always has a trailing '/' to ease
+concatenating paths and that `output` can only be a filename or None.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+(cherry picked from commit 9f2ff9a7c9b3ac734ae99f17f0134ed0343dcccf)
+---
+ tools/pygrub/src/pygrub | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
+index ce4e07d3e8..1042c05b86 100755
+--- a/tools/pygrub/src/pygrub
++++ b/tools/pygrub/src/pygrub
+@@ -793,7 +793,7 @@ if __name__ == "__main__":
+ debug = False
+ not_really = False
+ output_format = "sxp"
+- output_directory = "/var/run/xen/pygrub"
++ output_directory = "/var/run/xen/pygrub/"
+
+ # what was passed in
+ incfg = { "kernel": None, "ramdisk": None, "args": "" }
+@@ -815,7 +815,8 @@ if __name__ == "__main__":
+ usage()
+ sys.exit()
+ elif o in ("--output",):
+- output = a
++ if a != "-":
++ output = a
+ elif o in ("--kernel",):
+ incfg["kernel"] = a
+ elif o in ("--ramdisk",):
+@@ -847,12 +848,11 @@ if __name__ == "__main__":
+ if not os.path.isdir(a):
+ print("%s is not an existing directory" % a)
+ sys.exit(1)
+- output_directory = a
++ output_directory = a + '/'
+
+ if debug:
+ logging.basicConfig(level=logging.DEBUG)
+
+-
+ try:
+ os.makedirs(output_directory, 0o700)
+ except OSError as e:
+@@ -861,7 +861,7 @@ if __name__ == "__main__":
+ else:
+ raise
+
+- if output is None or output == "-":
++ if output is None:
+ fd = sys.stdout.fileno()
+ else:
+ fd = os.open(output, os.O_WRONLY)
+--
+2.42.0
+
diff --git a/0021-tools-ocaml-libs-Fix-memory-resource-leaks-with-caml.patch b/0021-tools-ocaml-libs-Fix-memory-resource-leaks-with-caml.patch
deleted file mode 100644
index 8ed7dfa..0000000
--- a/0021-tools-ocaml-libs-Fix-memory-resource-leaks-with-caml.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From 1b6acdeeb2323c53d841356da50440e274e7bf9a Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 1 Feb 2023 11:27:42 +0000
-Subject: [PATCH 21/61] tools/ocaml/libs: Fix memory/resource leaks with
- caml_alloc_custom()
-
-All caml_alloc_*() functions can throw exceptions, and longjump out of
-context. If this happens, we leak the xch/xce handle.
-
-Reorder the logic to allocate the the Ocaml object first.
-
-Fixes: 8b3c06a3e545 ("tools/ocaml/xenctrl: OCaml 5 support, fix use-after-free")
-Fixes: 22d5affdf0ce ("tools/ocaml/evtchn: OCaml 5 support, fix potential resource leak")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Christian Lindig <christian.lindig@citrix.com>
-(cherry picked from commit d69ccf52ad467ccc22029172a8e61dc621187889)
----
- tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-index d7881ca95f..de2fc29292 100644
---- a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-+++ b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
-@@ -63,6 +63,8 @@ CAMLprim value stub_eventchn_init(value cloexec)
- if ( !Bool_val(cloexec) )
- flags |= XENEVTCHN_NO_CLOEXEC;
-
-+ result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
-+
- caml_enter_blocking_section();
- xce = xenevtchn_open(NULL, flags);
- caml_leave_blocking_section();
-@@ -70,7 +72,6 @@ CAMLprim value stub_eventchn_init(value cloexec)
- if (xce == NULL)
- caml_failwith("open failed");
-
-- result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
- *(xenevtchn_handle **)Data_custom_val(result) = xce;
-
- CAMLreturn(result);
-@@ -82,6 +83,8 @@ CAMLprim value stub_eventchn_fdopen(value fdval)
- CAMLlocal1(result);
- xenevtchn_handle *xce;
-
-+ result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
-+
- caml_enter_blocking_section();
- xce = xenevtchn_fdopen(NULL, Int_val(fdval), 0);
- caml_leave_blocking_section();
-@@ -89,7 +92,6 @@ CAMLprim value stub_eventchn_fdopen(value fdval)
- if (xce == NULL)
- caml_failwith("evtchn fdopen failed");
-
-- result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
- *(xenevtchn_handle **)Data_custom_val(result) = xce;
-
- CAMLreturn(result);
---
-2.40.0
-
diff --git a/0021-tools-pygrub-Open-the-output-files-earlier.patch b/0021-tools-pygrub-Open-the-output-files-earlier.patch
new file mode 100644
index 0000000..7eb13b8
--- /dev/null
+++ b/0021-tools-pygrub-Open-the-output-files-earlier.patch
@@ -0,0 +1,105 @@
+From d01f651da05b77714f0f172501993121b77039a7 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Mon, 25 Sep 2023 18:32:23 +0100
+Subject: [PATCH 21/27] tools/pygrub: Open the output files earlier
+
+This patch allows pygrub to get ahold of every RW file descriptor it needs
+early on. A later patch will clamp the filesystem it can access so it can't
+obtain any others.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+(cherry picked from commit 0710d7d44586251bfca9758890616dc3d6de8a74)
+---
+ tools/pygrub/src/pygrub | 37 ++++++++++++++++++++++---------------
+ 1 file changed, 22 insertions(+), 15 deletions(-)
+
+diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
+index 1042c05b86..91e2ec2ab1 100755
+--- a/tools/pygrub/src/pygrub
++++ b/tools/pygrub/src/pygrub
+@@ -738,8 +738,7 @@ if __name__ == "__main__":
+ def usage():
+ print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--offset=] <image>" %(sys.argv[0],), file=sys.stderr)
+
+- def copy_from_image(fs, file_to_read, file_type, output_directory,
+- not_really):
++ def copy_from_image(fs, file_to_read, file_type, fd_dst, path_dst, not_really):
+ if not_really:
+ if fs.file_exists(file_to_read):
+ return "<%s:%s>" % (file_type, file_to_read)
+@@ -750,21 +749,18 @@ if __name__ == "__main__":
+ except Exception as e:
+ print(e, file=sys.stderr)
+ sys.exit("Error opening %s in guest" % file_to_read)
+- (tfd, ret) = tempfile.mkstemp(prefix="boot_"+file_type+".",
+- dir=output_directory)
+ dataoff = 0
+ while True:
+ data = datafile.read(FS_READ_MAX, dataoff)
+ if len(data) == 0:
+- os.close(tfd)
++ os.close(fd_dst)
+ del datafile
+- return ret
++ return
+ try:
+- os.write(tfd, data)
++ os.write(fd_dst, data)
+ except Exception as e:
+ print(e, file=sys.stderr)
+- os.close(tfd)
+- os.unlink(ret)
++ os.unlink(path_dst)
+ del datafile
+ sys.exit("Error writing temporary copy of "+file_type)
+ dataoff += len(data)
+@@ -861,6 +857,14 @@ if __name__ == "__main__":
+ else:
+ raise
+
++ if not_really:
++ fd_kernel = path_kernel = fd_ramdisk = path_ramdisk = None
++ else:
++ (fd_kernel, path_kernel) = tempfile.mkstemp(prefix="boot_kernel.",
++ dir=output_directory)
++ (fd_ramdisk, path_ramdisk) = tempfile.mkstemp(prefix="boot_ramdisk.",
++ dir=output_directory)
++
+ if output is None:
+ fd = sys.stdout.fileno()
+ else:
+@@ -920,20 +924,23 @@ if __name__ == "__main__":
+ if fs is None:
+ raise RuntimeError("Unable to find partition containing kernel")
+
+- bootcfg["kernel"] = copy_from_image(fs, chosencfg["kernel"], "kernel",
+- output_directory, not_really)
++ copy_from_image(fs, chosencfg["kernel"], "kernel",
++ fd_kernel, path_kernel, not_really)
++ bootcfg["kernel"] = path_kernel
+
+ if chosencfg["ramdisk"]:
+ try:
+- bootcfg["ramdisk"] = copy_from_image(fs, chosencfg["ramdisk"],
+- "ramdisk", output_directory,
+- not_really)
++ copy_from_image(fs, chosencfg["ramdisk"], "ramdisk",
++ fd_ramdisk, path_ramdisk, not_really)
+ except:
+ if not not_really:
+- os.unlink(bootcfg["kernel"])
++ os.unlink(path_kernel)
+ raise
++ bootcfg["ramdisk"] = path_ramdisk
+ else:
+ initrd = None
++ if not not_really:
++ os.unlink(path_ramdisk)
+
+ args = None
+ if chosencfg["args"]:
+--
+2.42.0
+
diff --git a/0022-tools-libfsimage-Export-a-new-function-to-preload-al.patch b/0022-tools-libfsimage-Export-a-new-function-to-preload-al.patch
new file mode 100644
index 0000000..3128eef
--- /dev/null
+++ b/0022-tools-libfsimage-Export-a-new-function-to-preload-al.patch
@@ -0,0 +1,126 @@
+From c1159b5ed4ad7fadc5c650f749b072da9a78fb13 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Mon, 25 Sep 2023 18:32:24 +0100
+Subject: [PATCH 22/27] tools/libfsimage: Export a new function to preload all
+ plugins
+
+This is work required in order to let pygrub operate in highly deprivileged
+chroot mode. This patch adds a function that preloads every plugin, hence
+ensuring that on function exit, every shared library is loaded in memory.
+
+The new "init" function is supposed to be used before depriv, but that's
+fine because it's not acting on untrusted data.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+(cherry picked from commit 990e65c3ad9ac08642ce62a92852c80be6c83e96)
+---
+ tools/libfsimage/common/fsimage_plugin.c | 4 ++--
+ tools/libfsimage/common/mapfile-GNU | 1 +
+ tools/libfsimage/common/mapfile-SunOS | 1 +
+ tools/libfsimage/common/xenfsimage.h | 8 ++++++++
+ tools/pygrub/src/fsimage/fsimage.c | 15 +++++++++++++++
+ 5 files changed, 27 insertions(+), 2 deletions(-)
+
+diff --git a/tools/libfsimage/common/fsimage_plugin.c b/tools/libfsimage/common/fsimage_plugin.c
+index de1412b423..d0cb9e96a6 100644
+--- a/tools/libfsimage/common/fsimage_plugin.c
++++ b/tools/libfsimage/common/fsimage_plugin.c
+@@ -119,7 +119,7 @@ fail:
+ return (-1);
+ }
+
+-static int load_plugins(void)
++int fsi_init(void)
+ {
+ const char *fsdir = getenv("XEN_FSIMAGE_FSDIR");
+ struct dirent *dp = NULL;
+@@ -180,7 +180,7 @@ int find_plugin(fsi_t *fsi, const char *path, const char *options)
+ fsi_plugin_t *fp;
+ int ret = 0;
+
+- if (plugins == NULL && (ret = load_plugins()) != 0)
++ if (plugins == NULL && (ret = fsi_init()) != 0)
+ goto out;
+
+ for (fp = plugins; fp != NULL; fp = fp->fp_next) {
+diff --git a/tools/libfsimage/common/mapfile-GNU b/tools/libfsimage/common/mapfile-GNU
+index 26d4d7a69e..2d54d527d7 100644
+--- a/tools/libfsimage/common/mapfile-GNU
++++ b/tools/libfsimage/common/mapfile-GNU
+@@ -1,6 +1,7 @@
+ VERSION {
+ libfsimage.so.1.0 {
+ global:
++ fsi_init;
+ fsi_open_fsimage;
+ fsi_close_fsimage;
+ fsi_file_exists;
+diff --git a/tools/libfsimage/common/mapfile-SunOS b/tools/libfsimage/common/mapfile-SunOS
+index e99b90b650..48deedb425 100644
+--- a/tools/libfsimage/common/mapfile-SunOS
++++ b/tools/libfsimage/common/mapfile-SunOS
+@@ -1,5 +1,6 @@
+ libfsimage.so.1.0 {
+ global:
++ fsi_init;
+ fsi_open_fsimage;
+ fsi_close_fsimage;
+ fsi_file_exists;
+diff --git a/tools/libfsimage/common/xenfsimage.h b/tools/libfsimage/common/xenfsimage.h
+index 201abd54f2..341883b2d7 100644
+--- a/tools/libfsimage/common/xenfsimage.h
++++ b/tools/libfsimage/common/xenfsimage.h
+@@ -35,6 +35,14 @@ extern C {
+ typedef struct fsi fsi_t;
+ typedef struct fsi_file fsi_file_t;
+
++/*
++ * Optional initialization function. If invoked it loads the associated
++ * dynamic libraries for the backends ahead of time. This is required if
++ * the library is to run as part of a highly deprivileged executable, as
++ * the libraries may not be reachable after depriv.
++ */
++int fsi_init(void);
++
+ fsi_t *fsi_open_fsimage(const char *, uint64_t, const char *);
+ void fsi_close_fsimage(fsi_t *);
+
+diff --git a/tools/pygrub/src/fsimage/fsimage.c b/tools/pygrub/src/fsimage/fsimage.c
+index 2ebbbe35df..92fbf2851f 100644
+--- a/tools/pygrub/src/fsimage/fsimage.c
++++ b/tools/pygrub/src/fsimage/fsimage.c
+@@ -286,6 +286,15 @@ fsimage_getbootstring(PyObject *o, PyObject *args)
+ return Py_BuildValue("s", bootstring);
+ }
+
++static PyObject *
++fsimage_init(PyObject *o, PyObject *args)
++{
++ if (!PyArg_ParseTuple(args, ""))
++ return (NULL);
++
++ return Py_BuildValue("i", fsi_init());
++}
++
+ PyDoc_STRVAR(fsimage_open__doc__,
+ "open(name, [offset=off]) - Open the given file as a filesystem image.\n"
+ "\n"
+@@ -297,7 +306,13 @@ PyDoc_STRVAR(fsimage_getbootstring__doc__,
+ "getbootstring(fs) - Return the boot string needed for this file system "
+ "or NULL if none is needed.\n");
+
++PyDoc_STRVAR(fsimage_init__doc__,
++ "init() - Loads every dynamic library contained in xenfsimage "
++ "into memory so that it can be used in chrooted environments.\n");
++
+ static struct PyMethodDef fsimage_module_methods[] = {
++ { "init", (PyCFunction)fsimage_init,
++ METH_VARARGS, fsimage_init__doc__ },
+ { "open", (PyCFunction)fsimage_open,
+ METH_VARARGS|METH_KEYWORDS, fsimage_open__doc__ },
+ { "getbootstring", (PyCFunction)fsimage_getbootstring,
+--
+2.42.0
+
diff --git a/0022-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch b/0022-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch
deleted file mode 100644
index 1d1edb0..0000000
--- a/0022-x86-spec-ctrl-Mitigate-Cross-Thread-Return-Address-P.patch
+++ /dev/null
@@ -1,120 +0,0 @@
-From d4e286db89d80c862b4a24bf971dd71008c8b53e Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 8 Sep 2022 21:27:58 +0100
-Subject: [PATCH 22/61] x86/spec-ctrl: Mitigate Cross-Thread Return Address
- Predictions
-
-This is XSA-426 / CVE-2022-27672
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit 63305e5392ec2d17b85e7996a97462744425db80)
----
- docs/misc/xen-command-line.pandoc | 2 +-
- xen/arch/x86/spec_ctrl.c | 31 ++++++++++++++++++++++++++++---
- xen/include/asm-x86/cpufeatures.h | 3 ++-
- xen/include/asm-x86/spec_ctrl.h | 15 +++++++++++++++
- 4 files changed, 46 insertions(+), 5 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index bd6826d0ae..b3f60cd923 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2275,7 +2275,7 @@ guests to use.
- on entry and exit. These blocks are necessary to virtualise support for
- guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc.
- * `rsb=` offers control over whether to overwrite the Return Stack Buffer /
-- Return Address Stack on entry to Xen.
-+ Return Address Stack on entry to Xen and on idle.
- * `md-clear=` offers control over whether to use VERW to flush
- microarchitectural buffers on idle and exit from Xen. *Note: For
- compatibility with development versions of this fix, `mds=` is also accepted
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 90d86fe5cb..14649d92f5 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -1317,13 +1317,38 @@ void __init init_speculation_mitigations(void)
- * 3) Some CPUs have RSBs which are not full width, which allow the
- * attacker's entries to alias Xen addresses.
- *
-+ * 4) Some CPUs have RSBs which are re-partitioned based on thread
-+ * idleness, which allows an attacker to inject entries into the other
-+ * thread. We still active the optimisation in this case, and mitigate
-+ * in the idle path which has lower overhead.
-+ *
- * It is safe to turn off RSB stuffing when Xen is using SMEP itself, and
- * 32bit PV guests are disabled, and when the RSB is full width.
- */
- BUILD_BUG_ON(RO_MPT_VIRT_START != PML4_ADDR(256));
-- if ( opt_rsb_pv == -1 && boot_cpu_has(X86_FEATURE_XEN_SMEP) &&
-- !opt_pv32 && rsb_is_full_width() )
-- opt_rsb_pv = 0;
-+ if ( opt_rsb_pv == -1 )
-+ {
-+ opt_rsb_pv = (opt_pv32 || !boot_cpu_has(X86_FEATURE_XEN_SMEP) ||
-+ !rsb_is_full_width());
-+
-+ /*
-+ * Cross-Thread Return Address Predictions.
-+ *
-+ * Vulnerable systems are Zen1/Zen2 uarch, which is AMD Fam17 / Hygon
-+ * Fam18, when SMT is active.
-+ *
-+ * To mitigate, we must flush the RSB/RAS/RAP once between entering
-+ * Xen and going idle.
-+ *
-+ * Most cases flush on entry to Xen anyway. The one case where we
-+ * don't is when using the SMEP optimisation for PV guests. Flushing
-+ * before going idle is less overhead than flushing on PV entry.
-+ */
-+ if ( !opt_rsb_pv && hw_smt_enabled &&
-+ (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD|X86_VENDOR_HYGON)) &&
-+ (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) )
-+ setup_force_cpu_cap(X86_FEATURE_SC_RSB_IDLE);
-+ }
-
- if ( opt_rsb_pv )
- {
-diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
-index ecc1bb0950..ccf9d7287c 100644
---- a/xen/include/asm-x86/cpufeatures.h
-+++ b/xen/include/asm-x86/cpufeatures.h
-@@ -35,7 +35,8 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM
- XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */
- XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */
- XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */
--/* Bits 23,24 unused. */
-+/* Bits 23 unused. */
-+XEN_CPUFEATURE(SC_RSB_IDLE, X86_SYNTH(24)) /* RSB overwrite needed for idle. */
- XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */
- XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */
- XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */
-diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
-index 6a77c39378..391973ef6a 100644
---- a/xen/include/asm-x86/spec_ctrl.h
-+++ b/xen/include/asm-x86/spec_ctrl.h
-@@ -159,6 +159,21 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info)
- */
- alternative_input("", "verw %[sel]", X86_FEATURE_SC_VERW_IDLE,
- [sel] "m" (info->verw_sel));
-+
-+ /*
-+ * Cross-Thread Return Address Predictions:
-+ *
-+ * On vulnerable systems, the return predictions (RSB/RAS) are statically
-+ * partitioned between active threads. When entering idle, our entries
-+ * are re-partitioned to allow the other threads to use them.
-+ *
-+ * In some cases, we might still have guest entries in the RAS, so flush
-+ * them before injecting them sideways to our sibling thread.
-+ *
-+ * (ab)use alternative_input() to specify clobbers.
-+ */
-+ alternative_input("", "DO_OVERWRITE_RSB", X86_FEATURE_SC_RSB_IDLE,
-+ : "rax", "rcx");
- }
-
- /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */
---
-2.40.0
-
diff --git a/0023-automation-Remove-clang-8-from-Debian-unstable-conta.patch b/0023-automation-Remove-clang-8-from-Debian-unstable-conta.patch
deleted file mode 100644
index 36dfb4f..0000000
--- a/0023-automation-Remove-clang-8-from-Debian-unstable-conta.patch
+++ /dev/null
@@ -1,84 +0,0 @@
-From 0802504627453a54b1ab408b6e9dc8b5c561172d Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Tue, 21 Feb 2023 16:55:38 +0000
-Subject: [PATCH 23/61] automation: Remove clang-8 from Debian unstable
- container
-
-First, apt complain that it isn't the right way to add keys anymore,
-but hopefully that's just a warning.
-
-Second, we can't install clang-8:
-The following packages have unmet dependencies:
- clang-8 : Depends: libstdc++-8-dev but it is not installable
- Depends: libgcc-8-dev but it is not installable
- Depends: libobjc-8-dev but it is not installable
- Recommends: llvm-8-dev but it is not going to be installed
- Recommends: libomp-8-dev but it is not going to be installed
- libllvm8 : Depends: libffi7 (>= 3.3~20180313) but it is not installable
-E: Unable to correct problems, you have held broken packages.
-
-clang on Debian unstable is now version 14.0.6.
-
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit a6b1e2b80fe2053b1c9c9843fb086a668513ea36)
----
- automation/build/debian/unstable-llvm-8.list | 3 ---
- automation/build/debian/unstable.dockerfile | 12 ------------
- automation/gitlab-ci/build.yaml | 10 ----------
- 3 files changed, 25 deletions(-)
- delete mode 100644 automation/build/debian/unstable-llvm-8.list
-
-diff --git a/automation/build/debian/unstable-llvm-8.list b/automation/build/debian/unstable-llvm-8.list
-deleted file mode 100644
-index dc119fa0b4..0000000000
---- a/automation/build/debian/unstable-llvm-8.list
-+++ /dev/null
-@@ -1,3 +0,0 @@
--# Unstable LLVM 8 repos
--deb http://apt.llvm.org/unstable/ llvm-toolchain-8 main
--deb-src http://apt.llvm.org/unstable/ llvm-toolchain-8 main
-diff --git a/automation/build/debian/unstable.dockerfile b/automation/build/debian/unstable.dockerfile
-index bd61cd12c2..828afa2e1e 100644
---- a/automation/build/debian/unstable.dockerfile
-+++ b/automation/build/debian/unstable.dockerfile
-@@ -52,15 +52,3 @@ RUN apt-get update && \
- apt-get autoremove -y && \
- apt-get clean && \
- rm -rf /var/lib/apt/lists* /tmp/* /var/tmp/*
--
--RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|apt-key add -
--COPY unstable-llvm-8.list /etc/apt/sources.list.d/
--
--RUN apt-get update && \
-- apt-get --quiet --yes install \
-- clang-8 \
-- lld-8 \
-- && \
-- apt-get autoremove -y && \
-- apt-get clean && \
-- rm -rf /var/lib/apt/lists* /tmp/* /var/tmp/*
-diff --git a/automation/gitlab-ci/build.yaml b/automation/gitlab-ci/build.yaml
-index fdd5c76582..06a75a8c5a 100644
---- a/automation/gitlab-ci/build.yaml
-+++ b/automation/gitlab-ci/build.yaml
-@@ -304,16 +304,6 @@ debian-unstable-clang-debug:
- variables:
- CONTAINER: debian:unstable
-
--debian-unstable-clang-8:
-- extends: .clang-8-x86-64-build
-- variables:
-- CONTAINER: debian:unstable
--
--debian-unstable-clang-8-debug:
-- extends: .clang-8-x86-64-build-debug
-- variables:
-- CONTAINER: debian:unstable
--
- debian-unstable-gcc:
- extends: .gcc-x86-64-build
- variables:
---
-2.40.0
-
diff --git a/0023-tools-pygrub-Deprivilege-pygrub.patch b/0023-tools-pygrub-Deprivilege-pygrub.patch
new file mode 100644
index 0000000..8885b99
--- /dev/null
+++ b/0023-tools-pygrub-Deprivilege-pygrub.patch
@@ -0,0 +1,307 @@
+From 1395852e1bc352bf727d18ebe33426e279cdc967 Mon Sep 17 00:00:00 2001
+From: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+Date: Mon, 25 Sep 2023 18:32:25 +0100
+Subject: [PATCH 23/27] tools/pygrub: Deprivilege pygrub
+
+Introduce a --runas=<uid> flag to deprivilege pygrub on Linux and *BSDs. It
+also implicitly creates a chroot env where it drops a deprivileged forked
+process. The chroot itself is cleaned up at the end.
+
+If the --runas arg is present, then pygrub forks, leaving the child to
+deprivilege itself, and waiting for it to complete. When the child exits,
+the parent performs cleanup and exits with the same error code.
+
+This is roughly what the child does:
+ 1. Initialize libfsimage (this loads every .so in memory so the chroot
+ can avoid bind-mounting /{,usr}/lib*)
+ 2. Create a temporary empty chroot directory
+ 3. Mount tmpfs in it
+ 4. Bind mount the disk inside, because libfsimage expects a path, not a
+ file descriptor.
+ 5. Remount the root tmpfs to be stricter (ro,nosuid,nodev)
+ 6. Set RLIMIT_FSIZE to a sensibly high amount (128 MiB)
+ 7. Depriv gid, groups and uid
+
+With this scheme in place, the "output" files are writable (up to
+RLIMIT_FSIZE octets) and the exposed filesystem is immutable and contains
+the only file we can't easily get rid of (the disk).
+
+If running on Linux, the child process also unshares mount, IPC, and
+network namespaces before dropping its privileges.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
+(cherry picked from commit e0342ae5556f2b6e2db50701b8a0679a45822ca6)
+---
+ tools/pygrub/setup.py | 2 +-
+ tools/pygrub/src/pygrub | 162 +++++++++++++++++++++++++++++++++++++---
+ 2 files changed, 154 insertions(+), 10 deletions(-)
+
+diff --git a/tools/pygrub/setup.py b/tools/pygrub/setup.py
+index b8f1dc4590..f16187b6d1 100644
+--- a/tools/pygrub/setup.py
++++ b/tools/pygrub/setup.py
+@@ -17,7 +17,7 @@ xenfsimage = Extension("xenfsimage",
+ pkgs = [ 'grub' ]
+
+ setup(name='pygrub',
+- version='0.6',
++ version='0.7',
+ description='Boot loader that looks a lot like grub for Xen',
+ author='Jeremy Katz',
+ author_email='katzj@redhat.com',
+diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
+index 91e2ec2ab1..7cea496ade 100755
+--- a/tools/pygrub/src/pygrub
++++ b/tools/pygrub/src/pygrub
+@@ -16,8 +16,11 @@ from __future__ import print_function
+
+ import os, sys, string, struct, tempfile, re, traceback, stat, errno
+ import copy
++import ctypes, ctypes.util
+ import logging
+ import platform
++import resource
++import subprocess
+
+ import curses, _curses, curses.textpad, curses.ascii
+ import getopt
+@@ -27,10 +30,135 @@ import grub.GrubConf
+ import grub.LiloConf
+ import grub.ExtLinuxConf
+
+-PYGRUB_VER = 0.6
++PYGRUB_VER = 0.7
+ FS_READ_MAX = 1024 * 1024
+ SECTOR_SIZE = 512
+
++# Unless provided through the env variable PYGRUB_MAX_FILE_SIZE_MB, then
++# this is the maximum filesize allowed for files written by the depriv
++# pygrub
++LIMIT_FSIZE = 128 << 20
++
++CLONE_NEWNS = 0x00020000 # mount namespace
++CLONE_NEWNET = 0x40000000 # network namespace
++CLONE_NEWIPC = 0x08000000 # IPC namespace
++
++def unshare(flags):
++ if not sys.platform.startswith("linux"):
++ print("skip_unshare reason=not_linux platform=%s", sys.platform, file=sys.stderr)
++ return
++
++ libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
++ unshare_prototype = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_int, use_errno=True)
++ unshare = unshare_prototype(('unshare', libc))
++
++ if unshare(flags) < 0:
++ raise OSError(ctypes.get_errno(), os.strerror(ctypes.get_errno()))
++
++def bind_mount(src, dst, options):
++ open(dst, "a").close() # touch
++
++ rc = subprocess.call(["mount", "--bind", "-o", options, src, dst])
++ if rc != 0:
++ raise RuntimeError("bad_mount: src=%s dst=%s opts=%s" %
++ (src, dst, options))
++
++def downgrade_rlimits():
++ # Wipe the authority to use unrequired resources
++ resource.setrlimit(resource.RLIMIT_NPROC, (0, 0))
++ resource.setrlimit(resource.RLIMIT_CORE, (0, 0))
++ resource.setrlimit(resource.RLIMIT_MEMLOCK, (0, 0))
++
++ # py2's resource module doesn't know about resource.RLIMIT_MSGQUEUE
++ #
++ # TODO: Use resource.RLIMIT_MSGQUEUE after python2 is deprecated
++ if sys.platform.startswith('linux'):
++ RLIMIT_MSGQUEUE = 12
++ resource.setrlimit(RLIMIT_MSGQUEUE, (0, 0))
++
++ # The final look of the filesystem for this process is fully RO, but
++ # note we have some file descriptor already open (notably, kernel and
++ # ramdisk). In order to avoid a compromised pygrub from filling up the
++ # filesystem we set RLIMIT_FSIZE to a high bound, so that the file
++ # write permissions are bound.
++ fsize = LIMIT_FSIZE
++ if "PYGRUB_MAX_FILE_SIZE_MB" in os.environ.keys():
++ fsize = os.environ["PYGRUB_MAX_FILE_SIZE_MB"] << 20
++
++ resource.setrlimit(resource.RLIMIT_FSIZE, (fsize, fsize))
++
++def depriv(output_directory, output, device, uid, path_kernel, path_ramdisk):
++ # The only point of this call is to force the loading of libfsimage.
++ # That way, we don't need to bind-mount it into the chroot
++ rc = xenfsimage.init()
++ if rc != 0:
++ os.unlink(path_ramdisk)
++ os.unlink(path_kernel)
++ raise RuntimeError("bad_xenfsimage: rc=%d" % rc)
++
++ # Create a temporary directory for the chroot
++ chroot = tempfile.mkdtemp(prefix=str(uid)+'-', dir=output_directory) + '/'
++ device_path = '/device'
++
++ pid = os.fork()
++ if pid:
++ # parent
++ _, rc = os.waitpid(pid, 0)
++
++ for path in [path_kernel, path_ramdisk]:
++ # If the child didn't write anything, just get rid of it,
++ # otherwise we end up consuming a 0-size file when parsing
++ # systems without a ramdisk that the ultimate caller of pygrub
++ # may just be unaware of
++ if rc != 0 or os.path.getsize(path) == 0:
++ os.unlink(path)
++
++ # Normally, unshare(CLONE_NEWNS) will ensure this is not required.
++ # However, this syscall doesn't exist in *BSD systems and doesn't
++ # auto-unmount everything on older Linux kernels (At least as of
++ # Linux 4.19, but it seems fixed in 5.15). Either way,
++ # recursively unmount everything if needed. Quietly.
++ with open('/dev/null', 'w') as devnull:
++ subprocess.call(["umount", "-f", chroot + device_path],
++ stdout=devnull, stderr=devnull)
++ subprocess.call(["umount", "-f", chroot],
++ stdout=devnull, stderr=devnull)
++ os.rmdir(chroot)
++
++ sys.exit(rc)
++
++ # By unsharing the namespace we're making sure it's all bulk-released
++ # at the end, when the namespaces disappear. This means the kernel does
++ # (almost) all the cleanup for us and the parent just has to remove the
++ # temporary directory.
++ unshare(CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWNET)
++
++ # Set sensible limits using the setrlimit interface
++ downgrade_rlimits()
++
++ # We'll mount tmpfs on the chroot to ensure the deprivileged child
++ # cannot affect the persistent state. It's RW now in order to
++ # bind-mount the device, but note it's remounted RO after that.
++ rc = subprocess.call(["mount", "-t", "tmpfs", "none", chroot])
++ if rc != 0:
++ raise RuntimeError("mount_tmpfs rc=%d dst=\"%s\"" % (rc, chroot))
++
++ # Bind the untrusted device RO
++ bind_mount(device, chroot + device_path, "ro,nosuid,noexec")
++
++ rc = subprocess.call(["mount", "-t", "tmpfs", "-o", "remount,ro,nosuid,noexec,nodev", "none", chroot])
++ if rc != 0:
++ raise RuntimeError("remount_tmpfs rc=%d dst=\"%s\"" % (rc, chroot))
++
++ # Drop superpowers!
++ os.chroot(chroot)
++ os.chdir('/')
++ os.setgid(uid)
++ os.setgroups([uid])
++ os.setuid(uid)
++
++ return device_path
++
+ def read_size_roundup(fd, size):
+ if platform.system() != 'FreeBSD':
+ return size
+@@ -736,7 +864,7 @@ if __name__ == "__main__":
+ sel = None
+
+ def usage():
+- print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--offset=] <image>" %(sys.argv[0],), file=sys.stderr)
++ print("Usage: %s [-q|--quiet] [-i|--interactive] [-l|--list-entries] [-n|--not-really] [--output=] [--kernel=] [--ramdisk=] [--args=] [--entry=] [--output-directory=] [--output-format=sxp|simple|simple0] [--runas=] [--offset=] <image>" %(sys.argv[0],), file=sys.stderr)
+
+ def copy_from_image(fs, file_to_read, file_type, fd_dst, path_dst, not_really):
+ if not_really:
+@@ -760,7 +888,8 @@ if __name__ == "__main__":
+ os.write(fd_dst, data)
+ except Exception as e:
+ print(e, file=sys.stderr)
+- os.unlink(path_dst)
++ if path_dst:
++ os.unlink(path_dst)
+ del datafile
+ sys.exit("Error writing temporary copy of "+file_type)
+ dataoff += len(data)
+@@ -769,7 +898,7 @@ if __name__ == "__main__":
+ opts, args = getopt.gnu_getopt(sys.argv[1:], 'qilnh::',
+ ["quiet", "interactive", "list-entries", "not-really", "help",
+ "output=", "output-format=", "output-directory=", "offset=",
+- "entry=", "kernel=",
++ "runas=", "entry=", "kernel=",
+ "ramdisk=", "args=", "isconfig", "debug"])
+ except getopt.GetoptError:
+ usage()
+@@ -790,6 +919,7 @@ if __name__ == "__main__":
+ not_really = False
+ output_format = "sxp"
+ output_directory = "/var/run/xen/pygrub/"
++ uid = None
+
+ # what was passed in
+ incfg = { "kernel": None, "ramdisk": None, "args": "" }
+@@ -813,6 +943,13 @@ if __name__ == "__main__":
+ elif o in ("--output",):
+ if a != "-":
+ output = a
++ elif o in ("--runas",):
++ try:
++ uid = int(a)
++ except ValueError:
++ print("runas value must be an integer user id")
++ usage()
++ sys.exit(1)
+ elif o in ("--kernel",):
+ incfg["kernel"] = a
+ elif o in ("--ramdisk",):
+@@ -849,6 +986,10 @@ if __name__ == "__main__":
+ if debug:
+ logging.basicConfig(level=logging.DEBUG)
+
++ if interactive and uid:
++ print("In order to use --runas, you must also set --entry or -q", file=sys.stderr)
++ sys.exit(1)
++
+ try:
+ os.makedirs(output_directory, 0o700)
+ except OSError as e:
+@@ -870,6 +1011,9 @@ if __name__ == "__main__":
+ else:
+ fd = os.open(output, os.O_WRONLY)
+
++ if uid:
++ file = depriv(output_directory, output, file, uid, path_kernel, path_ramdisk)
++
+ # debug
+ if isconfig:
+ chosencfg = run_grub(file, entry, fs, incfg["args"])
+@@ -925,21 +1069,21 @@ if __name__ == "__main__":
+ raise RuntimeError("Unable to find partition containing kernel")
+
+ copy_from_image(fs, chosencfg["kernel"], "kernel",
+- fd_kernel, path_kernel, not_really)
++ fd_kernel, None if uid else path_kernel, not_really)
+ bootcfg["kernel"] = path_kernel
+
+ if chosencfg["ramdisk"]:
+ try:
+ copy_from_image(fs, chosencfg["ramdisk"], "ramdisk",
+- fd_ramdisk, path_ramdisk, not_really)
++ fd_ramdisk, None if uid else path_ramdisk, not_really)
+ except:
+- if not not_really:
+- os.unlink(path_kernel)
++ if not uid and not not_really:
++ os.unlink(path_kernel)
+ raise
+ bootcfg["ramdisk"] = path_ramdisk
+ else:
+ initrd = None
+- if not not_really:
++ if not uid and not not_really:
+ os.unlink(path_ramdisk)
+
+ args = None
+--
+2.42.0
+
diff --git a/0024-libs-util-Fix-parallel-build-between-flex-bison-and-.patch b/0024-libs-util-Fix-parallel-build-between-flex-bison-and-.patch
deleted file mode 100644
index 6164878..0000000
--- a/0024-libs-util-Fix-parallel-build-between-flex-bison-and-.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From e4b5dff3d06421847761669a3676bef1f23e705a Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Fri, 3 Mar 2023 08:06:23 +0100
-Subject: [PATCH 24/61] libs/util: Fix parallel build between flex/bison and CC
- rules
-
-flex/bison generate two targets, and when those targets are
-prerequisite of other rules they are considered independently by make.
-
-We can have a situation where the .c file is out-of-date but not the
-.h, git checkout for example. In this case, if a rule only have the .h
-file as prerequiste, make will procced and start to build the object.
-In parallel, another target can have the .c file as prerequisite and
-make will find out it need re-generating and do so, changing the .h at
-the same time. This parallel task breaks the first one.
-
-To avoid this scenario, we put both the header and the source as
-prerequisite for all object even if they only need the header.
-
-Reported-by: Andrew Cooper <Andrew.Cooper3@citrix.com>
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: bf652a50fb3bb3b1b3d93db6fb79bc28f978fe75
-master date: 2023-02-09 18:26:17 +0000
----
- tools/libs/util/Makefile | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/tools/libs/util/Makefile b/tools/libs/util/Makefile
-index b739360be7..977849c056 100644
---- a/tools/libs/util/Makefile
-+++ b/tools/libs/util/Makefile
-@@ -41,6 +41,14 @@ include $(XEN_ROOT)/tools/libs/libs.mk
-
- $(LIB_OBJS) $(PIC_OBJS): $(AUTOINCS) _paths.h
-
-+# Adding the .c conterparts of the headers generated by flex/bison as
-+# prerequisite of all objects.
-+# This is to tell make that if only the .c file is out-of-date but not the
-+# header, it should still wait for the .c file to be rebuilt.
-+# Otherwise, make doesn't considered "%.c %.h" as grouped targets, and will run
-+# the flex/bison rules in parallel of CC rules which only need the header.
-+$(LIB_OBJS) $(PIC_OBJS): libxlu_cfg_l.c libxlu_cfg_y.c libxlu_disk_l.c
-+
- %.c %.h:: %.y
- @rm -f $*.[ch]
- $(BISON) --output=$*.c $<
---
-2.40.0
-
diff --git a/0024-libxl-add-support-for-running-bootloader-in-restrict.patch b/0024-libxl-add-support-for-running-bootloader-in-restrict.patch
new file mode 100644
index 0000000..89d7299
--- /dev/null
+++ b/0024-libxl-add-support-for-running-bootloader-in-restrict.patch
@@ -0,0 +1,251 @@
+From 5182683fffa6b1d4c940203bbb85bb054558c137 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau@citrix.com>
+Date: Mon, 25 Sep 2023 14:30:20 +0200
+Subject: [PATCH 24/27] libxl: add support for running bootloader in restricted
+ mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Much like the device model depriv mode, add the same kind of support for the
+bootloader. Such feature allows passing a UID as a parameter for the
+bootloader to run as, together with the bootloader itself taking the necessary
+actions to isolate.
+
+Note that the user to run the bootloader as must have the right permissions to
+access the guest disk image (in read mode only), and that the bootloader will
+be run in non-interactive mode when restricted.
+
+If enabled, bootloader restrict mode will attempt to re-use the user(s) from the
+QEMU depriv implementation if no user is provided in the configuration file or
+the environment. See docs/features/qemu-deprivilege.pandoc for more
+information about how to set up those users.
+
+Bootloader restrict mode is not enabled by default as it requires certain
+setup to be done first (setup of the user(s) to use in restrict mode).
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+(cherry picked from commit 1f762642d2cad1a40634e3280361928109d902f1)
+---
+ docs/man/xl.1.pod.in | 33 +++++++++++
+ tools/libs/light/libxl_bootloader.c | 89 ++++++++++++++++++++++++++++-
+ tools/libs/light/libxl_dm.c | 8 +--
+ tools/libs/light/libxl_internal.h | 8 +++
+ 4 files changed, 131 insertions(+), 7 deletions(-)
+
+diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in
+index 45e1430aeb..96e6fb1c32 100644
+--- a/docs/man/xl.1.pod.in
++++ b/docs/man/xl.1.pod.in
+@@ -1976,6 +1976,39 @@ ignored:
+
+ =back
+
++=head1 ENVIRONMENT VARIABLES
++
++The following environment variables shall affect the execution of xl:
++
++=over 4
++
++=item LIBXL_BOOTLOADER_RESTRICT
++
++Attempt to restrict the bootloader after startup, to limit the
++consequences of security vulnerabilities due to parsing guest
++owned image files.
++
++See docs/features/qemu-deprivilege.pandoc for more information
++on how to setup the unprivileged users.
++
++Note that running the bootloader in restricted mode also implies using
++non-interactive mode, and the disk image must be readable by the
++restricted user.
++
++Having this variable set is equivalent to enabling the option, even if the
++value is 0.
++
++=item LIBXL_BOOTLOADER_USER
++
++When using bootloader_restrict, run the bootloader as this user. If
++not set the default QEMU restrict users will be used.
++
++NOTE: Each domain MUST have a SEPARATE username.
++
++See docs/features/qemu-deprivilege.pandoc for more information.
++
++=back
++
+ =head1 SEE ALSO
+
+ The following man pages:
+diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c
+index 1bc6e51827..d3a8a4a9ba 100644
+--- a/tools/libs/light/libxl_bootloader.c
++++ b/tools/libs/light/libxl_bootloader.c
+@@ -14,6 +14,7 @@
+
+ #include "libxl_osdeps.h" /* must come before any other headers */
+
++#include <pwd.h>
+ #include <termios.h>
+ #ifdef HAVE_UTMP_H
+ #include <utmp.h>
+@@ -42,8 +43,71 @@ static void bootloader_arg(libxl__bootloader_state *bl, const char *arg)
+ bl->args[bl->nargs++] = arg;
+ }
+
+-static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
+- const char *bootloader_path)
++static int bootloader_uid(libxl__gc *gc, domid_t guest_domid,
++ const char *user, uid_t *intended_uid)
++{
++ struct passwd *user_base, user_pwbuf;
++ int rc;
++
++ if (user) {
++ rc = userlookup_helper_getpwnam(gc, user, &user_pwbuf, &user_base);
++ if (rc) return rc;
++
++ if (!user_base) {
++ LOGD(ERROR, guest_domid, "Couldn't find user %s", user);
++ return ERROR_INVAL;
++ }
++
++ *intended_uid = user_base->pw_uid;
++ return 0;
++ }
++
++ /* Re-use QEMU user range for the bootloader. */
++ rc = userlookup_helper_getpwnam(gc, LIBXL_QEMU_USER_RANGE_BASE,
++ &user_pwbuf, &user_base);
++ if (rc) return rc;
++
++ if (user_base) {
++ struct passwd *user_clash, user_clash_pwbuf;
++ uid_t temp_uid = user_base->pw_uid + guest_domid;
++
++ rc = userlookup_helper_getpwuid(gc, temp_uid, &user_clash_pwbuf,
++ &user_clash);
++ if (rc) return rc;
++
++ if (user_clash) {
++ LOGD(ERROR, guest_domid,
++ "wanted to use uid %ld (%s + %d) but that is user %s !",
++ (long)temp_uid, LIBXL_QEMU_USER_RANGE_BASE,
++ guest_domid, user_clash->pw_name);
++ return ERROR_INVAL;
++ }
++
++ *intended_uid = temp_uid;
++ return 0;
++ }
++
++ rc = userlookup_helper_getpwnam(gc, LIBXL_QEMU_USER_SHARED, &user_pwbuf,
++ &user_base);
++ if (rc) return rc;
++
++ if (user_base) {
++ LOGD(WARN, guest_domid, "Could not find user %s, falling back to %s",
++ LIBXL_QEMU_USER_RANGE_BASE, LIBXL_QEMU_USER_SHARED);
++ *intended_uid = user_base->pw_uid;
++
++ return 0;
++ }
++
++ LOGD(ERROR, guest_domid,
++ "Could not find user %s or range base pseudo-user %s, cannot restrict",
++ LIBXL_QEMU_USER_SHARED, LIBXL_QEMU_USER_RANGE_BASE);
++
++ return ERROR_INVAL;
++}
++
++static int make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
++ const char *bootloader_path)
+ {
+ const libxl_domain_build_info *info = bl->info;
+
+@@ -61,6 +125,23 @@ static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
+ ARG(GCSPRINTF("--ramdisk=%s", info->ramdisk));
+ if (info->cmdline && *info->cmdline != '\0')
+ ARG(GCSPRINTF("--args=%s", info->cmdline));
++ if (getenv("LIBXL_BOOTLOADER_RESTRICT") ||
++ getenv("LIBXL_BOOTLOADER_USER")) {
++ uid_t uid = -1;
++ int rc = bootloader_uid(gc, bl->domid, getenv("LIBXL_BOOTLOADER_USER"),
++ &uid);
++
++ if (rc) return rc;
++
++ assert(uid != -1);
++ if (!uid) {
++ LOGD(ERROR, bl->domid, "bootloader restrict UID is 0 (root)!");
++ return ERROR_INVAL;
++ }
++ LOGD(DEBUG, bl->domid, "using uid %ld", (long)uid);
++ ARG(GCSPRINTF("--runas=%ld", (long)uid));
++ ARG("--quiet");
++ }
+
+ ARG(GCSPRINTF("--output=%s", bl->outputpath));
+ ARG("--output-format=simple0");
+@@ -79,6 +160,7 @@ static void make_bootloader_args(libxl__gc *gc, libxl__bootloader_state *bl,
+ /* Sentinel for execv */
+ ARG(NULL);
+
++ return 0;
+ #undef ARG
+ }
+
+@@ -443,7 +525,8 @@ static void bootloader_disk_attached_cb(libxl__egc *egc,
+ bootloader = bltmp;
+ }
+
+- make_bootloader_args(gc, bl, bootloader);
++ rc = make_bootloader_args(gc, bl, bootloader);
++ if (rc) goto out;
+
+ bl->openpty.ao = ao;
+ bl->openpty.callback = bootloader_gotptys;
+diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c
+index fc264a3a13..14b593110f 100644
+--- a/tools/libs/light/libxl_dm.c
++++ b/tools/libs/light/libxl_dm.c
+@@ -80,10 +80,10 @@ static int libxl__create_qemu_logfile(libxl__gc *gc, char *name)
+ * On error, return a libxl-style error code.
+ */
+ #define DEFINE_USERLOOKUP_HELPER(NAME,SPEC_TYPE,STRUCTNAME,SYSCONF) \
+- static int userlookup_helper_##NAME(libxl__gc *gc, \
+- SPEC_TYPE spec, \
+- struct STRUCTNAME *resultbuf, \
+- struct STRUCTNAME **out) \
++ int userlookup_helper_##NAME(libxl__gc *gc, \
++ SPEC_TYPE spec, \
++ struct STRUCTNAME *resultbuf, \
++ struct STRUCTNAME **out) \
+ { \
+ struct STRUCTNAME *resultp = NULL; \
+ char *buf = NULL; \
+diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
+index cc27c72ecf..8415d1feed 100644
+--- a/tools/libs/light/libxl_internal.h
++++ b/tools/libs/light/libxl_internal.h
+@@ -4864,6 +4864,14 @@ struct libxl__cpu_policy {
+ struct xc_msr *msr;
+ };
+
++struct passwd;
++_hidden int userlookup_helper_getpwnam(libxl__gc*, const char *user,
++ struct passwd *res,
++ struct passwd **out);
++_hidden int userlookup_helper_getpwuid(libxl__gc*, uid_t uid,
++ struct passwd *res,
++ struct passwd **out);
++
+ #endif
+
+ /*
+--
+2.42.0
+
diff --git a/0025-libxl-limit-bootloader-execution-in-restricted-mode.patch b/0025-libxl-limit-bootloader-execution-in-restricted-mode.patch
new file mode 100644
index 0000000..1b5fef6
--- /dev/null
+++ b/0025-libxl-limit-bootloader-execution-in-restricted-mode.patch
@@ -0,0 +1,158 @@
+From a157b71cf530603d794d16eca3dd92ce83d4d55f Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau@citrix.com>
+Date: Thu, 28 Sep 2023 12:22:35 +0200
+Subject: [PATCH 25/27] libxl: limit bootloader execution in restricted mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Introduce a timeout for bootloader execution when running in restricted mode.
+
+Allow overriding the default timeout with an environment-provided value.
+
+This is part of XSA-443 / CVE-2023-34325
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
+(cherry picked from commit 9c114178ffd700112e91f5ec66cf5151b9c9a8cc)
+---
+ docs/man/xl.1.pod.in | 8 ++++++
+ tools/libs/light/libxl_bootloader.c | 40 +++++++++++++++++++++++++++++
+ tools/libs/light/libxl_internal.h | 2 ++
+ 3 files changed, 50 insertions(+)
+
+diff --git a/docs/man/xl.1.pod.in b/docs/man/xl.1.pod.in
+index 96e6fb1c32..8f056450a7 100644
+--- a/docs/man/xl.1.pod.in
++++ b/docs/man/xl.1.pod.in
+@@ -2007,6 +2007,14 @@ NOTE: Each domain MUST have a SEPARATE username.
+
+ See docs/features/qemu-deprivilege.pandoc for more information.
+
++=item LIBXL_BOOTLOADER_TIMEOUT
++
++Timeout in seconds for bootloader execution when running in restricted mode.
++Otherwise the build time default in LIBXL_BOOTLOADER_TIMEOUT will be used.
++
++If defined the value must be an unsigned integer between 0 and INT_MAX,
++otherwise behavior is undefined. Setting to 0 disables the timeout.
++
+ =back
+
+ =head1 SEE ALSO
+diff --git a/tools/libs/light/libxl_bootloader.c b/tools/libs/light/libxl_bootloader.c
+index d3a8a4a9ba..a4beff4265 100644
+--- a/tools/libs/light/libxl_bootloader.c
++++ b/tools/libs/light/libxl_bootloader.c
+@@ -30,6 +30,8 @@ static void bootloader_keystrokes_copyfail(libxl__egc *egc,
+ libxl__datacopier_state *dc, int rc, int onwrite, int errnoval);
+ static void bootloader_display_copyfail(libxl__egc *egc,
+ libxl__datacopier_state *dc, int rc, int onwrite, int errnoval);
++static void bootloader_timeout(libxl__egc *egc, libxl__ev_time *ev,
++ const struct timeval *requested_abs, int rc);
+ static void bootloader_domaindeath(libxl__egc*, libxl__domaindeathcheck *dc,
+ int rc);
+ static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
+@@ -297,6 +299,7 @@ void libxl__bootloader_init(libxl__bootloader_state *bl)
+ bl->ptys[0].master = bl->ptys[0].slave = 0;
+ bl->ptys[1].master = bl->ptys[1].slave = 0;
+ libxl__ev_child_init(&bl->child);
++ libxl__ev_time_init(&bl->time);
+ libxl__domaindeathcheck_init(&bl->deathcheck);
+ bl->keystrokes.ao = bl->ao; libxl__datacopier_init(&bl->keystrokes);
+ bl->display.ao = bl->ao; libxl__datacopier_init(&bl->display);
+@@ -314,6 +317,7 @@ static void bootloader_cleanup(libxl__egc *egc, libxl__bootloader_state *bl)
+ libxl__domaindeathcheck_stop(gc,&bl->deathcheck);
+ libxl__datacopier_kill(&bl->keystrokes);
+ libxl__datacopier_kill(&bl->display);
++ libxl__ev_time_deregister(gc, &bl->time);
+ for (i=0; i<2; i++) {
+ libxl__carefd_close(bl->ptys[i].master);
+ libxl__carefd_close(bl->ptys[i].slave);
+@@ -375,6 +379,7 @@ static void bootloader_stop(libxl__egc *egc,
+
+ libxl__datacopier_kill(&bl->keystrokes);
+ libxl__datacopier_kill(&bl->display);
++ libxl__ev_time_deregister(gc, &bl->time);
+ if (libxl__ev_child_inuse(&bl->child)) {
+ r = kill(bl->child.pid, SIGTERM);
+ if (r) LOGED(WARN, bl->domid, "%sfailed to kill bootloader [%lu]",
+@@ -637,6 +642,25 @@ static void bootloader_gotptys(libxl__egc *egc, libxl__openpty_state *op)
+
+ struct termios termattr;
+
++ if (getenv("LIBXL_BOOTLOADER_RESTRICT") ||
++ getenv("LIBXL_BOOTLOADER_USER")) {
++ const char *timeout_env = getenv("LIBXL_BOOTLOADER_TIMEOUT");
++ int timeout = timeout_env ? atoi(timeout_env)
++ : LIBXL_BOOTLOADER_TIMEOUT;
++
++ if (timeout) {
++ /* Set execution timeout */
++ rc = libxl__ev_time_register_rel(ao, &bl->time,
++ bootloader_timeout,
++ timeout * 1000);
++ if (rc) {
++ LOGED(ERROR, bl->domid,
++ "unable to register timeout for bootloader execution");
++ goto out;
++ }
++ }
++ }
++
+ pid_t pid = libxl__ev_child_fork(gc, &bl->child, bootloader_finished);
+ if (pid == -1) {
+ rc = ERROR_FAIL;
+@@ -702,6 +726,21 @@ static void bootloader_display_copyfail(libxl__egc *egc,
+ libxl__bootloader_state *bl = CONTAINER_OF(dc, *bl, display);
+ bootloader_copyfail(egc, "bootloader output", bl, 1, rc,onwrite,errnoval);
+ }
++static void bootloader_timeout(libxl__egc *egc, libxl__ev_time *ev,
++ const struct timeval *requested_abs, int rc)
++{
++ libxl__bootloader_state *bl = CONTAINER_OF(ev, *bl, time);
++ STATE_AO_GC(bl->ao);
++
++ libxl__ev_time_deregister(gc, &bl->time);
++
++ assert(libxl__ev_child_inuse(&bl->child));
++ LOGD(ERROR, bl->domid, "killing bootloader because of timeout");
++
++ libxl__ev_child_kill_deregister(ao, &bl->child, SIGKILL);
++
++ bootloader_callback(egc, bl, rc);
++}
+
+ static void bootloader_domaindeath(libxl__egc *egc,
+ libxl__domaindeathcheck *dc,
+@@ -718,6 +757,7 @@ static void bootloader_finished(libxl__egc *egc, libxl__ev_child *child,
+ STATE_AO_GC(bl->ao);
+ int rc;
+
++ libxl__ev_time_deregister(gc, &bl->time);
+ libxl__datacopier_kill(&bl->keystrokes);
+ libxl__datacopier_kill(&bl->display);
+
+diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
+index 8415d1feed..a9581289f4 100644
+--- a/tools/libs/light/libxl_internal.h
++++ b/tools/libs/light/libxl_internal.h
+@@ -103,6 +103,7 @@
+ #define LIBXL_QMP_CMD_TIMEOUT 10
+ #define LIBXL_STUBDOM_START_TIMEOUT 30
+ #define LIBXL_QEMU_BODGE_TIMEOUT 2
++#define LIBXL_BOOTLOADER_TIMEOUT 120
+ #define LIBXL_XENCONSOLE_LIMIT 1048576
+ #define LIBXL_XENCONSOLE_PROTOCOL "vt100"
+ #define LIBXL_MAXMEM_CONSTANT 1024
+@@ -3738,6 +3739,7 @@ struct libxl__bootloader_state {
+ libxl__openpty_state openpty;
+ libxl__openpty_result ptys[2]; /* [0] is for bootloader */
+ libxl__ev_child child;
++ libxl__ev_time time;
+ libxl__domaindeathcheck deathcheck;
+ int nargs, argsspace;
+ const char **args;
+--
+2.42.0
+
diff --git a/0025-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch b/0025-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch
deleted file mode 100644
index e73f62d..0000000
--- a/0025-x86-cpuid-Infrastructure-for-leaves-7-1-ecx-edx.patch
+++ /dev/null
@@ -1,128 +0,0 @@
-From 2094f834b85d32233c76763b014bc8764c3e36b1 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 3 Mar 2023 08:06:44 +0100
-Subject: [PATCH 25/61] x86/cpuid: Infrastructure for leaves 7:1{ecx,edx}
-
-We don't actually need ecx yet, but adding it in now will reduce the amount to
-which leaf 7 is out of order in a featureset.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: b4a23bf6293aadecfd03bf9e83974443e2eac9cb
-master date: 2023-02-09 18:26:17 +0000
----
- tools/misc/xen-cpuid.c | 10 ++++++++++
- xen/arch/x86/cpu/common.c | 3 ++-
- xen/include/public/arch-x86/cpufeatureset.h | 4 ++++
- xen/include/xen/lib/x86/cpuid.h | 17 +++++++++++++++--
- 4 files changed, 31 insertions(+), 3 deletions(-)
-
-diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
-index cd094427dd..3cfbbf043f 100644
---- a/tools/misc/xen-cpuid.c
-+++ b/tools/misc/xen-cpuid.c
-@@ -198,6 +198,14 @@ static const char *const str_7b1[32] =
- {
- };
-
-+static const char *const str_7c1[32] =
-+{
-+};
-+
-+static const char *const str_7d1[32] =
-+{
-+};
-+
- static const char *const str_7d2[32] =
- {
- [ 0] = "intel-psfd",
-@@ -223,6 +231,8 @@ static const struct {
- { "0x80000021.eax", "e21a", str_e21a },
- { "0x00000007:1.ebx", "7b1", str_7b1 },
- { "0x00000007:2.edx", "7d2", str_7d2 },
-+ { "0x00000007:1.ecx", "7c1", str_7c1 },
-+ { "0x00000007:1.edx", "7d1", str_7d1 },
- };
-
- #define COL_ALIGN "18"
-diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
-index 9ce148a666..8222de6461 100644
---- a/xen/arch/x86/cpu/common.c
-+++ b/xen/arch/x86/cpu/common.c
-@@ -448,7 +448,8 @@ static void generic_identify(struct cpuinfo_x86 *c)
- cpuid_count(7, 1,
- &c->x86_capability[FEATURESET_7a1],
- &c->x86_capability[FEATURESET_7b1],
-- &tmp, &tmp);
-+ &c->x86_capability[FEATURESET_7c1],
-+ &c->x86_capability[FEATURESET_7d1]);
- if (max_subleaf >= 2)
- cpuid_count(7, 2,
- &tmp, &tmp, &tmp,
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index e073122140..0b01ca5e8f 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -304,6 +304,10 @@ XEN_CPUFEATURE(NSCB, 11*32+ 6) /*A Null Selector Clears Base (and
- /* Intel-defined CPU features, CPUID level 0x00000007:2.edx, word 13 */
- XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */
-
-+/* Intel-defined CPU features, CPUID level 0x00000007:1.ecx, word 14 */
-+
-+/* Intel-defined CPU features, CPUID level 0x00000007:1.edx, word 15 */
-+
- #endif /* XEN_CPUFEATURE */
-
- /* Clean up from a default include. Close the enum (for C). */
-diff --git a/xen/include/xen/lib/x86/cpuid.h b/xen/include/xen/lib/x86/cpuid.h
-index 50be07c0eb..fa98b371ee 100644
---- a/xen/include/xen/lib/x86/cpuid.h
-+++ b/xen/include/xen/lib/x86/cpuid.h
-@@ -17,7 +17,9 @@
- #define FEATURESET_7a1 10 /* 0x00000007:1.eax */
- #define FEATURESET_e21a 11 /* 0x80000021.eax */
- #define FEATURESET_7b1 12 /* 0x00000007:1.ebx */
--#define FEATURESET_7d2 13 /* 0x80000007:2.edx */
-+#define FEATURESET_7d2 13 /* 0x00000007:2.edx */
-+#define FEATURESET_7c1 14 /* 0x00000007:1.ecx */
-+#define FEATURESET_7d1 15 /* 0x00000007:1.edx */
-
- struct cpuid_leaf
- {
-@@ -194,7 +196,14 @@ struct cpuid_policy
- uint32_t _7b1;
- struct { DECL_BITFIELD(7b1); };
- };
-- uint32_t /* c */:32, /* d */:32;
-+ union {
-+ uint32_t _7c1;
-+ struct { DECL_BITFIELD(7c1); };
-+ };
-+ union {
-+ uint32_t _7d1;
-+ struct { DECL_BITFIELD(7d1); };
-+ };
-
- /* Subleaf 2. */
- uint32_t /* a */:32, /* b */:32, /* c */:32;
-@@ -343,6 +352,8 @@ static inline void cpuid_policy_to_featureset(
- fs[FEATURESET_e21a] = p->extd.e21a;
- fs[FEATURESET_7b1] = p->feat._7b1;
- fs[FEATURESET_7d2] = p->feat._7d2;
-+ fs[FEATURESET_7c1] = p->feat._7c1;
-+ fs[FEATURESET_7d1] = p->feat._7d1;
- }
-
- /* Fill in a CPUID policy from a featureset bitmap. */
-@@ -363,6 +374,8 @@ static inline void cpuid_featureset_to_policy(
- p->extd.e21a = fs[FEATURESET_e21a];
- p->feat._7b1 = fs[FEATURESET_7b1];
- p->feat._7d2 = fs[FEATURESET_7d2];
-+ p->feat._7c1 = fs[FEATURESET_7c1];
-+ p->feat._7d1 = fs[FEATURESET_7d1];
- }
-
- static inline uint64_t cpuid_policy_xcr0_max(const struct cpuid_policy *p)
---
-2.40.0
-
diff --git a/0026-x86-shskt-Disable-CET-SS-on-parts-susceptible-to-fra.patch b/0026-x86-shskt-Disable-CET-SS-on-parts-susceptible-to-fra.patch
deleted file mode 100644
index 7fd4031..0000000
--- a/0026-x86-shskt-Disable-CET-SS-on-parts-susceptible-to-fra.patch
+++ /dev/null
@@ -1,191 +0,0 @@
-From 5857cc632b884711c172c5766b8fbba59f990b47 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 3 Mar 2023 08:12:24 +0100
-Subject: [PATCH 26/61] x86/shskt: Disable CET-SS on parts susceptible to
- fractured updates
-
-Refer to Intel SDM Rev 70 (Dec 2022), Vol3 17.2.3 "Supervisor Shadow Stack
-Token".
-
-Architecturally, an event delivery which starts in CPL<3 and switches shadow
-stack will first validate the Supervisor Shadow Stack Token (setting the busy
-bit), then pushes CS/LIP/SSP. One example of this is an NMI interrupting Xen.
-
-Some CPUs suffer from an issue called fracturing, whereby a fault/vmexit/etc
-between setting the busy bit and completing the event injection renders the
-action non-restartable, because when it comes time to restart, the busy bit is
-found to be already set.
-
-This is far more easily encountered under virt, yet it is not the fault of the
-hypervisor, nor the fault of the guest kernel. The fault lies somewhere
-between the architectural specification, and the uarch behaviour.
-
-Intel have allocated CPUID.7[1].ecx[18] CET_SSS to enumerate that supervisor
-shadow stacks are safe to use. Because of how Xen lays out its shadow stacks,
-fracturing is not expected to be a problem on native.
-
-Detect this case on boot and default to not using shstk if virtualised.
-Specifying `cet=shstk` on the command line will override this heuristic and
-enable shadow stacks irrespective.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 01e7477d1b081cff4288ff9f51ec59ee94c03ee0
-master date: 2023-02-09 18:26:17 +0000
----
- docs/misc/xen-command-line.pandoc | 7 +++-
- tools/libs/light/libxl_cpuid.c | 2 +
- tools/misc/xen-cpuid.c | 1 +
- xen/arch/x86/cpu/common.c | 8 +++-
- xen/arch/x86/setup.c | 46 +++++++++++++++++----
- xen/include/public/arch-x86/cpufeatureset.h | 1 +
- 6 files changed, 55 insertions(+), 10 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index b3f60cd923..a6018fd5c3 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -287,10 +287,15 @@ can be maintained with the pv-shim mechanism.
- protection.
-
- The option is available when `CONFIG_XEN_SHSTK` is compiled in, and
-- defaults to `true` on hardware supporting CET-SS. Specifying
-+ generally defaults to `true` on hardware supporting CET-SS. Specifying
- `cet=no-shstk` will cause Xen not to use Shadow Stacks even when support
- is available in hardware.
-
-+ Some hardware suffers from an issue known as Supervisor Shadow Stack
-+ Fracturing. On such hardware, Xen will default to not using Shadow Stacks
-+ when virtualised. Specifying `cet=shstk` will override this heuristic and
-+ enable Shadow Stacks unilaterally.
-+
- * The `ibt=` boolean controls whether Xen uses Indirect Branch Tracking for
- its own protection.
-
-diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c
-index 691d5c6b2a..b4eacc2bd5 100644
---- a/tools/libs/light/libxl_cpuid.c
-+++ b/tools/libs/light/libxl_cpuid.c
-@@ -234,6 +234,8 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str)
- {"fsrs", 0x00000007, 1, CPUID_REG_EAX, 11, 1},
- {"fsrcs", 0x00000007, 1, CPUID_REG_EAX, 12, 1},
-
-+ {"cet-sss", 0x00000007, 1, CPUID_REG_EDX, 18, 1},
-+
- {"intel-psfd", 0x00000007, 2, CPUID_REG_EDX, 0, 1},
-
- {"lahfsahf", 0x80000001, NA, CPUID_REG_ECX, 0, 1},
-diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
-index 3cfbbf043f..db9c4ed8fc 100644
---- a/tools/misc/xen-cpuid.c
-+++ b/tools/misc/xen-cpuid.c
-@@ -204,6 +204,7 @@ static const char *const str_7c1[32] =
-
- static const char *const str_7d1[32] =
- {
-+ [18] = "cet-sss",
- };
-
- static const char *const str_7d2[32] =
-diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
-index 8222de6461..e1fc034ce6 100644
---- a/xen/arch/x86/cpu/common.c
-+++ b/xen/arch/x86/cpu/common.c
-@@ -344,9 +344,15 @@ void __init early_cpu_init(void)
- c->x86_model, c->x86_model, c->x86_mask, eax);
-
- if (c->cpuid_level >= 7) {
-- cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
-+ uint32_t max_subleaf;
-+
-+ cpuid_count(7, 0, &max_subleaf, &ebx, &ecx, &edx);
- c->x86_capability[cpufeat_word(X86_FEATURE_CET_SS)] = ecx;
- c->x86_capability[cpufeat_word(X86_FEATURE_CET_IBT)] = edx;
-+
-+ if (max_subleaf >= 1)
-+ cpuid_count(7, 1, &eax, &ebx, &ecx,
-+ &c->x86_capability[FEATURESET_7d1]);
- }
-
- eax = cpuid_eax(0x80000000);
-diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
-index 70b37d8afe..f0de805780 100644
---- a/xen/arch/x86/setup.c
-+++ b/xen/arch/x86/setup.c
-@@ -98,11 +98,7 @@ unsigned long __initdata highmem_start;
- size_param("highmem-start", highmem_start);
- #endif
-
--#ifdef CONFIG_XEN_SHSTK
--static bool __initdata opt_xen_shstk = true;
--#else
--#define opt_xen_shstk false
--#endif
-+static int8_t __initdata opt_xen_shstk = -IS_ENABLED(CONFIG_XEN_SHSTK);
-
- #ifdef CONFIG_XEN_IBT
- static bool __initdata opt_xen_ibt = true;
-@@ -1113,11 +1109,45 @@ void __init noreturn __start_xen(unsigned long mbi_p)
- early_cpu_init();
-
- /* Choose shadow stack early, to set infrastructure up appropriately. */
-- if ( opt_xen_shstk && boot_cpu_has(X86_FEATURE_CET_SS) )
-+ if ( !boot_cpu_has(X86_FEATURE_CET_SS) )
-+ opt_xen_shstk = 0;
-+
-+ if ( opt_xen_shstk )
- {
-- printk("Enabling Supervisor Shadow Stacks\n");
-+ /*
-+ * Some CPUs suffer from Shadow Stack Fracturing, an issue whereby a
-+ * fault/VMExit/etc between setting a Supervisor Busy bit and the
-+ * event delivery completing renders the operation non-restartable.
-+ * On restart, event delivery will find the Busy bit already set.
-+ *
-+ * This is a problem on bare metal, but outside of synthetic cases or
-+ * a very badly timed #MC, it's not believed to be a problem. It is a
-+ * much bigger problem under virt, because we can VMExit for a number
-+ * of legitimate reasons and tickle this bug.
-+ *
-+ * CPUs with this addressed enumerate CET-SSS to indicate that
-+ * supervisor shadow stacks are now safe to use.
-+ */
-+ bool cpu_has_bug_shstk_fracture =
-+ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-+ !boot_cpu_has(X86_FEATURE_CET_SSS);
-
-- setup_force_cpu_cap(X86_FEATURE_XEN_SHSTK);
-+ /*
-+ * On bare metal, assume that Xen won't be impacted by shstk
-+ * fracturing problems. Under virt, be more conservative and disable
-+ * shstk by default.
-+ */
-+ if ( opt_xen_shstk == -1 )
-+ opt_xen_shstk =
-+ cpu_has_hypervisor ? !cpu_has_bug_shstk_fracture
-+ : true;
-+
-+ if ( opt_xen_shstk )
-+ {
-+ printk("Enabling Supervisor Shadow Stacks\n");
-+
-+ setup_force_cpu_cap(X86_FEATURE_XEN_SHSTK);
-+ }
- }
-
- if ( opt_xen_ibt && boot_cpu_has(X86_FEATURE_CET_IBT) )
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index 0b01ca5e8f..4832ad09df 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -307,6 +307,7 @@ XEN_CPUFEATURE(INTEL_PSFD, 13*32+ 0) /*A MSR_SPEC_CTRL.PSFD */
- /* Intel-defined CPU features, CPUID level 0x00000007:1.ecx, word 14 */
-
- /* Intel-defined CPU features, CPUID level 0x00000007:1.edx, word 15 */
-+XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow Stacks safe to use */
-
- #endif /* XEN_CPUFEATURE */
-
---
-2.40.0
-
diff --git a/0026-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch b/0026-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch
new file mode 100644
index 0000000..97f95f3
--- /dev/null
+++ b/0026-x86-svm-Fix-asymmetry-with-AMD-DR-MASK-context-switc.patch
@@ -0,0 +1,104 @@
+From 3c22a9bf8703a297431ac5ad110e6d523758eae1 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 26 Sep 2023 20:06:57 +0100
+Subject: [PATCH 26/27] x86/svm: Fix asymmetry with AMD DR MASK context
+ switching
+
+The handling of MSR_DR{0..3}_MASK is asymmetric between PV and HVM guests.
+
+HVM guests context switch in based on the guest view of DBEXT, whereas PV
+guests switch in based on the host capability. Both guest types leave the
+context dirty for the next vCPU.
+
+This leads to the following issue:
+
+ * PV or HVM vCPU has debugging active (%dr7 + mask)
+ * Switch out deactivates %dr7 but leaves other state stale in hardware
+ * HVM vCPU with debugging active but can't see DBEXT is switched in
+ * Switch in loads %dr7 but leaves the mask MSRs alone
+
+Now, the HVM vCPU is operating in the context of the prior vCPU's mask MSR,
+and furthermore in a case where it genuinely expects there to be no masking
+MSRs.
+
+As a stopgap, adjust the HVM path to switch in/out the masks based on host
+capabilities rather than guest visibility (i.e. like the PV path). Adjustment
+of the intercepts still needs to be dependent on the guest visibility
+of DBEXT.
+
+This is part of XSA-444 / CVE-2023-34327
+
+Fixes: c097f54912d3 ("x86/SVM: support data breakpoint extension registers")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+(cherry picked from commit 5d54282f984bb9a7a65b3d12208584f9fdf1c8e1)
+---
+ xen/arch/x86/hvm/svm/svm.c | 24 ++++++++++++++++++------
+ xen/arch/x86/traps.c | 5 +++++
+ 2 files changed, 23 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
+index a019d196e0..ba4069f910 100644
+--- a/xen/arch/x86/hvm/svm/svm.c
++++ b/xen/arch/x86/hvm/svm/svm.c
+@@ -185,6 +185,10 @@ static void svm_save_dr(struct vcpu *v)
+ v->arch.hvm.flag_dr_dirty = 0;
+ vmcb_set_dr_intercepts(vmcb, ~0u);
+
++ /*
++ * The guest can only have changed the mask MSRs if we previous dropped
++ * intercepts. Re-read them from hardware.
++ */
+ if ( v->domain->arch.cpuid->extd.dbext )
+ {
+ svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_RW);
+@@ -216,17 +220,25 @@ static void __restore_debug_registers(struct vmcb_struct *vmcb, struct vcpu *v)
+
+ ASSERT(v == current);
+
+- if ( v->domain->arch.cpuid->extd.dbext )
++ /*
++ * Both the PV and HVM paths leave stale DR_MASK values in hardware on
++ * context-switch-out. If we're activating %dr7 for the guest, we must
++ * sync the DR_MASKs too, whether or not the guest can see them.
++ */
++ if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+ {
+- svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+- svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+- svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+- svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_NONE);
+-
+ wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, v->arch.msrs->dr_mask[0]);
+ wrmsrl(MSR_AMD64_DR1_ADDRESS_MASK, v->arch.msrs->dr_mask[1]);
+ wrmsrl(MSR_AMD64_DR2_ADDRESS_MASK, v->arch.msrs->dr_mask[2]);
+ wrmsrl(MSR_AMD64_DR3_ADDRESS_MASK, v->arch.msrs->dr_mask[3]);
++
++ if ( v->domain->arch.cpuid->extd.dbext )
++ {
++ svm_intercept_msr(v, MSR_AMD64_DR0_ADDRESS_MASK, MSR_INTERCEPT_NONE);
++ svm_intercept_msr(v, MSR_AMD64_DR1_ADDRESS_MASK, MSR_INTERCEPT_NONE);
++ svm_intercept_msr(v, MSR_AMD64_DR2_ADDRESS_MASK, MSR_INTERCEPT_NONE);
++ svm_intercept_msr(v, MSR_AMD64_DR3_ADDRESS_MASK, MSR_INTERCEPT_NONE);
++ }
+ }
+
+ write_debugreg(0, v->arch.dr[0]);
+diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
+index f7992ff230..a142a63dd8 100644
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -2314,6 +2314,11 @@ void activate_debugregs(const struct vcpu *curr)
+ if ( curr->arch.dr7 & DR7_ACTIVE_MASK )
+ write_debugreg(7, curr->arch.dr7);
+
++ /*
++ * Both the PV and HVM paths leave stale DR_MASK values in hardware on
++ * context-switch-out. If we're activating %dr7 for the guest, we must
++ * sync the DR_MASKs too, whether or not the guest can see them.
++ */
+ if ( boot_cpu_has(X86_FEATURE_DBEXT) )
+ {
+ wrmsrl(MSR_AMD64_DR0_ADDRESS_MASK, curr->arch.msrs->dr_mask[0]);
+--
+2.42.0
+
diff --git a/0027-credit2-respect-credit2_runqueue-all-when-arranging-.patch b/0027-credit2-respect-credit2_runqueue-all-when-arranging-.patch
deleted file mode 100644
index 6c8ab5c..0000000
--- a/0027-credit2-respect-credit2_runqueue-all-when-arranging-.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-From 366693226ce025e8721626609b4b43b9061b55f5 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
- <marmarek@invisiblethingslab.com>
-Date: Fri, 3 Mar 2023 08:13:20 +0100
-Subject: [PATCH 27/61] credit2: respect credit2_runqueue=all when arranging
- runqueues
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Documentation for credit2_runqueue=all says it should create one queue
-for all pCPUs on the host. But since the introduction of
-sched_credit2_max_cpus_runqueue, it actually created separate runqueue
-per socket, even if the CPUs count is below
-sched_credit2_max_cpus_runqueue.
-
-Adjust the condition to skip sibling check in case of
-credit2_runqueue=all.
-
-Fixes: 8e2aa76dc167 ("xen: credit2: limit the max number of CPUs in a runqueue")
-Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-master commit: 1f5747ee929fbbcae58d7234c6c38a77495d0cfe
-master date: 2023-02-15 16:12:42 +0100
----
- docs/misc/xen-command-line.pandoc | 5 +++++
- xen/common/sched/credit2.c | 9 +++++++--
- 2 files changed, 12 insertions(+), 2 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index a6018fd5c3..7b7a619c1b 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -724,6 +724,11 @@ Available alternatives, with their meaning, are:
- * `all`: just one runqueue shared by all the logical pCPUs of
- the host
-
-+Regardless of the above choice, Xen attempts to respect
-+`sched_credit2_max_cpus_runqueue` limit, which may mean more than one runqueue
-+for the `all` value. If that isn't intended, raise
-+the `sched_credit2_max_cpus_runqueue` value.
-+
- ### dbgp
- > `= ehci[ <integer> | @pci<bus>:<slot>.<func> ]`
-
-diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
-index 6396b38e04..1a240f417a 100644
---- a/xen/common/sched/credit2.c
-+++ b/xen/common/sched/credit2.c
-@@ -996,9 +996,14 @@ cpu_add_to_runqueue(const struct scheduler *ops, unsigned int cpu)
- *
- * Otherwise, let's try to make sure that siblings stay in the
- * same runqueue, pretty much under any cinrcumnstances.
-+ *
-+ * Furthermore, try to respect credit2_runqueue=all, as long as
-+ * max_cpus_runq isn't violated.
- */
-- if ( rqd->refcnt < max_cpus_runq && (ops->cpupool->gran != SCHED_GRAN_cpu ||
-- cpu_runqueue_siblings_match(rqd, cpu, max_cpus_runq)) )
-+ if ( rqd->refcnt < max_cpus_runq &&
-+ (ops->cpupool->gran != SCHED_GRAN_cpu ||
-+ cpu_runqueue_siblings_match(rqd, cpu, max_cpus_runq) ||
-+ opt_runqueue == OPT_RUNQUEUE_ALL) )
- {
- /*
- * This runqueue is ok, but as we said, we also want an even
---
-2.40.0
-
diff --git a/0027-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch b/0027-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch
new file mode 100644
index 0000000..353a5e3
--- /dev/null
+++ b/0027-x86-pv-Correct-the-auditing-of-guest-breakpoint-addr.patch
@@ -0,0 +1,86 @@
+From 29efce0f8f10e381417a61f2f9988b40d4f6bcf0 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Tue, 26 Sep 2023 20:06:57 +0100
+Subject: [PATCH 27/27] x86/pv: Correct the auditing of guest breakpoint
+ addresses
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The use of access_ok() is buggy, because it permits access to the compat
+translation area. 64bit PV guests don't use the XLAT area, but on AMD
+hardware, the DBEXT feature allows a breakpoint to match up to a 4G aligned
+region, allowing the breakpoint to reach outside of the XLAT area.
+
+Prior to c/s cda16c1bb223 ("x86: mirror compat argument translation area for
+32-bit PV"), the live GDT was within 4G of the XLAT area.
+
+All together, this allowed a malicious 64bit PV guest on AMD hardware to place
+a breakpoint over the live GDT, and trigger a #DB livelock (CVE-2015-8104).
+
+Introduce breakpoint_addr_ok() and explain why __addr_ok() happens to be an
+appropriate check in this case.
+
+For Xen 4.14 and later, this is a latent bug because the XLAT area has moved
+to be on its own with nothing interesting adjacent. For Xen 4.13 and older on
+AMD hardware, this fixes a PV-trigger-able DoS.
+
+This is part of XSA-444 / CVE-2023-34328.
+
+Fixes: 65e355490817 ("x86/PV: support data breakpoint extension registers")
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit dc9d9aa62ddeb14abd5672690d30789829f58f7e)
+---
+ xen/arch/x86/pv/misc-hypercalls.c | 2 +-
+ xen/include/asm-x86/debugreg.h | 20 ++++++++++++++++++++
+ 2 files changed, 21 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c
+index 5dade24726..681c16108f 100644
+--- a/xen/arch/x86/pv/misc-hypercalls.c
++++ b/xen/arch/x86/pv/misc-hypercalls.c
+@@ -68,7 +68,7 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
+ switch ( reg )
+ {
+ case 0 ... 3:
+- if ( !access_ok(value, sizeof(long)) )
++ if ( !breakpoint_addr_ok(value) )
+ return -EPERM;
+
+ v->arch.dr[reg] = value;
+diff --git a/xen/include/asm-x86/debugreg.h b/xen/include/asm-x86/debugreg.h
+index c57914efc6..cc29826524 100644
+--- a/xen/include/asm-x86/debugreg.h
++++ b/xen/include/asm-x86/debugreg.h
+@@ -77,6 +77,26 @@
+ asm volatile ( "mov %%db" #reg ",%0" : "=r" (__val) ); \
+ __val; \
+ })
++
++/*
++ * Architecturally, %dr{0..3} can have any arbitrary value. However, Xen
++ * can't allow the guest to breakpoint the Xen address range, so we limit the
++ * guest to the lower canonical half, or above the Xen range in the higher
++ * canonical half.
++ *
++ * Breakpoint lengths are specified to mask the low order address bits,
++ * meaning all breakpoints are naturally aligned. With %dr7, the widest
++ * breakpoint is 8 bytes. With DBEXT, the widest breakpoint is 4G. Both of
++ * the Xen boundaries have >4G alignment.
++ *
++ * In principle we should account for HYPERVISOR_COMPAT_VIRT_START(d), but
++ * 64bit Xen has never enforced this for compat guests, and there's no problem
++ * (to Xen) if the guest breakpoints its alias of the M2P.  Skipping this
++ * aspect simplifies the logic, and causes us not to reject a migrating guest
++ * which operated fine on prior versions of Xen.
++ */
++#define breakpoint_addr_ok(a) __addr_ok(a)
++
+ long set_debugreg(struct vcpu *, unsigned int reg, unsigned long value);
+ void activate_debugregs(const struct vcpu *);
+
+--
+2.42.0
+
diff --git a/0028-x86-ucode-AMD-apply-the-patch-early-on-every-logical.patch b/0028-x86-ucode-AMD-apply-the-patch-early-on-every-logical.patch
deleted file mode 100644
index 55df5d0..0000000
--- a/0028-x86-ucode-AMD-apply-the-patch-early-on-every-logical.patch
+++ /dev/null
@@ -1,152 +0,0 @@
-From d1c6934b41f8288ea3169e63bce8a7eea9d9c549 Mon Sep 17 00:00:00 2001
-From: Sergey Dyasli <sergey.dyasli@citrix.com>
-Date: Fri, 3 Mar 2023 08:14:01 +0100
-Subject: [PATCH 28/61] x86/ucode/AMD: apply the patch early on every logical
- thread
-
-The original issue has been reported on AMD Bulldozer-based CPUs where
-ucode loading loses the LWP feature bit in order to gain the IBPB bit.
-LWP disabling is per-SMT/CMT core modification and needs to happen on
-each sibling thread despite the shared microcode engine. Otherwise,
-logical CPUs will end up with different cpuid capabilities.
-Link: https://bugzilla.kernel.org/show_bug.cgi?id=216211
-
-Guests running under Xen happen to be not affected because of levelling
-logic for the feature masking/override MSRs which causes the LWP bit to
-fall out and hides the issue. The latest recommendation from AMD, after
-discussing this bug, is to load ucode on every logical CPU.
-
-In Linux kernel this issue has been addressed by e7ad18d1169c
-("x86/microcode/AMD: Apply the patch early on every logical thread").
-Follow the same approach in Xen.
-
-Introduce SAME_UCODE match result and use it for early AMD ucode
-loading. Take this opportunity and move opt_ucode_allow_same out of
-compare_revisions() to the relevant callers and also modify the warning
-message based on it. Intel's side of things is modified for consistency
-but provides no functional change.
-
-Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: f4ef8a41b80831db2136bdaff9f946a1a4b051e7
-master date: 2023-02-21 15:08:05 +0100
----
- xen/arch/x86/cpu/microcode/amd.c | 11 ++++++++---
- xen/arch/x86/cpu/microcode/core.c | 24 ++++++++++++++++--------
- xen/arch/x86/cpu/microcode/intel.c | 10 +++++++---
- xen/arch/x86/cpu/microcode/private.h | 3 ++-
- 4 files changed, 33 insertions(+), 15 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c
-index fe92e594f1..52182c1a23 100644
---- a/xen/arch/x86/cpu/microcode/amd.c
-+++ b/xen/arch/x86/cpu/microcode/amd.c
-@@ -176,8 +176,8 @@ static enum microcode_match_result compare_revisions(
- if ( new_rev > old_rev )
- return NEW_UCODE;
-
-- if ( opt_ucode_allow_same && new_rev == old_rev )
-- return NEW_UCODE;
-+ if ( new_rev == old_rev )
-+ return SAME_UCODE;
-
- return OLD_UCODE;
- }
-@@ -220,8 +220,13 @@ static int apply_microcode(const struct microcode_patch *patch)
- unsigned int cpu = smp_processor_id();
- struct cpu_signature *sig = &per_cpu(cpu_sig, cpu);
- uint32_t rev, old_rev = sig->rev;
-+ enum microcode_match_result result = microcode_fits(patch);
-
-- if ( microcode_fits(patch) != NEW_UCODE )
-+ /*
-+ * Allow application of the same revision to pick up SMT-specific changes
-+ * even if the revision of the other SMT thread is already up-to-date.
-+ */
-+ if ( result != NEW_UCODE && result != SAME_UCODE )
- return -EINVAL;
-
- if ( check_final_patch_levels(sig) )
-diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c
-index ac3ceb567c..ceec1f1edc 100644
---- a/xen/arch/x86/cpu/microcode/core.c
-+++ b/xen/arch/x86/cpu/microcode/core.c
-@@ -608,16 +608,24 @@ static long microcode_update_helper(void *data)
- * that ucode revision.
- */
- spin_lock(&microcode_mutex);
-- if ( microcode_cache &&
-- microcode_ops->compare_patch(patch, microcode_cache) != NEW_UCODE )
-+ if ( microcode_cache )
- {
-- spin_unlock(&microcode_mutex);
-- printk(XENLOG_WARNING "microcode: couldn't find any newer revision "
-- "in the provided blob!\n");
-- microcode_free_patch(patch);
-- ret = -ENOENT;
-+ enum microcode_match_result result;
-
-- goto put;
-+ result = microcode_ops->compare_patch(patch, microcode_cache);
-+
-+ if ( result != NEW_UCODE &&
-+ !(opt_ucode_allow_same && result == SAME_UCODE) )
-+ {
-+ spin_unlock(&microcode_mutex);
-+ printk(XENLOG_WARNING
-+ "microcode: couldn't find any newer%s revision in the provided blob!\n",
-+ opt_ucode_allow_same ? " (or the same)" : "");
-+ microcode_free_patch(patch);
-+ ret = -ENOENT;
-+
-+ goto put;
-+ }
- }
- spin_unlock(&microcode_mutex);
-
-diff --git a/xen/arch/x86/cpu/microcode/intel.c b/xen/arch/x86/cpu/microcode/intel.c
-index f6d01490e0..c26fbb8cc7 100644
---- a/xen/arch/x86/cpu/microcode/intel.c
-+++ b/xen/arch/x86/cpu/microcode/intel.c
-@@ -232,8 +232,8 @@ static enum microcode_match_result compare_revisions(
- if ( new_rev > old_rev )
- return NEW_UCODE;
-
-- if ( opt_ucode_allow_same && new_rev == old_rev )
-- return NEW_UCODE;
-+ if ( new_rev == old_rev )
-+ return SAME_UCODE;
-
- /*
- * Treat pre-production as always applicable - anyone using pre-production
-@@ -290,8 +290,12 @@ static int apply_microcode(const struct microcode_patch *patch)
- unsigned int cpu = smp_processor_id();
- struct cpu_signature *sig = &this_cpu(cpu_sig);
- uint32_t rev, old_rev = sig->rev;
-+ enum microcode_match_result result;
-+
-+ result = microcode_update_match(patch);
-
-- if ( microcode_update_match(patch) != NEW_UCODE )
-+ if ( result != NEW_UCODE &&
-+ !(opt_ucode_allow_same && result == SAME_UCODE) )
- return -EINVAL;
-
- wbinvd();
-diff --git a/xen/arch/x86/cpu/microcode/private.h b/xen/arch/x86/cpu/microcode/private.h
-index c085a10268..feafab0677 100644
---- a/xen/arch/x86/cpu/microcode/private.h
-+++ b/xen/arch/x86/cpu/microcode/private.h
-@@ -6,7 +6,8 @@
- extern bool opt_ucode_allow_same;
-
- enum microcode_match_result {
-- OLD_UCODE, /* signature matched, but revision id is older or equal */
-+ OLD_UCODE, /* signature matched, but revision id is older */
-+ SAME_UCODE, /* signature matched, but revision id is the same */
- NEW_UCODE, /* signature matched, but revision id is newer */
- MIS_UCODE, /* signature mismatched */
- };
---
-2.40.0
-
diff --git a/0029-x86-perform-mem_sharing-teardown-before-paging-teard.patch b/0029-x86-perform-mem_sharing-teardown-before-paging-teard.patch
deleted file mode 100644
index c96f44e..0000000
--- a/0029-x86-perform-mem_sharing-teardown-before-paging-teard.patch
+++ /dev/null
@@ -1,111 +0,0 @@
-From 700320a79297fb5087f7dd540424c468b2d2cffe Mon Sep 17 00:00:00 2001
-From: Tamas K Lengyel <tamas@tklengyel.com>
-Date: Fri, 3 Mar 2023 08:14:25 +0100
-Subject: [PATCH 29/61] x86: perform mem_sharing teardown before paging
- teardown
-
-An assert failure has been observed in p2m_teardown when performing vm
-forking and then destroying the forked VM (p2m-basic.c:173). The assert
-checks whether the domain's shared pages counter is 0. According to the
-patch that originally added the assert (7bedbbb5c31) the p2m_teardown
-should only happen after mem_sharing already relinquished all shared pages.
-
-In this patch we flip the order in which relinquish ops are called to avoid
-tripping the assert. Conceptually sharing being torn down makes sense to
-happen before paging is torn down.
-
-Fixes: e7aa55c0aab3 ("x86/p2m: free the paging memory pool preemptively")
-Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 2869349f0cb3a89dcbf1f1b30371f58df6309312
-master date: 2023-02-23 12:35:48 +0100
----
- xen/arch/x86/domain.c | 56 ++++++++++++++++++++++---------------------
- 1 file changed, 29 insertions(+), 27 deletions(-)
-
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
-index 3080cde62b..6eeb248908 100644
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -2343,9 +2343,9 @@ int domain_relinquish_resources(struct domain *d)
-
- enum {
- PROG_iommu_pagetables = 1,
-+ PROG_shared,
- PROG_paging,
- PROG_vcpu_pagetables,
-- PROG_shared,
- PROG_xen,
- PROG_l4,
- PROG_l3,
-@@ -2364,6 +2364,34 @@ int domain_relinquish_resources(struct domain *d)
- if ( ret )
- return ret;
-
-+#ifdef CONFIG_MEM_SHARING
-+ PROGRESS(shared):
-+
-+ if ( is_hvm_domain(d) )
-+ {
-+ /*
-+ * If the domain has shared pages, relinquish them allowing
-+ * for preemption.
-+ */
-+ ret = relinquish_shared_pages(d);
-+ if ( ret )
-+ return ret;
-+
-+ /*
-+ * If the domain is forked, decrement the parent's pause count
-+ * and release the domain.
-+ */
-+ if ( mem_sharing_is_fork(d) )
-+ {
-+ struct domain *parent = d->parent;
-+
-+ d->parent = NULL;
-+ domain_unpause(parent);
-+ put_domain(parent);
-+ }
-+ }
-+#endif
-+
- PROGRESS(paging):
-
- /* Tear down paging-assistance stuff. */
-@@ -2404,32 +2432,6 @@ int domain_relinquish_resources(struct domain *d)
- d->arch.auto_unmask = 0;
- }
-
--#ifdef CONFIG_MEM_SHARING
-- PROGRESS(shared):
--
-- if ( is_hvm_domain(d) )
-- {
-- /* If the domain has shared pages, relinquish them allowing
-- * for preemption. */
-- ret = relinquish_shared_pages(d);
-- if ( ret )
-- return ret;
--
-- /*
-- * If the domain is forked, decrement the parent's pause count
-- * and release the domain.
-- */
-- if ( mem_sharing_is_fork(d) )
-- {
-- struct domain *parent = d->parent;
--
-- d->parent = NULL;
-- domain_unpause(parent);
-- put_domain(parent);
-- }
-- }
--#endif
--
- spin_lock(&d->page_alloc_lock);
- page_list_splice(&d->arch.relmem_list, &d->page_list);
- INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
---
-2.40.0
-
diff --git a/0030-xen-Work-around-Clang-IAS-macro-expansion-bug.patch b/0030-xen-Work-around-Clang-IAS-macro-expansion-bug.patch
deleted file mode 100644
index a92f2f0..0000000
--- a/0030-xen-Work-around-Clang-IAS-macro-expansion-bug.patch
+++ /dev/null
@@ -1,115 +0,0 @@
-From 2b8f72a6b40dafc3fb40bce100cd62c4a377535a Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 3 Mar 2023 08:14:57 +0100
-Subject: [PATCH 30/61] xen: Work around Clang-IAS macro \@ expansion bug
-
-https://github.com/llvm/llvm-project/issues/60792
-
-It turns out that Clang-IAS does not expand \@ uniquely in a translation
-unit, and the XSA-426 change tickles this bug:
-
- <instantiation>:4:1: error: invalid symbol redefinition
- .L1_fill_rsb_loop:
- ^
- make[3]: *** [Rules.mk:247: arch/x86/acpi/cpu_idle.o] Error 1
-
-Extend DO_OVERWRITE_RSB with an optional parameter so C callers can mix %= in
-too, which Clang does seem to expand properly.
-
-Fixes: 63305e5392ec ("x86/spec-ctrl: Mitigate Cross-Thread Return Address Predictions")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: a2adacff0b91cc7b977abb209dc419a2ef15963f
-master date: 2023-02-24 17:44:29 +0000
----
- xen/include/asm-x86/spec_ctrl.h | 4 ++--
- xen/include/asm-x86/spec_ctrl_asm.h | 23 ++++++++++++++---------
- 2 files changed, 16 insertions(+), 11 deletions(-)
-
-diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
-index 391973ef6a..a431fea587 100644
---- a/xen/include/asm-x86/spec_ctrl.h
-+++ b/xen/include/asm-x86/spec_ctrl.h
-@@ -83,7 +83,7 @@ static always_inline void spec_ctrl_new_guest_context(void)
- wrmsrl(MSR_PRED_CMD, PRED_CMD_IBPB);
-
- /* (ab)use alternative_input() to specify clobbers. */
-- alternative_input("", "DO_OVERWRITE_RSB", X86_BUG_IBPB_NO_RET,
-+ alternative_input("", "DO_OVERWRITE_RSB xu=%=", X86_BUG_IBPB_NO_RET,
- : "rax", "rcx");
- }
-
-@@ -172,7 +172,7 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info)
- *
- * (ab)use alternative_input() to specify clobbers.
- */
-- alternative_input("", "DO_OVERWRITE_RSB", X86_FEATURE_SC_RSB_IDLE,
-+ alternative_input("", "DO_OVERWRITE_RSB xu=%=", X86_FEATURE_SC_RSB_IDLE,
- : "rax", "rcx");
- }
-
-diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
-index 9eb4ad9ab7..b61a5571ae 100644
---- a/xen/include/asm-x86/spec_ctrl_asm.h
-+++ b/xen/include/asm-x86/spec_ctrl_asm.h
-@@ -117,11 +117,16 @@
- .L\@_done:
- .endm
-
--.macro DO_OVERWRITE_RSB tmp=rax
-+.macro DO_OVERWRITE_RSB tmp=rax xu
- /*
- * Requires nothing
- * Clobbers \tmp (%rax by default), %rcx
- *
-+ * xu is an optional parameter to add eXtra Uniqueness. It is intended for
-+ * passing %= in from an asm() block, in order to work around
-+ * https://github.com/llvm/llvm-project/issues/60792 where Clang-IAS doesn't
-+ * expand \@ uniquely.
-+ *
- * Requires 256 bytes of {,shadow}stack space, but %rsp/SSP has no net
- * change. Based on Google's performance numbers, the loop is unrolled to 16
- * iterations and two calls per iteration.
-@@ -137,31 +142,31 @@
- mov $16, %ecx /* 16 iterations, two calls per loop */
- mov %rsp, %\tmp /* Store the current %rsp */
-
--.L\@_fill_rsb_loop:
-+.L\@_fill_rsb_loop\xu:
-
- .irp n, 1, 2 /* Unrolled twice. */
-- call .L\@_insert_rsb_entry_\n /* Create an RSB entry. */
-+ call .L\@_insert_rsb_entry\xu\n /* Create an RSB entry. */
-
--.L\@_capture_speculation_\n:
-+.L\@_capture_speculation\xu\n:
- pause
- lfence
-- jmp .L\@_capture_speculation_\n /* Capture rogue speculation. */
-+ jmp .L\@_capture_speculation\xu\n /* Capture rogue speculation. */
-
--.L\@_insert_rsb_entry_\n:
-+.L\@_insert_rsb_entry\xu\n:
- .endr
-
- sub $1, %ecx
-- jnz .L\@_fill_rsb_loop
-+ jnz .L\@_fill_rsb_loop\xu
- mov %\tmp, %rsp /* Restore old %rsp */
-
- #ifdef CONFIG_XEN_SHSTK
- mov $1, %ecx
- rdsspd %ecx
- cmp $1, %ecx
-- je .L\@_shstk_done
-+ je .L\@_shstk_done\xu
- mov $64, %ecx /* 64 * 4 bytes, given incsspd */
- incsspd %ecx /* Restore old SSP */
--.L\@_shstk_done:
-+.L\@_shstk_done\xu:
- #endif
- .endm
-
---
-2.40.0
-
diff --git a/0031-xen-Fix-Clang-Wunicode-diagnostic-when-building-asm-.patch b/0031-xen-Fix-Clang-Wunicode-diagnostic-when-building-asm-.patch
deleted file mode 100644
index bad0316..0000000
--- a/0031-xen-Fix-Clang-Wunicode-diagnostic-when-building-asm-.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From f073db0a07c5f6800a70c91819c4b8c2ba359451 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 3 Mar 2023 08:15:50 +0100
-Subject: [PATCH 31/61] xen: Fix Clang -Wunicode diagnostic when building
- asm-macros
-
-While trying to work around a different Clang-IAS bug (parent changeset), I
-stumbled onto:
-
- In file included from arch/x86/asm-macros.c:3:
- ./arch/x86/include/asm/spec_ctrl_asm.h:144:19: error: \u used with
- no following hex digits; treating as '\' followed by identifier [-Werror,-Wunicode]
- .L\@_fill_rsb_loop\uniq:
- ^
-
-It turns out that Clang -E is sensitive to the file extension of the source
-file it is processing. Furthermore, C explicitly permits the use of \u
-escapes in identifier names, so the diagnostic would be reasonable in
-principle if we were trying to compile the result.
-
-asm-macros should really have been .S from the outset, as it is ultimately
-generating assembly, not C. Rename it, which causes Clang not to complain.
-
-We need to introduce rules for generating a .i file from .S, and substituting
-c_flags for a_flags lets us drop the now-redundant -D__ASSEMBLY__.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 53f0d02040b1df08f0589f162790ca376e1c2040
-master date: 2023-02-24 17:44:29 +0000
----
- xen/Rules.mk | 6 ++++++
- xen/arch/x86/Makefile | 2 +-
- xen/arch/x86/{asm-macros.c => asm-macros.S} | 0
- 3 files changed, 7 insertions(+), 1 deletion(-)
- rename xen/arch/x86/{asm-macros.c => asm-macros.S} (100%)
-
-diff --git a/xen/Rules.mk b/xen/Rules.mk
-index 5e0699e58b..1f171f88e2 100644
---- a/xen/Rules.mk
-+++ b/xen/Rules.mk
-@@ -223,6 +223,9 @@ $(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): %.init.o: %.o FORCE
- quiet_cmd_cpp_i_c = CPP $@
- cmd_cpp_i_c = $(CPP) $(call cpp_flags,$(c_flags)) -MQ $@ -o $@ $<
-
-+quiet_cmd_cpp_i_S = CPP $@
-+cmd_cpp_i_S = $(CPP) $(call cpp_flags,$(a_flags)) -MQ $@ -o $@ $<
-+
- quiet_cmd_cc_s_c = CC $@
- cmd_cc_s_c = $(CC) $(filter-out -Wa$(comma)%,$(c_flags)) -S $< -o $@
-
-@@ -232,6 +235,9 @@ cmd_cpp_s_S = $(CPP) $(call cpp_flags,$(a_flags)) -MQ $@ -o $@ $<
- %.i: %.c FORCE
- $(call if_changed,cpp_i_c)
-
-+%.i: %.S FORCE
-+ $(call if_changed,cpp_i_S)
-+
- %.s: %.c FORCE
- $(call if_changed,cc_s_c)
-
-diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
-index 69b6cfaded..8e975f472d 100644
---- a/xen/arch/x86/Makefile
-+++ b/xen/arch/x86/Makefile
-@@ -273,7 +273,7 @@ efi/buildid.o efi/relocs-dummy.o: ;
- .PHONY: include
- include: $(BASEDIR)/include/asm-x86/asm-macros.h
-
--asm-macros.i: CFLAGS-y += -D__ASSEMBLY__ -P
-+asm-macros.i: CFLAGS-y += -P
-
- $(BASEDIR)/include/asm-x86/asm-macros.h: asm-macros.i Makefile
- echo '#if 0' >$@.new
-diff --git a/xen/arch/x86/asm-macros.c b/xen/arch/x86/asm-macros.S
-similarity index 100%
-rename from xen/arch/x86/asm-macros.c
-rename to xen/arch/x86/asm-macros.S
---
-2.40.0
-
diff --git a/0032-tools-Use-PKG_CONFIG_FILE-instead-of-PKG_CONFIG-vari.patch b/0032-tools-Use-PKG_CONFIG_FILE-instead-of-PKG_CONFIG-vari.patch
deleted file mode 100644
index bfcdd26..0000000
--- a/0032-tools-Use-PKG_CONFIG_FILE-instead-of-PKG_CONFIG-vari.patch
+++ /dev/null
@@ -1,98 +0,0 @@
-From a2adc7fcc22405e81dc11290416e6140bb0244ca Mon Sep 17 00:00:00 2001
-From: Bertrand Marquis <bertrand.marquis@arm.com>
-Date: Fri, 3 Mar 2023 08:16:45 +0100
-Subject: [PATCH 32/61] tools: Use PKG_CONFIG_FILE instead of PKG_CONFIG
- variable
-
-Replace PKG_CONFIG variable name with PKG_CONFIG_FILE for the name of
-the pkg-config file.
-This is preventing a conflict in some build systems where PKG_CONFIG
-actually contains the path to the pkg-config executable to use, as the
-default assignment in libs.mk is using a weak assignment (?=).
-
-This problem has been found when trying to build the latest version of
-Xen tools using buildroot.
-
-Fixes: d400dc5729e4 ("tools: tweak tools/libs/libs.mk for being able to support libxenctrl")
-Signed-off-by: Bertrand Marquis <bertrand.marquis@arm.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: b97e2fe7b9e1f4706693552697239ac2b71efee4
-master date: 2023-02-24 17:44:29 +0000
----
- tools/libs/ctrl/Makefile | 2 +-
- tools/libs/libs.mk | 13 +++++++------
- 2 files changed, 8 insertions(+), 7 deletions(-)
-
-diff --git a/tools/libs/ctrl/Makefile b/tools/libs/ctrl/Makefile
-index 6ff5918798..d3666ae7ff 100644
---- a/tools/libs/ctrl/Makefile
-+++ b/tools/libs/ctrl/Makefile
-@@ -47,7 +47,7 @@ CFLAGS += -include $(XEN_ROOT)/tools/config.h
- CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE
-
- LIBHEADER := xenctrl.h xenctrl_compat.h
--PKG_CONFIG := xencontrol.pc
-+PKG_CONFIG_FILE := xencontrol.pc
- PKG_CONFIG_NAME := Xencontrol
-
- NO_HEADERS_CHK := y
-diff --git a/tools/libs/libs.mk b/tools/libs/libs.mk
-index f1554462fb..0e005218e2 100644
---- a/tools/libs/libs.mk
-+++ b/tools/libs/libs.mk
-@@ -1,7 +1,7 @@
- # Common Makefile for building a lib.
- #
- # Variables taken as input:
--# PKG_CONFIG: name of pkg-config file (xen$(LIBNAME).pc if empty)
-+# PKG_CONFIG_FILE: name of pkg-config file (xen$(LIBNAME).pc if empty)
- # MAJOR: major version of lib (Xen version if empty)
- # MINOR: minor version of lib (0 if empty)
-
-@@ -29,7 +29,8 @@ endif
- comma:= ,
- empty:=
- space:= $(empty) $(empty)
--PKG_CONFIG ?= $(LIB_FILE_NAME).pc
-+
-+PKG_CONFIG_FILE ?= $(LIB_FILE_NAME).pc
- PKG_CONFIG_NAME ?= Xen$(LIBNAME)
- PKG_CONFIG_DESC ?= The $(PKG_CONFIG_NAME) library for Xen hypervisor
- PKG_CONFIG_VERSION := $(MAJOR).$(MINOR)
-@@ -38,13 +39,13 @@ PKG_CONFIG_LIB := $(LIB_FILE_NAME)
- PKG_CONFIG_REQPRIV := $(subst $(space),$(comma),$(strip $(foreach lib,$(patsubst ctrl,control,$(USELIBS_$(LIBNAME))),xen$(lib))))
-
- ifneq ($(CONFIG_LIBXC_MINIOS),y)
--PKG_CONFIG_INST := $(PKG_CONFIG)
-+PKG_CONFIG_INST := $(PKG_CONFIG_FILE)
- $(PKG_CONFIG_INST): PKG_CONFIG_PREFIX = $(prefix)
- $(PKG_CONFIG_INST): PKG_CONFIG_INCDIR = $(includedir)
- $(PKG_CONFIG_INST): PKG_CONFIG_LIBDIR = $(libdir)
- endif
-
--PKG_CONFIG_LOCAL := $(PKG_CONFIG_DIR)/$(PKG_CONFIG)
-+PKG_CONFIG_LOCAL := $(PKG_CONFIG_DIR)/$(PKG_CONFIG_FILE)
-
- LIBHEADER ?= $(LIB_FILE_NAME).h
- LIBHEADERS = $(foreach h, $(LIBHEADER), $(XEN_INCLUDE)/$(h))
-@@ -114,7 +115,7 @@ install: build
- $(SYMLINK_SHLIB) lib$(LIB_FILE_NAME).so.$(MAJOR).$(MINOR) $(DESTDIR)$(libdir)/lib$(LIB_FILE_NAME).so.$(MAJOR)
- $(SYMLINK_SHLIB) lib$(LIB_FILE_NAME).so.$(MAJOR) $(DESTDIR)$(libdir)/lib$(LIB_FILE_NAME).so
- for i in $(LIBHEADERS); do $(INSTALL_DATA) $$i $(DESTDIR)$(includedir); done
-- $(INSTALL_DATA) $(PKG_CONFIG) $(DESTDIR)$(PKG_INSTALLDIR)
-+ $(INSTALL_DATA) $(PKG_CONFIG_FILE) $(DESTDIR)$(PKG_INSTALLDIR)
-
- .PHONY: uninstall
- uninstall:
-@@ -134,7 +135,7 @@ clean:
- rm -rf *.rpm $(LIB) *~ $(DEPS_RM) $(LIB_OBJS) $(PIC_OBJS)
- rm -f lib$(LIB_FILE_NAME).so.$(MAJOR).$(MINOR) lib$(LIB_FILE_NAME).so.$(MAJOR)
- rm -f headers.chk headers.lst
-- rm -f $(PKG_CONFIG)
-+ rm -f $(PKG_CONFIG_FILE)
- rm -f _paths.h
-
- .PHONY: distclean
---
-2.40.0
-
diff --git a/0033-libs-guest-Fix-resource-leaks-in-xc_core_arch_map_p2.patch b/0033-libs-guest-Fix-resource-leaks-in-xc_core_arch_map_p2.patch
deleted file mode 100644
index 5caa850..0000000
--- a/0033-libs-guest-Fix-resource-leaks-in-xc_core_arch_map_p2.patch
+++ /dev/null
@@ -1,65 +0,0 @@
-From b181a3a5532574d2163408284bcd785ec87fe046 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 3 Mar 2023 08:17:04 +0100
-Subject: [PATCH 33/61] libs/guest: Fix resource leaks in
- xc_core_arch_map_p2m_tree_rw()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Edwin, with the help of GCC's -fanalyzer, identified that p2m_frame_list_list
-gets leaked. What fanalyzer can't see is that the live_p2m_frame_list_list
-and live_p2m_frame_list foreign mappings are leaked too.
-
-Rework the logic so the out path is executed unconditionally, which cleans up
-all the intermediate allocations/mappings appropriately.
-
-Fixes: bd7a29c3d0b9 ("tools/libs/ctrl: fix xc_core_arch_map_p2m() to support linear p2m table")
-Reported-by: Edwin Török <edwin.torok@cloud.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Juergen Gross <jgross@suse.com>
-master commit: 1868d7f22660c8980bd0a7e53f044467e8b63bb5
-master date: 2023-02-27 15:51:23 +0000
----
- tools/libs/guest/xg_core_x86.c | 8 +++-----
- 1 file changed, 3 insertions(+), 5 deletions(-)
-
-diff --git a/tools/libs/guest/xg_core_x86.c b/tools/libs/guest/xg_core_x86.c
-index 61106b98b8..c5e4542ccc 100644
---- a/tools/libs/guest/xg_core_x86.c
-+++ b/tools/libs/guest/xg_core_x86.c
-@@ -229,11 +229,11 @@ xc_core_arch_map_p2m_tree_rw(xc_interface *xch, struct domain_info_context *dinf
- uint32_t dom, shared_info_any_t *live_shinfo)
- {
- /* Double and single indirect references to the live P2M table */
-- xen_pfn_t *live_p2m_frame_list_list;
-+ xen_pfn_t *live_p2m_frame_list_list = NULL;
- xen_pfn_t *live_p2m_frame_list = NULL;
- /* Copies of the above. */
- xen_pfn_t *p2m_frame_list_list = NULL;
-- xen_pfn_t *p2m_frame_list;
-+ xen_pfn_t *p2m_frame_list = NULL;
-
- int err;
- int i;
-@@ -297,8 +297,6 @@ xc_core_arch_map_p2m_tree_rw(xc_interface *xch, struct domain_info_context *dinf
-
- dinfo->p2m_frames = P2M_FL_ENTRIES;
-
-- return p2m_frame_list;
--
- out:
- err = errno;
-
-@@ -312,7 +310,7 @@ xc_core_arch_map_p2m_tree_rw(xc_interface *xch, struct domain_info_context *dinf
-
- errno = err;
-
-- return NULL;
-+ return p2m_frame_list;
- }
-
- static int
---
-2.40.0
-
diff --git a/0034-libs-guest-Fix-leak-on-realloc-failure-in-backup_pte.patch b/0034-libs-guest-Fix-leak-on-realloc-failure-in-backup_pte.patch
deleted file mode 100644
index 4be16a3..0000000
--- a/0034-libs-guest-Fix-leak-on-realloc-failure-in-backup_pte.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 25d103f2eb59f021cce61f07a0bf0bfa696b4416 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com>
-Date: Fri, 3 Mar 2023 08:17:23 +0100
-Subject: [PATCH 34/61] libs/guest: Fix leak on realloc failure in
- backup_ptes()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From `man 2 realloc`:
-
- If realloc() fails, the original block is left untouched; it is not freed or moved.
-
-Found using GCC -fanalyzer:
-
- | 184 | backup->entries = realloc(backup->entries,
- | | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- | | | | |
- | | | | (91) when ‘realloc’ fails
- | | | (92) ‘old_ptes.entries’ leaks here; was allocated at (44)
- | | (90) ...to here
-
-Signed-off-by: Edwin Török <edwin.torok@cloud.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 275d13184cfa52ebe4336ed66526ce93716adbe0
-master date: 2023-02-27 15:51:23 +0000
----
- tools/libs/guest/xg_offline_page.c | 10 ++++++++--
- 1 file changed, 8 insertions(+), 2 deletions(-)
-
-diff --git a/tools/libs/guest/xg_offline_page.c b/tools/libs/guest/xg_offline_page.c
-index cfe0e2d537..c42b973363 100644
---- a/tools/libs/guest/xg_offline_page.c
-+++ b/tools/libs/guest/xg_offline_page.c
-@@ -181,10 +181,16 @@ static int backup_ptes(xen_pfn_t table_mfn, int offset,
-
- if (backup->max == backup->cur)
- {
-- backup->entries = realloc(backup->entries,
-- backup->max * 2 * sizeof(struct pte_backup_entry));
-+ void *orig = backup->entries;
-+
-+ backup->entries = realloc(
-+ orig, backup->max * 2 * sizeof(struct pte_backup_entry));
-+
- if (backup->entries == NULL)
-+ {
-+ free(orig);
- return -1;
-+ }
- else
- backup->max *= 2;
- }
---
-2.40.0
-
diff --git a/0035-x86-ucode-AMD-late-load-the-patch-on-every-logical-t.patch b/0035-x86-ucode-AMD-late-load-the-patch-on-every-logical-t.patch
deleted file mode 100644
index 931d93f..0000000
--- a/0035-x86-ucode-AMD-late-load-the-patch-on-every-logical-t.patch
+++ /dev/null
@@ -1,90 +0,0 @@
-From 84dfe7a56f04a7412fa4869b3e756c49e1cfbe75 Mon Sep 17 00:00:00 2001
-From: Sergey Dyasli <sergey.dyasli@citrix.com>
-Date: Fri, 3 Mar 2023 08:17:40 +0100
-Subject: [PATCH 35/61] x86/ucode/AMD: late load the patch on every logical
- thread
-
-Currently late ucode loading is performed only on the first core of CPU
-siblings. But according to the latest recommendation from AMD, late
-ucode loading should happen on every logical thread/core on AMD CPUs.
-
-To achieve that, introduce is_cpu_primary() helper which will consider
-every logical cpu as "primary" when running on AMD CPUs. Also include
-Hygon in the check for future-proofing.
-
-Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: f1315e48a03a42f78f9b03c0a384165baf02acae
-master date: 2023-02-28 14:51:28 +0100
----
- xen/arch/x86/cpu/microcode/core.c | 24 +++++++++++++++++++-----
- 1 file changed, 19 insertions(+), 5 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c
-index ceec1f1edc..ee7df9a591 100644
---- a/xen/arch/x86/cpu/microcode/core.c
-+++ b/xen/arch/x86/cpu/microcode/core.c
-@@ -273,6 +273,20 @@ static bool microcode_update_cache(struct microcode_patch *patch)
- return true;
- }
-
-+/* Returns true if ucode should be loaded on a given cpu */
-+static bool is_cpu_primary(unsigned int cpu)
-+{
-+ if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
-+ /* Load ucode on every logical thread/core */
-+ return true;
-+
-+ /* Intel CPUs should load ucode only on the first core of SMT siblings */
-+ if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
-+ return true;
-+
-+ return false;
-+}
-+
- /* Wait for a condition to be met with a timeout (us). */
- static int wait_for_condition(bool (*func)(unsigned int data),
- unsigned int data, unsigned int timeout)
-@@ -378,7 +392,7 @@ static int primary_thread_work(const struct microcode_patch *patch)
-
- static int microcode_nmi_callback(const struct cpu_user_regs *regs, int cpu)
- {
-- unsigned int primary = cpumask_first(this_cpu(cpu_sibling_mask));
-+ bool primary_cpu = is_cpu_primary(cpu);
- int ret;
-
- /* System-generated NMI, leave to main handler */
-@@ -391,10 +405,10 @@ static int microcode_nmi_callback(const struct cpu_user_regs *regs, int cpu)
- * ucode_in_nmi.
- */
- if ( cpu == cpumask_first(&cpu_online_map) ||
-- (!ucode_in_nmi && cpu == primary) )
-+ (!ucode_in_nmi && primary_cpu) )
- return 0;
-
-- if ( cpu == primary )
-+ if ( primary_cpu )
- ret = primary_thread_work(nmi_patch);
- else
- ret = secondary_nmi_work();
-@@ -545,7 +559,7 @@ static int do_microcode_update(void *patch)
- */
- if ( cpu == cpumask_first(&cpu_online_map) )
- ret = control_thread_fn(patch);
-- else if ( cpu == cpumask_first(this_cpu(cpu_sibling_mask)) )
-+ else if ( is_cpu_primary(cpu) )
- ret = primary_thread_fn(patch);
- else
- ret = secondary_thread_fn();
-@@ -637,7 +651,7 @@ static long microcode_update_helper(void *data)
- /* Calculate the number of online CPU core */
- nr_cores = 0;
- for_each_online_cpu(cpu)
-- if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
-+ if ( is_cpu_primary(cpu) )
- nr_cores++;
-
- printk(XENLOG_INFO "%u cores are to update their microcode\n", nr_cores);
---
-2.40.0
-
diff --git a/0036-x86-shadow-account-for-log-dirty-mode-when-pre-alloc.patch b/0036-x86-shadow-account-for-log-dirty-mode-when-pre-alloc.patch
deleted file mode 100644
index 38629a4..0000000
--- a/0036-x86-shadow-account-for-log-dirty-mode-when-pre-alloc.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-From b0d6684ee58f7252940f5a62e4b85bdc56307eef Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 11:59:44 +0000
-Subject: [PATCH 36/61] x86/shadow: account for log-dirty mode when
- pre-allocating
-
-Pre-allocation is intended to ensure that in the course of constructing
-or updating shadows there won't be any risk of just made shadows or
-shadows being acted upon can disappear under our feet. The amount of
-pages pre-allocated then, however, needs to account for all possible
-subsequent allocations. While the use in sh_page_fault() accounts for
-all shadows which may need making, so far it didn't account for
-allocations coming from log-dirty tracking (which piggybacks onto the
-P2M allocation functions).
-
-Since shadow_prealloc() takes a count of shadows (or other data
-structures) rather than a count of pages, putting the adjustment at the
-call site of this function won't work very well: We simply can't express
-the correct count that way in all cases. Instead take care of this in
-the function itself, by "snooping" for L1 type requests. (While not
-applicable right now, future new request sites of L1 tables would then
-also be covered right away.)
-
-It is relevant to note here that pre-allocations like the one done from
-shadow_alloc_p2m_page() are benign when they fall in the "scope" of an
-earlier pre-alloc which already included that count: The inner call will
-simply find enough pages available then; it'll bail right away.
-
-This is CVE-2022-42332 / XSA-427.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Tim Deegan <tim@xen.org>
-(cherry picked from commit 91767a71061035ae42be93de495cd976f863a41a)
----
- xen/arch/x86/mm/paging.c | 1 +
- xen/arch/x86/mm/shadow/common.c | 12 +++++++++++-
- xen/include/asm-x86/paging.h | 4 ++++
- 3 files changed, 16 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
-index 97ac9ccf59..9fb66e65cd 100644
---- a/xen/arch/x86/mm/paging.c
-+++ b/xen/arch/x86/mm/paging.c
-@@ -280,6 +280,7 @@ void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn)
- if ( unlikely(!VALID_M2P(pfn_x(pfn))) )
- return;
-
-+ BUILD_BUG_ON(paging_logdirty_levels() != 4);
- i1 = L1_LOGDIRTY_IDX(pfn);
- i2 = L2_LOGDIRTY_IDX(pfn);
- i3 = L3_LOGDIRTY_IDX(pfn);
-diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
-index 1de0139742..c14a269935 100644
---- a/xen/arch/x86/mm/shadow/common.c
-+++ b/xen/arch/x86/mm/shadow/common.c
-@@ -1015,7 +1015,17 @@ bool shadow_prealloc(struct domain *d, unsigned int type, unsigned int count)
- if ( unlikely(d->is_dying) )
- return false;
-
-- ret = _shadow_prealloc(d, shadow_size(type) * count);
-+ count *= shadow_size(type);
-+ /*
-+ * Log-dirty handling may result in allocations when populating its
-+ * tracking structures. Tie this to the caller requesting space for L1
-+ * shadows.
-+ */
-+ if ( paging_mode_log_dirty(d) &&
-+ ((SHF_L1_ANY | SHF_FL1_ANY) & (1u << type)) )
-+ count += paging_logdirty_levels();
-+
-+ ret = _shadow_prealloc(d, count);
- if ( !ret && (!d->is_shutting_down || d->shutdown_code != SHUTDOWN_crash) )
- /*
- * Failing to allocate memory required for shadow usage can only result in
-diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
-index 27890791d8..c6b429c691 100644
---- a/xen/include/asm-x86/paging.h
-+++ b/xen/include/asm-x86/paging.h
-@@ -192,6 +192,10 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
- #define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
- (LOGDIRTY_NODE_ENTRIES-1))
-
-+#define paging_logdirty_levels() \
-+ (DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3), \
-+ PAGE_SHIFT - ilog2(sizeof(mfn_t))) + 1)
-+
- #ifdef CONFIG_HVM
- /* VRAM dirty tracking support */
- struct sh_dirty_vram {
---
-2.40.0
-
diff --git a/0037-x86-HVM-bound-number-of-pinned-cache-attribute-regio.patch b/0037-x86-HVM-bound-number-of-pinned-cache-attribute-regio.patch
deleted file mode 100644
index 6730b2d..0000000
--- a/0037-x86-HVM-bound-number-of-pinned-cache-attribute-regio.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 2fe1517a00e088f6b1f1aff7d4ea1b477b288987 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 12:01:01 +0000
-Subject: [PATCH 37/61] x86/HVM: bound number of pinned cache attribute regions
-
-This is exposed via DMOP, i.e. to potentially not fully privileged
-device models. With that we may not permit registration of an (almost)
-unbounded amount of such regions.
-
-This is CVE-2022-42333 / part of XSA-428.
-
-Fixes: 642123c5123f ("x86/hvm: provide XEN_DMOP_pin_memory_cacheattr")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit a5e768640f786b681063f4e08af45d0c4e91debf)
----
- xen/arch/x86/hvm/mtrr.c | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
-index 4a9f3177ed..98e55bbdbd 100644
---- a/xen/arch/x86/hvm/mtrr.c
-+++ b/xen/arch/x86/hvm/mtrr.c
-@@ -595,6 +595,7 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- uint64_t gfn_end, uint32_t type)
- {
- struct hvm_mem_pinned_cacheattr_range *range;
-+ unsigned int nr = 0;
- int rc = 1;
-
- if ( !is_hvm_domain(d) )
-@@ -666,11 +667,15 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- rc = -EBUSY;
- break;
- }
-+ ++nr;
- }
- rcu_read_unlock(&pinned_cacheattr_rcu_lock);
- if ( rc <= 0 )
- return rc;
-
-+ if ( nr >= 64 /* The limit is arbitrary. */ )
-+ return -ENOSPC;
-+
- range = xzalloc(struct hvm_mem_pinned_cacheattr_range);
- if ( range == NULL )
- return -ENOMEM;
---
-2.40.0
-
diff --git a/0038-x86-HVM-serialize-pinned-cache-attribute-list-manipu.patch b/0038-x86-HVM-serialize-pinned-cache-attribute-list-manipu.patch
deleted file mode 100644
index ca8528f..0000000
--- a/0038-x86-HVM-serialize-pinned-cache-attribute-list-manipu.patch
+++ /dev/null
@@ -1,126 +0,0 @@
-From 564de020d29fbc4efd20ef8052051e86b2465a1a Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 12:01:01 +0000
-Subject: [PATCH 38/61] x86/HVM: serialize pinned cache attribute list
- manipulation
-
-While the RCU variants of list insertion and removal allow lockless list
-traversal (with RCU just read-locked), insertions and removals still
-need serializing amongst themselves. To keep things simple, use the
-domain lock for this purpose.
-
-This is CVE-2022-42334 / part of XSA-428.
-
-Fixes: 642123c5123f ("x86/hvm: provide XEN_DMOP_pin_memory_cacheattr")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Julien Grall <jgrall@amazon.com>
-(cherry picked from commit 829ec245cf66560e3b50d140ccb3168e7fb7c945)
----
- xen/arch/x86/hvm/mtrr.c | 51 +++++++++++++++++++++++++----------------
- 1 file changed, 31 insertions(+), 20 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
-index 98e55bbdbd..9b3b33012b 100644
---- a/xen/arch/x86/hvm/mtrr.c
-+++ b/xen/arch/x86/hvm/mtrr.c
-@@ -594,7 +594,7 @@ static void free_pinned_cacheattr_entry(struct rcu_head *rcu)
- int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- uint64_t gfn_end, uint32_t type)
- {
-- struct hvm_mem_pinned_cacheattr_range *range;
-+ struct hvm_mem_pinned_cacheattr_range *range, *newr;
- unsigned int nr = 0;
- int rc = 1;
-
-@@ -608,14 +608,15 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- {
- case XEN_DOMCTL_DELETE_MEM_CACHEATTR:
- /* Remove the requested range. */
-- rcu_read_lock(&pinned_cacheattr_rcu_lock);
-- list_for_each_entry_rcu ( range,
-- &d->arch.hvm.pinned_cacheattr_ranges,
-- list )
-+ domain_lock(d);
-+ list_for_each_entry ( range,
-+ &d->arch.hvm.pinned_cacheattr_ranges,
-+ list )
- if ( range->start == gfn_start && range->end == gfn_end )
- {
-- rcu_read_unlock(&pinned_cacheattr_rcu_lock);
- list_del_rcu(&range->list);
-+ domain_unlock(d);
-+
- type = range->type;
- call_rcu(&range->rcu, free_pinned_cacheattr_entry);
- p2m_memory_type_changed(d);
-@@ -636,7 +637,7 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- }
- return 0;
- }
-- rcu_read_unlock(&pinned_cacheattr_rcu_lock);
-+ domain_unlock(d);
- return -ENOENT;
-
- case PAT_TYPE_UC_MINUS:
-@@ -651,7 +652,10 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- return -EINVAL;
- }
-
-- rcu_read_lock(&pinned_cacheattr_rcu_lock);
-+ newr = xzalloc(struct hvm_mem_pinned_cacheattr_range);
-+
-+ domain_lock(d);
-+
- list_for_each_entry_rcu ( range,
- &d->arch.hvm.pinned_cacheattr_ranges,
- list )
-@@ -669,27 +673,34 @@ int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
- }
- ++nr;
- }
-- rcu_read_unlock(&pinned_cacheattr_rcu_lock);
-+
- if ( rc <= 0 )
-- return rc;
-+ /* nothing */;
-+ else if ( nr >= 64 /* The limit is arbitrary. */ )
-+ rc = -ENOSPC;
-+ else if ( !newr )
-+ rc = -ENOMEM;
-+ else
-+ {
-+ newr->start = gfn_start;
-+ newr->end = gfn_end;
-+ newr->type = type;
-
-- if ( nr >= 64 /* The limit is arbitrary. */ )
-- return -ENOSPC;
-+ list_add_rcu(&newr->list, &d->arch.hvm.pinned_cacheattr_ranges);
-
-- range = xzalloc(struct hvm_mem_pinned_cacheattr_range);
-- if ( range == NULL )
-- return -ENOMEM;
-+ newr = NULL;
-+ rc = 0;
-+ }
-+
-+ domain_unlock(d);
-
-- range->start = gfn_start;
-- range->end = gfn_end;
-- range->type = type;
-+ xfree(newr);
-
-- list_add_rcu(&range->list, &d->arch.hvm.pinned_cacheattr_ranges);
- p2m_memory_type_changed(d);
- if ( type != PAT_TYPE_WRBACK )
- flush_all(FLUSH_CACHE);
-
-- return 0;
-+ return rc;
- }
-
- static int hvm_save_mtrr_msr(struct vcpu *v, hvm_domain_context_t *h)
---
-2.40.0
-
diff --git a/0039-x86-spec-ctrl-Defer-CR4_PV32_RESTORE-on-the-cstar_en.patch b/0039-x86-spec-ctrl-Defer-CR4_PV32_RESTORE-on-the-cstar_en.patch
deleted file mode 100644
index 74bcf67..0000000
--- a/0039-x86-spec-ctrl-Defer-CR4_PV32_RESTORE-on-the-cstar_en.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 3c924fe46b455834b5c04268db6b528b549668d1 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 10 Feb 2023 21:11:14 +0000
-Subject: [PATCH 39/61] x86/spec-ctrl: Defer CR4_PV32_RESTORE on the
- cstar_enter path
-
-As stated (correctly) by the comment next to SPEC_CTRL_ENTRY_FROM_PV, between
-the two hunks visible in the patch, RET's are not safe prior to this point.
-
-CR4_PV32_RESTORE hides a CALL/RET pair in certain configurations (PV32
-compiled in, SMEP or SMAP active), and the RET can be attacked with one of
-several known speculative issues.
-
-Furthermore, CR4_PV32_RESTORE also hides a reference to the cr4_pv32_mask
-global variable, which is not safe when XPTI is active before restoring Xen's
-full pagetables.
-
-This crash has gone unnoticed because it is only AMD CPUs which permit the
-SYSCALL instruction in compatibility mode, and these are not vulnerable to
-Meltdown so don't activate XPTI by default.
-
-This is XSA-429 / CVE-2022-42331
-
-Fixes: 5e7962901131 ("x86/entry: Organise the use of MSR_SPEC_CTRL at each entry/exit point")
-Fixes: 5784de3e2067 ("x86: Meltdown band-aid against malicious 64-bit PV guests")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-(cherry picked from commit df5b055b12116d9e63ced59ae5389e69a2a3de48)
----
- xen/arch/x86/x86_64/entry.S | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index fba8ae498f..db2ea7871e 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -288,7 +288,6 @@ ENTRY(cstar_enter)
- ALTERNATIVE "", "setssbsy", X86_FEATURE_XEN_SHSTK
- #endif
- push %rax /* Guest %rsp */
-- CR4_PV32_RESTORE
- movq 8(%rsp), %rax /* Restore guest %rax. */
- movq $FLAT_USER_SS32, 8(%rsp) /* Assume a 64bit domain. Compat handled lower. */
- pushq %r11
-@@ -312,6 +311,8 @@ ENTRY(cstar_enter)
- .Lcstar_cr3_okay:
- sti
-
-+ CR4_PV32_RESTORE
-+
- movq STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx
-
- #ifdef CONFIG_PV32
---
-2.40.0
-
diff --git a/0040-tools-python-change-s-size-type-for-Python-3.10.patch b/0040-tools-python-change-s-size-type-for-Python-3.10.patch
deleted file mode 100644
index 979fd6f..0000000
--- a/0040-tools-python-change-s-size-type-for-Python-3.10.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 0cbffc6099db7fd01041910a98b99ccad50af11b Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
- <marmarek@invisiblethingslab.com>
-Date: Tue, 21 Mar 2023 13:49:28 +0100
-Subject: [PATCH 40/61] tools/python: change 's#' size type for Python >= 3.10
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Python < 3.10 by default uses 'int' type for data+size string types
-(s#), unless PY_SSIZE_T_CLEAN is defined - in which case it uses
-Py_ssize_t. The former behavior was removed in Python 3.10 and now it's
-required to define PY_SSIZE_T_CLEAN before including Python.h, and using
-Py_ssize_t for the length argument. The PY_SSIZE_T_CLEAN behavior is
-supported since Python 2.5.
-
-Adjust bindings accordingly.
-
-Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
-Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
-master commit: 897257ba49d0a6ddcf084960fd792ccce9c40f94
-master date: 2023-02-06 08:50:13 +0100
----
- tools/python/xen/lowlevel/xc/xc.c | 3 ++-
- tools/python/xen/lowlevel/xs/xs.c | 3 ++-
- 2 files changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
-index fd00861032..cfb2734a99 100644
---- a/tools/python/xen/lowlevel/xc/xc.c
-+++ b/tools/python/xen/lowlevel/xc/xc.c
-@@ -4,6 +4,7 @@
- * Copyright (c) 2003-2004, K A Fraser (University of Cambridge)
- */
-
-+#define PY_SSIZE_T_CLEAN
- #include <Python.h>
- #define XC_WANT_COMPAT_MAP_FOREIGN_API
- #include <xenctrl.h>
-@@ -1774,7 +1775,7 @@ static PyObject *pyflask_load(PyObject *self, PyObject *args, PyObject *kwds)
- {
- xc_interface *xc_handle;
- char *policy;
-- uint32_t len;
-+ Py_ssize_t len;
- int ret;
-
- static char *kwd_list[] = { "policy", NULL };
-diff --git a/tools/python/xen/lowlevel/xs/xs.c b/tools/python/xen/lowlevel/xs/xs.c
-index 0dad7fa5f2..3ba5a8b893 100644
---- a/tools/python/xen/lowlevel/xs/xs.c
-+++ b/tools/python/xen/lowlevel/xs/xs.c
-@@ -18,6 +18,7 @@
- * Copyright (C) 2005 XenSource Ltd.
- */
-
-+#define PY_SSIZE_T_CLEAN
- #include <Python.h>
-
- #include <stdbool.h>
-@@ -141,7 +142,7 @@ static PyObject *xspy_write(XsHandle *self, PyObject *args)
- char *thstr;
- char *path;
- char *data;
-- int data_n;
-+ Py_ssize_t data_n;
- bool result;
-
- if (!xh)
---
-2.40.0
-
diff --git a/0041-tools-xenmon-Fix-xenmon.py-for-with-python3.x.patch b/0041-tools-xenmon-Fix-xenmon.py-for-with-python3.x.patch
deleted file mode 100644
index ff97af6..0000000
--- a/0041-tools-xenmon-Fix-xenmon.py-for-with-python3.x.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-From 5ce8d2aef85f590e4fb42d18784512203069d0c0 Mon Sep 17 00:00:00 2001
-From: Bernhard Kaindl <bernhard.kaindl@citrix.com>
-Date: Tue, 21 Mar 2023 13:49:47 +0100
-Subject: [PATCH 41/61] tools/xenmon: Fix xenmon.py for with python3.x
-
-Fixes for Py3:
-* class Delayed(): file not defined; also an error for pylint -E. Inherit
- object instead for Py2 compatibility. Fix DomainInfo() too.
-* Inconsistent use of tabs and spaces for indentation (in one block)
-
-Signed-off-by: Bernhard Kaindl <bernhard.kaindl@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 3a59443c1d5ae0677a792c660ccd3796ce036732
-master date: 2023-02-06 10:22:12 +0000
----
- tools/xenmon/xenmon.py | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/tools/xenmon/xenmon.py b/tools/xenmon/xenmon.py
-index 175eacd2cb..977ada6887 100644
---- a/tools/xenmon/xenmon.py
-+++ b/tools/xenmon/xenmon.py
-@@ -117,7 +117,7 @@ def setup_cmdline_parser():
- return parser
-
- # encapsulate information about a domain
--class DomainInfo:
-+class DomainInfo(object):
- def __init__(self):
- self.allocated_sum = 0
- self.gotten_sum = 0
-@@ -533,7 +533,7 @@ def show_livestats(cpu):
- # simple functions to allow initialization of log files without actually
- # physically creating files that are never used; only on the first real
- # write does the file get created
--class Delayed(file):
-+class Delayed(object):
- def __init__(self, filename, mode):
- self.filename = filename
- self.saved_mode = mode
-@@ -677,8 +677,8 @@ def main():
-
- if os.uname()[0] == "SunOS":
- xenbaked_cmd = "/usr/lib/xenbaked"
-- stop_cmd = "/usr/bin/pkill -INT -z global xenbaked"
-- kill_cmd = "/usr/bin/pkill -KILL -z global xenbaked"
-+ stop_cmd = "/usr/bin/pkill -INT -z global xenbaked"
-+ kill_cmd = "/usr/bin/pkill -KILL -z global xenbaked"
- else:
- # assumes that xenbaked is in your path
- xenbaked_cmd = "xenbaked"
---
-2.40.0
-
diff --git a/0042-core-parking-fix-build-with-gcc12-and-NR_CPUS-1.patch b/0042-core-parking-fix-build-with-gcc12-and-NR_CPUS-1.patch
deleted file mode 100644
index c425c43..0000000
--- a/0042-core-parking-fix-build-with-gcc12-and-NR_CPUS-1.patch
+++ /dev/null
@@ -1,95 +0,0 @@
-From 4a6bedefe589dab12182d6b974de8ea3b2fcc681 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 13:50:18 +0100
-Subject: [PATCH 42/61] core-parking: fix build with gcc12 and NR_CPUS=1
-
-Gcc12 takes issue with core_parking_remove()'s
-
- for ( ; i < cur_idle_nums; ++i )
- core_parking_cpunum[i] = core_parking_cpunum[i + 1];
-
-complaining that the right hand side array access is past the bounds of
-1. Clearly the compiler can't know that cur_idle_nums can only ever be
-zero in this case (as the sole CPU cannot be parked).
-
-Arrange for core_parking.c's contents to not be needed altogether, and
-then disable its building when NR_CPUS == 1.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 4b0422f70feb4b1cd04598ffde805fc224f3812e
-master date: 2023-03-13 15:15:42 +0100
----
- xen/arch/x86/Kconfig | 2 +-
- xen/arch/x86/platform_hypercall.c | 11 ++++++++---
- xen/arch/x86/sysctl.c | 3 +++
- xen/common/Kconfig | 1 +
- 4 files changed, 13 insertions(+), 4 deletions(-)
-
-diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
-index 3c14096c80..8e2b504923 100644
---- a/xen/arch/x86/Kconfig
-+++ b/xen/arch/x86/Kconfig
-@@ -8,7 +8,7 @@ config X86
- select ACPI_LEGACY_TABLES_LOOKUP
- select ALTERNATIVE_CALL
- select ARCH_SUPPORTS_INT128
-- select CORE_PARKING
-+ imply CORE_PARKING
- select HAS_ALTERNATIVE
- select HAS_COMPAT
- select HAS_CPUFREQ
-diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
-index bf4090c942..c35e5669a4 100644
---- a/xen/arch/x86/platform_hypercall.c
-+++ b/xen/arch/x86/platform_hypercall.c
-@@ -725,12 +725,17 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
- case XEN_CORE_PARKING_SET:
- idle_nums = min_t(uint32_t,
- op->u.core_parking.idle_nums, num_present_cpus() - 1);
-- ret = continue_hypercall_on_cpu(
-- 0, core_parking_helper, (void *)(unsigned long)idle_nums);
-+ if ( CONFIG_NR_CPUS > 1 )
-+ ret = continue_hypercall_on_cpu(
-+ 0, core_parking_helper,
-+ (void *)(unsigned long)idle_nums);
-+ else if ( idle_nums )
-+ ret = -EINVAL;
- break;
-
- case XEN_CORE_PARKING_GET:
-- op->u.core_parking.idle_nums = get_cur_idle_nums();
-+ op->u.core_parking.idle_nums = CONFIG_NR_CPUS > 1
-+ ? get_cur_idle_nums() : 0;
- ret = __copy_field_to_guest(u_xenpf_op, op, u.core_parking) ?
- -EFAULT : 0;
- break;
-diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
-index aff52a13f3..ff843eaee2 100644
---- a/xen/arch/x86/sysctl.c
-+++ b/xen/arch/x86/sysctl.c
-@@ -179,6 +179,9 @@ long arch_do_sysctl(
- ret = -EBUSY;
- break;
- }
-+ if ( CONFIG_NR_CPUS <= 1 )
-+ /* Mimic behavior of smt_up_down_helper(). */
-+ return 0;
- plug = op == XEN_SYSCTL_CPU_HOTPLUG_SMT_ENABLE;
- fn = smt_up_down_helper;
- hcpu = _p(plug);
-diff --git a/xen/common/Kconfig b/xen/common/Kconfig
-index 6443943889..c9f4b7f492 100644
---- a/xen/common/Kconfig
-+++ b/xen/common/Kconfig
-@@ -10,6 +10,7 @@ config COMPAT
-
- config CORE_PARKING
- bool
-+ depends on NR_CPUS > 1
-
- config GRANT_TABLE
- bool "Grant table support" if EXPERT
---
-2.40.0
-
diff --git a/0043-x86-altp2m-help-gcc13-to-avoid-it-emitting-a-warning.patch b/0043-x86-altp2m-help-gcc13-to-avoid-it-emitting-a-warning.patch
deleted file mode 100644
index 0e040ad..0000000
--- a/0043-x86-altp2m-help-gcc13-to-avoid-it-emitting-a-warning.patch
+++ /dev/null
@@ -1,129 +0,0 @@
-From cdde3171a2a932a6836b094c4387412e27414ec9 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 13:51:42 +0100
-Subject: [PATCH 43/61] x86/altp2m: help gcc13 to avoid it emitting a warning
-
-Switches of altp2m-s always expect a valid altp2m to be in place (and
-indeed altp2m_vcpu_initialise() sets the active one to be at index 0).
-The compiler, however, cannot know that, and hence it cannot eliminate
-p2m_get_altp2m()'s case of returnin (literal) NULL. If then the compiler
-decides to special case that code path in the caller, the dereference in
-instances of
-
- atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
-
-can, to the code generator, appear to be NULL dereferences, leading to
-
-In function 'atomic_dec',
- inlined from '...' at ...:
-./arch/x86/include/asm/atomic.h:182:5: error: array subscript 0 is outside array bounds of 'int[0]' [-Werror=array-bounds=]
-
-Aid the compiler by adding a BUG_ON() checking the return value of the
-problematic p2m_get_altp2m(). Since with the use of the local variable
-the 2nd p2m_get_altp2m() each will look questionable at the first glance
-(Why is the local variable not used here?), open-code the only relevant
-piece of p2m_get_altp2m() there.
-
-To avoid repeatedly doing these transformations, and also to limit how
-"bad" the open-coding really is, convert the entire operation to an
-inline helper, used by all three instances (and accepting the redundant
-BUG_ON(idx >= MAX_ALTP2M) in two of the three cases).
-
-Reported-by: Charles Arnold <carnold@suse.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: be62b1fc2aa7375d553603fca07299da765a89fe
-master date: 2023-03-13 15:16:21 +0100
----
- xen/arch/x86/hvm/vmx/vmx.c | 8 +-------
- xen/arch/x86/mm/p2m.c | 14 ++------------
- xen/include/asm-x86/p2m.h | 20 ++++++++++++++++++++
- 3 files changed, 23 insertions(+), 19 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index 094141be9a..c8a839cd5e 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -4036,13 +4036,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
- }
- }
-
-- if ( idx != vcpu_altp2m(v).p2midx )
-- {
-- BUG_ON(idx >= MAX_ALTP2M);
-- atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
-- vcpu_altp2m(v).p2midx = idx;
-- atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
-- }
-+ p2m_set_altp2m(v, idx);
- }
-
- /* XXX: This looks ugly, but we need a mechanism to ensure
-diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
-index 8781df9dda..2d41446a69 100644
---- a/xen/arch/x86/mm/p2m.c
-+++ b/xen/arch/x86/mm/p2m.c
-@@ -2194,13 +2194,8 @@ bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx)
-
- if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
- {
-- if ( idx != vcpu_altp2m(v).p2midx )
-- {
-- atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
-- vcpu_altp2m(v).p2midx = idx;
-- atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
-+ if ( p2m_set_altp2m(v, idx) )
- altp2m_vcpu_update_p2m(v);
-- }
- rc = 1;
- }
-
-@@ -2471,13 +2466,8 @@ int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx)
- if ( d->arch.altp2m_visible_eptp[idx] != mfn_x(INVALID_MFN) )
- {
- for_each_vcpu( d, v )
-- if ( idx != vcpu_altp2m(v).p2midx )
-- {
-- atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
-- vcpu_altp2m(v).p2midx = idx;
-- atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
-+ if ( p2m_set_altp2m(v, idx) )
- altp2m_vcpu_update_p2m(v);
-- }
-
- rc = 0;
- }
-diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
-index 2db9ab0122..f92bb97394 100644
---- a/xen/include/asm-x86/p2m.h
-+++ b/xen/include/asm-x86/p2m.h
-@@ -841,6 +841,26 @@ static inline struct p2m_domain *p2m_get_altp2m(struct vcpu *v)
- return v->domain->arch.altp2m_p2m[index];
- }
-
-+/* set current alternate p2m table */
-+static inline bool p2m_set_altp2m(struct vcpu *v, unsigned int idx)
-+{
-+ struct p2m_domain *orig;
-+
-+ BUG_ON(idx >= MAX_ALTP2M);
-+
-+ if ( idx == vcpu_altp2m(v).p2midx )
-+ return false;
-+
-+ orig = p2m_get_altp2m(v);
-+ BUG_ON(!orig);
-+ atomic_dec(&orig->active_vcpus);
-+
-+ vcpu_altp2m(v).p2midx = idx;
-+ atomic_inc(&v->domain->arch.altp2m_p2m[idx]->active_vcpus);
-+
-+ return true;
-+}
-+
- /* Switch alternate p2m for a single vcpu */
- bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx);
-
---
-2.40.0
-
diff --git a/0044-VT-d-constrain-IGD-check.patch b/0044-VT-d-constrain-IGD-check.patch
deleted file mode 100644
index 13ca74e..0000000
--- a/0044-VT-d-constrain-IGD-check.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 4d42cc4d25c35ca381370a1fa0b45350723d1308 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 13:52:20 +0100
-Subject: [PATCH 44/61] VT-d: constrain IGD check
-
-Marking a DRHD as controlling an IGD isn't very sensible without
-checking that at the very least it's a graphics device that lives at
-0000:00:02.0. Re-use the reading of the class-code to control both the
-clearing of "gfx_only" and the setting of "igd_drhd_address".
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: f8c4317295fa1cde1a81779b7e362651c084efb8
-master date: 2023-03-14 10:44:08 +0100
----
- xen/drivers/passthrough/vtd/dmar.c | 9 +++------
- 1 file changed, 3 insertions(+), 6 deletions(-)
-
-diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
-index 33a12b2ae9..9ec49936b8 100644
---- a/xen/drivers/passthrough/vtd/dmar.c
-+++ b/xen/drivers/passthrough/vtd/dmar.c
-@@ -391,15 +391,12 @@ static int __init acpi_parse_dev_scope(
-
- if ( drhd )
- {
-- if ( (seg == 0) && (bus == 0) && (path->dev == 2) &&
-- (path->fn == 0) )
-- igd_drhd_address = drhd->address;
--
-- if ( gfx_only &&
-- pci_conf_read8(PCI_SBDF(seg, bus, path->dev, path->fn),
-+ if ( pci_conf_read8(PCI_SBDF(seg, bus, path->dev, path->fn),
- PCI_CLASS_DEVICE + 1) != 0x03
- /* PCI_BASE_CLASS_DISPLAY */ )
- gfx_only = false;
-+ else if ( !seg && !bus && path->dev == 2 && !path->fn )
-+ igd_drhd_address = drhd->address;
- }
-
- break;
---
-2.40.0
-
diff --git a/0045-bunzip-work-around-gcc13-warning.patch b/0045-bunzip-work-around-gcc13-warning.patch
deleted file mode 100644
index 9b26011..0000000
--- a/0045-bunzip-work-around-gcc13-warning.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 49116b2101094c3d6658928f03db88d035ba97be Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Tue, 21 Mar 2023 13:52:58 +0100
-Subject: [PATCH 45/61] bunzip: work around gcc13 warning
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-While provable that length[0] is always initialized (because symCount
-cannot be zero), upcoming gcc13 fails to recognize this and warns about
-the unconditional use of the value immediately following the loop.
-
-See also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106511.
-
-Reported-by: Martin Liška <martin.liska@suse.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 402195e56de0aacf97e05c80ed367d464ca6938b
-master date: 2023-03-14 10:45:28 +0100
----
- xen/common/bunzip2.c | 5 +++++
- 1 file changed, 5 insertions(+)
-
-diff --git a/xen/common/bunzip2.c b/xen/common/bunzip2.c
-index 2087cfbbed..5108e570ed 100644
---- a/xen/common/bunzip2.c
-+++ b/xen/common/bunzip2.c
-@@ -233,6 +233,11 @@ static int __init get_next_block(struct bunzip_data *bd)
- becomes negative, so an unsigned inequality catches
- it.) */
- t = get_bits(bd, 5)-1;
-+ /* GCC 13 has apparently improved use-before-set detection, but
-+ it can't figure out that length[0] is always intialized by
-+ virtue of symCount always being positive when making it here.
-+ See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106511. */
-+ length[0] = 0;
- for (i = 0; i < symCount; i++) {
- for (;;) {
- if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
---
-2.40.0
-
diff --git a/0046-libacpi-fix-PCI-hotplug-AML.patch b/0046-libacpi-fix-PCI-hotplug-AML.patch
deleted file mode 100644
index b1c79f5..0000000
--- a/0046-libacpi-fix-PCI-hotplug-AML.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-From 54102e428ba3f677904278479f8110c8eef6fedc Mon Sep 17 00:00:00 2001
-From: David Woodhouse <dwmw@amazon.co.uk>
-Date: Tue, 21 Mar 2023 13:53:25 +0100
-Subject: [PATCH 46/61] libacpi: fix PCI hotplug AML
-
-The emulated PIIX3 uses a nybble for the status of each PCI function,
-so the status for e.g. slot 0 functions 0 and 1 respectively can be
-read as (\_GPE.PH00 & 0x0F), and (\_GPE.PH00 >> 0x04).
-
-The AML that Xen gives to a guest gets the operand order for the odd-
-numbered functions the wrong way round, returning (0x04 >> \_GPE.PH00)
-instead.
-
-As far as I can tell, this was the wrong way round in Xen from the
-moment that PCI hotplug was first introduced in commit 83d82e6f35a8:
-
-+ ShiftRight (0x4, \_GPE.PH00, Local1)
-+ Return (Local1) /* IN status as the _STA */
-
-Or maybe there's bizarre AML operand ordering going on there, like
-Intel's wrong-way-round assembler, and it only broke later when it was
-changed to being generated?
-
-Either way, it's definitely wrong now, and instrumenting a Linux guest
-shows that it correctly sees _STA being 0x00 in function 0 of an empty
-slot, but then the loop in acpiphp_glue.c::get_slot_status() goes on to
-look at function 1 and sees that _STA evaluates to 0x04. Thus reporting
-an adapter is present in every slot in /sys/bus/pci/slots/*
-
-Quite why Linux wants to look for function 1 being physically present
-when function 0 isn't... I don't want to think about right now.
-
-Fixes: 83d82e6f35a8 ("hvmloader: pass-through: multi-function PCI hot-plug")
-Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: b190af7d3e90f58da5f58044b8dea7261b8b483d
-master date: 2023-03-20 17:12:34 +0100
----
- tools/libacpi/mk_dsdt.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/libacpi/mk_dsdt.c b/tools/libacpi/mk_dsdt.c
-index c5ba4c0b2f..250a50b7eb 100644
---- a/tools/libacpi/mk_dsdt.c
-+++ b/tools/libacpi/mk_dsdt.c
-@@ -431,7 +431,7 @@ int main(int argc, char **argv)
- stmt("Store", "0x89, \\_GPE.DPT2");
- }
- if ( slot & 1 )
-- stmt("ShiftRight", "0x4, \\_GPE.PH%02X, Local1", slot & ~1);
-+ stmt("ShiftRight", "\\_GPE.PH%02X, 0x04, Local1", slot & ~1);
- else
- stmt("And", "\\_GPE.PH%02X, 0x0f, Local1", slot & ~1);
- stmt("Return", "Local1"); /* IN status as the _STA */
---
-2.40.0
-
diff --git a/0047-AMD-IOMMU-without-XT-x2APIC-needs-to-be-forced-into-.patch b/0047-AMD-IOMMU-without-XT-x2APIC-needs-to-be-forced-into-.patch
deleted file mode 100644
index 54940ba..0000000
--- a/0047-AMD-IOMMU-without-XT-x2APIC-needs-to-be-forced-into-.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 8e9690a2252eda09537275a951ee0af0b3b330f2 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Fri, 31 Mar 2023 08:36:59 +0200
-Subject: [PATCH 47/61] AMD/IOMMU: without XT, x2APIC needs to be forced into
- physical mode
-
-An earlier change with the same title (commit 1ba66a870eba) altered only
-the path where x2apic_phys was already set to false (perhaps from the
-command line). The same of course needs applying when the variable
-wasn't modified yet from its initial value.
-
-Reported-by: Elliott Mitchell <ehem+xen@m5p.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: 0d2686f6b66b4b1b3c72c3525083b0ce02830054
-master date: 2023-03-21 09:23:25 +0100
----
- xen/arch/x86/genapic/x2apic.c | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c
-index 628b441da5..247364af58 100644
---- a/xen/arch/x86/genapic/x2apic.c
-+++ b/xen/arch/x86/genapic/x2apic.c
-@@ -239,11 +239,11 @@ const struct genapic *__init apic_x2apic_probe(void)
- if ( x2apic_phys < 0 )
- {
- /*
-- * Force physical mode if there's no interrupt remapping support: The
-- * ID in clustered mode requires a 32 bit destination field due to
-+ * Force physical mode if there's no (full) interrupt remapping support:
-+ * The ID in clustered mode requires a 32 bit destination field due to
- * the usage of the high 16 bits to hold the cluster ID.
- */
-- x2apic_phys = !iommu_intremap ||
-+ x2apic_phys = iommu_intremap != iommu_intremap_full ||
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL);
- }
- else if ( !x2apic_phys )
---
-2.40.0
-
diff --git a/0048-VT-d-fix-iommu-no-igfx-if-the-IOMMU-scope-contains-f.patch b/0048-VT-d-fix-iommu-no-igfx-if-the-IOMMU-scope-contains-f.patch
deleted file mode 100644
index 4c480b0..0000000
--- a/0048-VT-d-fix-iommu-no-igfx-if-the-IOMMU-scope-contains-f.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 07e8f5b3d1300327a9f2e67b03dead0e2138b92f Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?=
- <marmarek@invisiblethingslab.com>
-Date: Fri, 31 Mar 2023 08:38:07 +0200
-Subject: [PATCH 48/61] VT-d: fix iommu=no-igfx if the IOMMU scope contains
- fake device(s)
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-If the scope for IGD's IOMMU contains additional device that doesn't
-actually exist, iommu=no-igfx would not disable that IOMMU. In this
-particular case (Thinkpad x230) it included 00:02.1, but there is no
-such device on this platform. Consider only existing devices for the
-"gfx only" check as well as the establishing of IGD DRHD address
-(underlying is_igd_drhd(), which is used to determine applicability of
-two workarounds).
-
-Fixes: 2d7f191b392e ("VT-d: generalize and correct "iommu=no-igfx" handling")
-Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: 49de6749baa8d0addc3048defd4ef3e85cb135e9
-master date: 2023-03-23 09:16:41 +0100
----
- xen/drivers/passthrough/vtd/dmar.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
-index 9ec49936b8..bfec40f47d 100644
---- a/xen/drivers/passthrough/vtd/dmar.c
-+++ b/xen/drivers/passthrough/vtd/dmar.c
-@@ -389,7 +389,7 @@ static int __init acpi_parse_dev_scope(
- printk(VTDPREFIX " endpoint: %pp\n",
- &PCI_SBDF(seg, bus, path->dev, path->fn));
-
-- if ( drhd )
-+ if ( drhd && pci_device_detect(seg, bus, path->dev, path->fn) )
- {
- if ( pci_conf_read8(PCI_SBDF(seg, bus, path->dev, path->fn),
- PCI_CLASS_DEVICE + 1) != 0x03
---
-2.40.0
-
diff --git a/0049-x86-shadow-fix-and-improve-sh_page_has_multiple_shad.patch b/0049-x86-shadow-fix-and-improve-sh_page_has_multiple_shad.patch
deleted file mode 100644
index 0abf7e9..0000000
--- a/0049-x86-shadow-fix-and-improve-sh_page_has_multiple_shad.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From cab866ee62d860e9ff4abe701163972d4e9f896d Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Fri, 31 Mar 2023 08:38:42 +0200
-Subject: [PATCH 49/61] x86/shadow: fix and improve
- sh_page_has_multiple_shadows()
-
-While no caller currently invokes the function without first making sure
-there is at least one shadow [1], we'd better eliminate UB here:
-find_first_set_bit() requires input to be non-zero to return a well-
-defined result.
-
-Further, using find_first_set_bit() isn't very efficient in the first
-place for the intended purpose.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-
-[1] The function has exactly two uses, and both are from OOS code, which
- is HVM-only. For HVM (but not for PV) sh_mfn_is_a_page_table(),
- guarding the call to sh_unsync(), guarantees at least one shadow.
- Hence even if sh_page_has_multiple_shadows() returned a bogus value
- when invoked for a PV domain, the subsequent is_hvm_vcpu() and
- oos_active checks (the former being redundant with the latter) will
- compensate. (Arguably that oos_active check should come first, for
- both clarity and efficiency reasons.)
-master commit: 2896224a4e294652c33f487b603d20bd30955f21
-master date: 2023-03-24 11:07:08 +0100
----
- xen/arch/x86/mm/shadow/private.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
-index 738214f75e..762214f73c 100644
---- a/xen/arch/x86/mm/shadow/private.h
-+++ b/xen/arch/x86/mm/shadow/private.h
-@@ -324,7 +324,7 @@ static inline int sh_page_has_multiple_shadows(struct page_info *pg)
- return 0;
- shadows = pg->shadow_flags & SHF_page_type_mask;
- /* More than one type bit set in shadow-flags? */
-- return ( (shadows & ~(1UL << find_first_set_bit(shadows))) != 0 );
-+ return shadows && (shadows & (shadows - 1));
- }
-
- #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
---
-2.40.0
-
diff --git a/0050-x86-nospec-Fix-evaluate_nospec-code-generation-under.patch b/0050-x86-nospec-Fix-evaluate_nospec-code-generation-under.patch
deleted file mode 100644
index 14a8e14..0000000
--- a/0050-x86-nospec-Fix-evaluate_nospec-code-generation-under.patch
+++ /dev/null
@@ -1,101 +0,0 @@
-From 90320fd05991d7817cea85e1d45674b757abf03c Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 31 Mar 2023 08:39:32 +0200
-Subject: [PATCH 50/61] x86/nospec: Fix evaluate_nospec() code generation under
- Clang
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-It turns out that evaluate_nospec() code generation is not safe under Clang.
-Given:
-
- void eval_nospec_test(int x)
- {
- if ( evaluate_nospec(x) )
- asm volatile ("nop #true" ::: "memory");
- else
- asm volatile ("nop #false" ::: "memory");
- }
-
-Clang emits:
-
- <eval_nospec_test>:
- 0f ae e8 lfence
- 85 ff test %edi,%edi
- 74 02 je <eval_nospec_test+0x9>
- 90 nop
- c3 ret
- 90 nop
- c3 ret
-
-which is not safe because the lfence has been hoisted above the conditional
-jump. Clang concludes that both barrier_nospec_true()'s have identical side
-effects and can safely be merged.
-
-Clang can be persuaded that the side effects are different if there are
-different comments in the asm blocks. This is fragile, but no more fragile
-that other aspects of this construct.
-
-Introduce barrier_nospec_false() with a separate internal comment to prevent
-Clang merging it with barrier_nospec_true() despite the otherwise-identical
-content. The generated code now becomes:
-
- <eval_nospec_test>:
- 85 ff test %edi,%edi
- 74 05 je <eval_nospec_test+0x9>
- 0f ae e8 lfence
- 90 nop
- c3 ret
- 0f ae e8 lfence
- 90 nop
- c3 ret
-
-which has the correct number of lfence's, and in the correct place.
-
-Link: https://github.com/llvm/llvm-project/issues/55084
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: bc3c133841435829ba5c0a48427e2a77633502ab
-master date: 2023-03-24 12:16:31 +0000
----
- xen/include/asm-x86/nospec.h | 15 +++++++++++++--
- 1 file changed, 13 insertions(+), 2 deletions(-)
-
-diff --git a/xen/include/asm-x86/nospec.h b/xen/include/asm-x86/nospec.h
-index 5312ae4c6f..7150e76b87 100644
---- a/xen/include/asm-x86/nospec.h
-+++ b/xen/include/asm-x86/nospec.h
-@@ -10,15 +10,26 @@
- static always_inline bool barrier_nospec_true(void)
- {
- #ifdef CONFIG_SPECULATIVE_HARDEN_BRANCH
-- alternative("lfence", "", X86_FEATURE_SC_NO_BRANCH_HARDEN);
-+ alternative("lfence #nospec-true", "", X86_FEATURE_SC_NO_BRANCH_HARDEN);
- #endif
- return true;
- }
-
-+static always_inline bool barrier_nospec_false(void)
-+{
-+#ifdef CONFIG_SPECULATIVE_HARDEN_BRANCH
-+ alternative("lfence #nospec-false", "", X86_FEATURE_SC_NO_BRANCH_HARDEN);
-+#endif
-+ return false;
-+}
-+
- /* Allow to protect evaluation of conditionals with respect to speculation */
- static always_inline bool evaluate_nospec(bool condition)
- {
-- return condition ? barrier_nospec_true() : !barrier_nospec_true();
-+ if ( condition )
-+ return barrier_nospec_true();
-+ else
-+ return barrier_nospec_false();
- }
-
- /* Allow to block speculative execution in generic code */
---
-2.40.0
-
diff --git a/0051-x86-shadow-Fix-build-with-no-PG_log_dirty.patch b/0051-x86-shadow-Fix-build-with-no-PG_log_dirty.patch
deleted file mode 100644
index ef2a137..0000000
--- a/0051-x86-shadow-Fix-build-with-no-PG_log_dirty.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 7e1fe95c79d55a1c1a65f71a078b8e31c69ffe94 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 31 Mar 2023 08:39:49 +0200
-Subject: [PATCH 51/61] x86/shadow: Fix build with no PG_log_dirty
-
-Gitlab Randconfig found:
-
- arch/x86/mm/shadow/common.c: In function 'shadow_prealloc':
- arch/x86/mm/shadow/common.c:1023:18: error: implicit declaration of function
- 'paging_logdirty_levels'; did you mean 'paging_log_dirty_init'? [-Werror=implicit-function-declaration]
- 1023 | count += paging_logdirty_levels();
- | ^~~~~~~~~~~~~~~~~~~~~~
- | paging_log_dirty_init
- arch/x86/mm/shadow/common.c:1023:18: error: nested extern declaration of 'paging_logdirty_levels' [-Werror=nested-externs]
-
-The '#if PG_log_dirty' expression is currently SHADOW_PAGING && !HVM &&
-PV_SHIM_EXCLUSIVE. Move the declaration outside.
-
-Fixes: 33fb3a661223 ("x86/shadow: account for log-dirty mode when pre-allocating")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: 6d14cb105b1c54ad7b4228d858ae85aa8a672bbd
-master date: 2023-03-24 12:16:31 +0000
----
- xen/include/asm-x86/paging.h | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
-index c6b429c691..43abaa5bd1 100644
---- a/xen/include/asm-x86/paging.h
-+++ b/xen/include/asm-x86/paging.h
-@@ -154,6 +154,10 @@ struct paging_mode {
- /*****************************************************************************
- * Log dirty code */
-
-+#define paging_logdirty_levels() \
-+ (DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3), \
-+ PAGE_SHIFT - ilog2(sizeof(mfn_t))) + 1)
-+
- #if PG_log_dirty
-
- /* get the dirty bitmap for a specific range of pfns */
-@@ -192,10 +196,6 @@ int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
- #define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
- (LOGDIRTY_NODE_ENTRIES-1))
-
--#define paging_logdirty_levels() \
-- (DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3), \
-- PAGE_SHIFT - ilog2(sizeof(mfn_t))) + 1)
--
- #ifdef CONFIG_HVM
- /* VRAM dirty tracking support */
- struct sh_dirty_vram {
---
-2.40.0
-
diff --git a/0052-x86-vmx-Don-t-spuriously-crash-the-domain-when-INIT-.patch b/0052-x86-vmx-Don-t-spuriously-crash-the-domain-when-INIT-.patch
deleted file mode 100644
index c408fbb..0000000
--- a/0052-x86-vmx-Don-t-spuriously-crash-the-domain-when-INIT-.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From b1022b65de59828d40d9d71cc734a42c1c30c972 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 31 Mar 2023 08:40:27 +0200
-Subject: [PATCH 52/61] x86/vmx: Don't spuriously crash the domain when INIT is
- received
-
-In VMX operation, the handling of INIT IPIs is changed. Instead of the CPU
-resetting, the next VMEntry fails with EXIT_REASON_INIT. From the TXT spec,
-the intent of this behaviour is so that an entity which cares can scrub
-secrets from RAM before participating in an orderly shutdown.
-
-Right now, Xen's behaviour is that when an INIT arrives, the HVM VM which
-schedules next is killed (citing an unknown VMExit), *and* we ignore the INIT
-and continue blindly onwards anyway.
-
-This patch addresses only the first of these two problems by ignoring the INIT
-and continuing without crashing the VM in question.
-
-The second wants addressing too, just as soon as we've figured out something
-better to do...
-
-Discovered as collateral damage from when an AP triple faults on S3 resume on
-Intel TigerLake platforms.
-
-Link: https://github.com/QubesOS/qubes-issues/issues/7283
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: b1f11273d5a774cc88a3685c96c2e7cf6385e3b6
-master date: 2023-03-24 22:49:58 +0000
----
- xen/arch/x86/hvm/vmx/vmx.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
-index c8a839cd5e..cebe46ef6a 100644
---- a/xen/arch/x86/hvm/vmx/vmx.c
-+++ b/xen/arch/x86/hvm/vmx/vmx.c
-@@ -4002,6 +4002,10 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
- case EXIT_REASON_MCE_DURING_VMENTRY:
- do_machine_check(regs);
- break;
-+
-+ case EXIT_REASON_INIT:
-+ printk(XENLOG_ERR "Error: INIT received - ignoring\n");
-+ return; /* Renter the guest without further processing */
- }
-
- /* Now enable interrupts so it's safe to take locks. */
---
-2.40.0
-
diff --git a/0053-x86-ucode-Fix-error-paths-control_thread_fn.patch b/0053-x86-ucode-Fix-error-paths-control_thread_fn.patch
deleted file mode 100644
index 7bb2c27..0000000
--- a/0053-x86-ucode-Fix-error-paths-control_thread_fn.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 0f81c5a2c8e0432d5af3d9f4e6398376cd514516 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 31 Mar 2023 08:40:56 +0200
-Subject: [PATCH 53/61] x86/ucode: Fix error paths control_thread_fn()
-
-These two early exits skipped re-enabling the watchdog, restoring the NMI
-callback, and clearing the nmi_patch global pointer. Always execute the tail
-of the function on the way out.
-
-Fixes: 8dd4dfa92d62 ("x86/microcode: Synchronize late microcode loading")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Sergey Dyasli <sergey.dyasli@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: fc2e1f3aad602a66c14b8285a1bd38a82f8fd02d
-master date: 2023-03-28 11:57:56 +0100
----
- xen/arch/x86/cpu/microcode/core.c | 9 +++------
- 1 file changed, 3 insertions(+), 6 deletions(-)
-
-diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c
-index ee7df9a591..ad150e5963 100644
---- a/xen/arch/x86/cpu/microcode/core.c
-+++ b/xen/arch/x86/cpu/microcode/core.c
-@@ -488,10 +488,7 @@ static int control_thread_fn(const struct microcode_patch *patch)
- ret = wait_for_condition(wait_cpu_callin, num_online_cpus(),
- MICROCODE_CALLIN_TIMEOUT_US);
- if ( ret )
-- {
-- set_state(LOADING_EXIT);
-- return ret;
-- }
-+ goto out;
-
- /* Control thread loads ucode first while others are in NMI handler. */
- ret = microcode_ops->apply_microcode(patch);
-@@ -503,8 +500,7 @@ static int control_thread_fn(const struct microcode_patch *patch)
- {
- printk(XENLOG_ERR
- "Late loading aborted: CPU%u failed to update ucode\n", cpu);
-- set_state(LOADING_EXIT);
-- return ret;
-+ goto out;
- }
-
- /* Let primary threads load the given ucode update */
-@@ -535,6 +531,7 @@ static int control_thread_fn(const struct microcode_patch *patch)
- }
- }
-
-+ out:
- /* Mark loading is done to unblock other threads */
- set_state(LOADING_EXIT);
-
---
-2.40.0
-
diff --git a/0054-vpci-msix-handle-accesses-adjacent-to-the-MSI-X-tabl.patch b/0054-vpci-msix-handle-accesses-adjacent-to-the-MSI-X-tabl.patch
deleted file mode 100644
index 4973ae7..0000000
--- a/0054-vpci-msix-handle-accesses-adjacent-to-the-MSI-X-tabl.patch
+++ /dev/null
@@ -1,543 +0,0 @@
-From d080287c2a8dce11baee1d7bbf9276757e8572e4 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
-Date: Fri, 31 Mar 2023 08:41:27 +0200
-Subject: [PATCH 54/61] vpci/msix: handle accesses adjacent to the MSI-X table
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The handling of the MSI-X table accesses by Xen requires that any
-pages part of the MSI-X related tables are not mapped into the domain
-physmap. As a result, any device registers in the same pages as the
-start or the end of the MSIX or PBA tables is not currently
-accessible, as the accesses are just dropped.
-
-Note the spec forbids such placing of registers, as the MSIX and PBA
-tables must be 4K isolated from any other registers:
-
-"If a Base Address register that maps address space for the MSI-X
-Table or MSI-X PBA also maps other usable address space that is not
-associated with MSI-X structures, locations (e.g., for CSRs) used in
-the other address space must not share any naturally aligned 4-KB
-address range with one where either MSI-X structure resides."
-
-Yet the 'Intel Wi-Fi 6 AX201' device on one of my boxes has registers
-in the same page as the MSIX tables, and thus won't work on a PVH dom0
-without this fix.
-
-In order to cope with the behavior passthrough any accesses that fall
-on the same page as the MSIX tables (but don't fall in between) to the
-underlying hardware. Such forwarding also takes care of the PBA
-accesses, so it allows to remove the code doing this handling in
-msix_{read,write}. Note that as a result accesses to the PBA array
-are no longer limited to 4 and 8 byte sizes, there's no access size
-restriction for PBA accesses documented in the specification.
-
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-vpci/msix: restore PBA access length and alignment restrictions
-
-Accesses to the PBA array have the same length and alignment
-limitations as accesses to the MSI-X table:
-
-"For all accesses to MSI-X Table and MSI-X PBA fields, software must
-use aligned full DWORD or aligned full QWORD transactions; otherwise,
-the result is undefined."
-
-Introduce such length and alignment checks into the handling of PBA
-accesses for vPCI. This was a mistake of mine for not reading the
-specification correctly.
-
-Note that accesses must now be aligned, and hence there's no longer a
-need to check that the end of the access falls into the PBA region as
-both the access and the region addresses must be aligned.
-
-Fixes: b177892d2d ('vpci/msix: handle accesses adjacent to the MSI-X table')
-Reported-by: Jan Beulich <jbeulich@suse.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-master commit: b177892d2d0e8a31122c218989f43130aeba5282
-master date: 2023-03-28 14:20:35 +0200
-master commit: 7a502b4fbc339e9d3d3d45fb37f09da06bc3081c
-master date: 2023-03-29 14:56:33 +0200
----
- xen/drivers/vpci/msix.c | 357 +++++++++++++++++++++++++++++-----------
- xen/drivers/vpci/vpci.c | 7 +-
- xen/include/xen/vpci.h | 8 +-
- 3 files changed, 275 insertions(+), 97 deletions(-)
-
-diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
-index ea5d73a02a..7e1bfb2f0a 100644
---- a/xen/drivers/vpci/msix.c
-+++ b/xen/drivers/vpci/msix.c
-@@ -27,6 +27,11 @@
- ((addr) >= vmsix_table_addr(vpci, nr) && \
- (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
-
-+#define VMSIX_ADDR_SAME_PAGE(addr, vpci, nr) \
-+ (PFN_DOWN(addr) >= PFN_DOWN(vmsix_table_addr(vpci, nr)) && \
-+ PFN_DOWN(addr) <= PFN_DOWN(vmsix_table_addr(vpci, nr) + \
-+ vmsix_table_size(vpci, nr) - 1))
-+
- static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg,
- void *data)
- {
-@@ -149,7 +154,7 @@ static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
-
- for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
- if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
-- VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
-+ VMSIX_ADDR_SAME_PAGE(addr, msix->pdev->vpci, i) )
- return msix;
- }
-
-@@ -182,36 +187,172 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
- return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
- }
-
--static void __iomem *get_pba(struct vpci *vpci)
-+static void __iomem *get_table(struct vpci *vpci, unsigned int slot)
- {
- struct vpci_msix *msix = vpci->msix;
-+ paddr_t addr = 0;
-+
-+ ASSERT(spin_is_locked(&vpci->lock));
-+
-+ if ( likely(msix->table[slot]) )
-+ return msix->table[slot];
-+
-+ switch ( slot )
-+ {
-+ case VPCI_MSIX_TBL_TAIL:
-+ addr = vmsix_table_size(vpci, VPCI_MSIX_TABLE);
-+ fallthrough;
-+ case VPCI_MSIX_TBL_HEAD:
-+ addr += vmsix_table_addr(vpci, VPCI_MSIX_TABLE);
-+ break;
-+
-+ case VPCI_MSIX_PBA_TAIL:
-+ addr = vmsix_table_size(vpci, VPCI_MSIX_PBA);
-+ fallthrough;
-+ case VPCI_MSIX_PBA_HEAD:
-+ addr += vmsix_table_addr(vpci, VPCI_MSIX_PBA);
-+ break;
-+
-+ default:
-+ ASSERT_UNREACHABLE();
-+ return NULL;
-+ }
-+
-+ msix->table[slot] = ioremap(round_pgdown(addr), PAGE_SIZE);
-+
-+ return msix->table[slot];
-+}
-+
-+unsigned int get_slot(const struct vpci *vpci, unsigned long addr)
-+{
-+ unsigned long pfn = PFN_DOWN(addr);
-+
- /*
-- * PBA will only be unmapped when the device is deassigned, so access it
-- * without holding the vpci lock.
-+ * The logic below relies on having the tables identity mapped to the guest
-+ * address space, or for the `addr` parameter to be translated into its
-+ * host physical memory address equivalent.
- */
-- void __iomem *pba = read_atomic(&msix->pba);
-
-- if ( likely(pba) )
-- return pba;
-+ if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_TABLE)) )
-+ return VPCI_MSIX_TBL_HEAD;
-+ if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_TABLE) +
-+ vmsix_table_size(vpci, VPCI_MSIX_TABLE) - 1) )
-+ return VPCI_MSIX_TBL_TAIL;
-+ if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_PBA)) )
-+ return VPCI_MSIX_PBA_HEAD;
-+ if ( pfn == PFN_DOWN(vmsix_table_addr(vpci, VPCI_MSIX_PBA) +
-+ vmsix_table_size(vpci, VPCI_MSIX_PBA) - 1) )
-+ return VPCI_MSIX_PBA_TAIL;
-+
-+ ASSERT_UNREACHABLE();
-+ return -1;
-+}
-+
-+static bool adjacent_handle(const struct vpci_msix *msix, unsigned long addr)
-+{
-+ unsigned int i;
-+
-+ if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
-+ return true;
-+
-+ if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_TABLE) )
-+ return false;
-+
-+ for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
-+ if ( VMSIX_ADDR_SAME_PAGE(addr, msix->pdev->vpci, i) )
-+ return true;
-+
-+ return false;
-+}
-+
-+static int adjacent_read(const struct domain *d, const struct vpci_msix *msix,
-+ unsigned long addr, unsigned int len,
-+ unsigned long *data)
-+{
-+ const void __iomem *mem;
-+ struct vpci *vpci = msix->pdev->vpci;
-+ unsigned int slot;
-+
-+ *data = ~0ul;
-+
-+ if ( !adjacent_handle(msix, addr + len - 1) )
-+ return X86EMUL_OKAY;
-+
-+ if ( VMSIX_ADDR_IN_RANGE(addr, vpci, VPCI_MSIX_PBA) &&
-+ !access_allowed(msix->pdev, addr, len) )
-+ /* PBA accesses must be aligned and 4 or 8 bytes in size. */
-+ return X86EMUL_OKAY;
-+
-+ slot = get_slot(vpci, addr);
-+ if ( slot >= ARRAY_SIZE(msix->table) )
-+ return X86EMUL_OKAY;
-+
-+ if ( unlikely(!IS_ALIGNED(addr, len)) )
-+ {
-+ unsigned int i;
-
-- pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
-- vmsix_table_size(vpci, VPCI_MSIX_PBA));
-- if ( !pba )
-- return read_atomic(&msix->pba);
-+ gprintk(XENLOG_DEBUG, "%pp: unaligned read to MSI-X related page\n",
-+ &msix->pdev->sbdf);
-+
-+ /*
-+ * Split unaligned accesses into byte sized ones. Shouldn't happen in
-+ * the first place, but devices shouldn't have registers in the same 4K
-+ * page as the MSIX tables either.
-+ *
-+ * It's unclear whether this could cause issues if a guest expects
-+ * registers to be accessed atomically, it better use an aligned access
-+ * if it has such expectations.
-+ */
-+ for ( i = 0; i < len; i++ )
-+ {
-+ unsigned long partial = ~0ul;
-+ int rc = adjacent_read(d, msix, addr + i, 1, &partial);
-+
-+ if ( rc != X86EMUL_OKAY )
-+ return rc;
-+
-+ *data &= ~(0xfful << (i * 8));
-+ *data |= (partial & 0xff) << (i * 8);
-+ }
-+
-+ return X86EMUL_OKAY;
-+ }
-
- spin_lock(&vpci->lock);
-- if ( !msix->pba )
-+ mem = get_table(vpci, slot);
-+ if ( !mem )
- {
-- write_atomic(&msix->pba, pba);
- spin_unlock(&vpci->lock);
-+ gprintk(XENLOG_WARNING,
-+ "%pp: unable to map MSI-X page, returning all bits set\n",
-+ &msix->pdev->sbdf);
-+ return X86EMUL_OKAY;
- }
-- else
-+
-+ switch ( len )
- {
-- spin_unlock(&vpci->lock);
-- iounmap(pba);
-+ case 1:
-+ *data = readb(mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ case 2:
-+ *data = readw(mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ case 4:
-+ *data = readl(mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ case 8:
-+ *data = readq(mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ default:
-+ ASSERT_UNREACHABLE();
- }
-+ spin_unlock(&vpci->lock);
-
-- return read_atomic(&msix->pba);
-+ return X86EMUL_OKAY;
- }
-
- static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
-@@ -227,47 +368,11 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
- if ( !msix )
- return X86EMUL_RETRY;
-
-- if ( !access_allowed(msix->pdev, addr, len) )
-- return X86EMUL_OKAY;
--
-- if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
-- {
-- struct vpci *vpci = msix->pdev->vpci;
-- unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
-- const void __iomem *pba = get_pba(vpci);
--
-- /*
-- * Access to PBA.
-- *
-- * TODO: note that this relies on having the PBA identity mapped to the
-- * guest address space. If this changes the address will need to be
-- * translated.
-- */
-- if ( !pba )
-- {
-- gprintk(XENLOG_WARNING,
-- "%pp: unable to map MSI-X PBA, report all pending\n",
-- &msix->pdev->sbdf);
-- return X86EMUL_OKAY;
-- }
--
-- switch ( len )
-- {
-- case 4:
-- *data = readl(pba + idx);
-- break;
--
-- case 8:
-- *data = readq(pba + idx);
-- break;
--
-- default:
-- ASSERT_UNREACHABLE();
-- break;
-- }
-+ if ( adjacent_handle(msix, addr) )
-+ return adjacent_read(d, msix, addr, len, data);
-
-+ if ( !access_allowed(msix->pdev, addr, len) )
- return X86EMUL_OKAY;
-- }
-
- spin_lock(&msix->pdev->vpci->lock);
- entry = get_entry(msix, addr);
-@@ -303,57 +408,103 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
- return X86EMUL_OKAY;
- }
-
--static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
-- unsigned long data)
-+static int adjacent_write(const struct domain *d, const struct vpci_msix *msix,
-+ unsigned long addr, unsigned int len,
-+ unsigned long data)
- {
-- const struct domain *d = v->domain;
-- struct vpci_msix *msix = msix_find(d, addr);
-- struct vpci_msix_entry *entry;
-- unsigned int offset;
-+ void __iomem *mem;
-+ struct vpci *vpci = msix->pdev->vpci;
-+ unsigned int slot;
-
-- if ( !msix )
-- return X86EMUL_RETRY;
-+ if ( !adjacent_handle(msix, addr + len - 1) )
-+ return X86EMUL_OKAY;
-
-- if ( !access_allowed(msix->pdev, addr, len) )
-+ /*
-+ * Only check start and end of the access because the size of the PBA is
-+ * assumed to be equal or bigger (8 bytes) than the length of any access
-+ * handled here.
-+ */
-+ if ( VMSIX_ADDR_IN_RANGE(addr, vpci, VPCI_MSIX_PBA) &&
-+ (!access_allowed(msix->pdev, addr, len) || !is_hardware_domain(d)) )
-+ /* Ignore writes to PBA for DomUs, it's undefined behavior. */
- return X86EMUL_OKAY;
-
-- if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
-- {
-- /* Ignore writes to PBA for DomUs, it's behavior is undefined. */
-- if ( is_hardware_domain(d) )
-- {
-- struct vpci *vpci = msix->pdev->vpci;
-- unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
-- const void __iomem *pba = get_pba(vpci);
-+ slot = get_slot(vpci, addr);
-+ if ( slot >= ARRAY_SIZE(msix->table) )
-+ return X86EMUL_OKAY;
-
-- if ( !pba )
-- {
-- /* Unable to map the PBA, ignore write. */
-- gprintk(XENLOG_WARNING,
-- "%pp: unable to map MSI-X PBA, write ignored\n",
-- &msix->pdev->sbdf);
-- return X86EMUL_OKAY;
-- }
-+ if ( unlikely(!IS_ALIGNED(addr, len)) )
-+ {
-+ unsigned int i;
-
-- switch ( len )
-- {
-- case 4:
-- writel(data, pba + idx);
-- break;
-+ gprintk(XENLOG_DEBUG, "%pp: unaligned write to MSI-X related page\n",
-+ &msix->pdev->sbdf);
-
-- case 8:
-- writeq(data, pba + idx);
-- break;
-+ for ( i = 0; i < len; i++ )
-+ {
-+ int rc = adjacent_write(d, msix, addr + i, 1, data >> (i * 8));
-
-- default:
-- ASSERT_UNREACHABLE();
-- break;
-- }
-+ if ( rc != X86EMUL_OKAY )
-+ return rc;
- }
-
- return X86EMUL_OKAY;
- }
-
-+ spin_lock(&vpci->lock);
-+ mem = get_table(vpci, slot);
-+ if ( !mem )
-+ {
-+ spin_unlock(&vpci->lock);
-+ gprintk(XENLOG_WARNING,
-+ "%pp: unable to map MSI-X page, dropping write\n",
-+ &msix->pdev->sbdf);
-+ return X86EMUL_OKAY;
-+ }
-+
-+ switch ( len )
-+ {
-+ case 1:
-+ writeb(data, mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ case 2:
-+ writew(data, mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ case 4:
-+ writel(data, mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ case 8:
-+ writeq(data, mem + PAGE_OFFSET(addr));
-+ break;
-+
-+ default:
-+ ASSERT_UNREACHABLE();
-+ }
-+ spin_unlock(&vpci->lock);
-+
-+ return X86EMUL_OKAY;
-+}
-+
-+static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
-+ unsigned long data)
-+{
-+ const struct domain *d = v->domain;
-+ struct vpci_msix *msix = msix_find(d, addr);
-+ struct vpci_msix_entry *entry;
-+ unsigned int offset;
-+
-+ if ( !msix )
-+ return X86EMUL_RETRY;
-+
-+ if ( adjacent_handle(msix, addr) )
-+ return adjacent_write(d, msix, addr, len, data);
-+
-+ if ( !access_allowed(msix->pdev, addr, len) )
-+ return X86EMUL_OKAY;
-+
- spin_lock(&msix->pdev->vpci->lock);
- entry = get_entry(msix, addr);
- offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
-@@ -482,6 +633,26 @@ int vpci_make_msix_hole(const struct pci_dev *pdev)
- }
- }
-
-+ if ( is_hardware_domain(d) )
-+ {
-+ /*
-+ * For dom0 only: remove any hypervisor mappings of the MSIX or PBA
-+ * related areas, as dom0 is capable of moving the position of the BARs
-+ * in the host address space.
-+ *
-+ * We rely on being called with the vPCI lock held once the domain is
-+ * running, so the maps are not in use.
-+ */
-+ for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->table); i++ )
-+ if ( pdev->vpci->msix->table[i] )
-+ {
-+ /* If there are any maps, the domain must be running. */
-+ ASSERT(spin_is_locked(&pdev->vpci->lock));
-+ iounmap(pdev->vpci->msix->table[i]);
-+ pdev->vpci->msix->table[i] = NULL;
-+ }
-+ }
-+
- return 0;
- }
-
-diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
-index b9339f8f3e..60b5f45cd1 100644
---- a/xen/drivers/vpci/vpci.c
-+++ b/xen/drivers/vpci/vpci.c
-@@ -53,9 +53,12 @@ void vpci_remove_device(struct pci_dev *pdev)
- spin_unlock(&pdev->vpci->lock);
- if ( pdev->vpci->msix )
- {
-+ unsigned int i;
-+
- list_del(&pdev->vpci->msix->next);
-- if ( pdev->vpci->msix->pba )
-- iounmap(pdev->vpci->msix->pba);
-+ for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->table); i++ )
-+ if ( pdev->vpci->msix->table[i] )
-+ iounmap(pdev->vpci->msix->table[i]);
- }
- xfree(pdev->vpci->msix);
- xfree(pdev->vpci->msi);
-diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
-index 755b4fd5c8..3326d9026e 100644
---- a/xen/include/xen/vpci.h
-+++ b/xen/include/xen/vpci.h
-@@ -129,8 +129,12 @@ struct vpci {
- bool enabled : 1;
- /* Masked? */
- bool masked : 1;
-- /* PBA map */
-- void __iomem *pba;
-+ /* Partial table map. */
-+#define VPCI_MSIX_TBL_HEAD 0
-+#define VPCI_MSIX_TBL_TAIL 1
-+#define VPCI_MSIX_PBA_HEAD 2
-+#define VPCI_MSIX_PBA_TAIL 3
-+ void __iomem *table[4];
- /* Entries. */
- struct vpci_msix_entry {
- uint64_t addr;
---
-2.40.0
-
diff --git a/0055-ns16550-correct-name-value-pair-parsing-for-PCI-port.patch b/0055-ns16550-correct-name-value-pair-parsing-for-PCI-port.patch
deleted file mode 100644
index 9c05f3a..0000000
--- a/0055-ns16550-correct-name-value-pair-parsing-for-PCI-port.patch
+++ /dev/null
@@ -1,59 +0,0 @@
-From 06264af090ac69a95cdadbc261cc82d964dcb568 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Fri, 31 Mar 2023 08:42:02 +0200
-Subject: [PATCH 55/61] ns16550: correct name/value pair parsing for PCI
- port/bridge
-
-First of all these were inverted: "bridge=" caused the port coordinates
-to be established, while "port=" controlled the bridge coordinates. And
-then the error messages being identical also wasn't helpful. While
-correcting this also move both case blocks close together.
-
-Fixes: 97fd49a7e074 ("ns16550: add support for UART parameters to be specifed with name-value pairs")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: e692b22230b411d762ac9e278a398e28df474eae
-master date: 2023-03-29 14:55:37 +0200
----
- xen/drivers/char/ns16550.c | 16 ++++++++--------
- 1 file changed, 8 insertions(+), 8 deletions(-)
-
-diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c
-index 5dd4d723f5..3651e0c0d4 100644
---- a/xen/drivers/char/ns16550.c
-+++ b/xen/drivers/char/ns16550.c
-@@ -1536,13 +1536,6 @@ static bool __init parse_namevalue_pairs(char *str, struct ns16550 *uart)
- break;
-
- #ifdef CONFIG_HAS_PCI
-- case bridge_bdf:
-- if ( !parse_pci(param_value, NULL, &uart->ps_bdf[0],
-- &uart->ps_bdf[1], &uart->ps_bdf[2]) )
-- PARSE_ERR_RET("Bad port PCI coordinates\n");
-- uart->ps_bdf_enable = true;
-- break;
--
- case device:
- if ( strncmp(param_value, "pci", 3) == 0 )
- {
-@@ -1557,9 +1550,16 @@ static bool __init parse_namevalue_pairs(char *str, struct ns16550 *uart)
- break;
-
- case port_bdf:
-+ if ( !parse_pci(param_value, NULL, &uart->ps_bdf[0],
-+ &uart->ps_bdf[1], &uart->ps_bdf[2]) )
-+ PARSE_ERR_RET("Bad port PCI coordinates\n");
-+ uart->ps_bdf_enable = true;
-+ break;
-+
-+ case bridge_bdf:
- if ( !parse_pci(param_value, NULL, &uart->pb_bdf[0],
- &uart->pb_bdf[1], &uart->pb_bdf[2]) )
-- PARSE_ERR_RET("Bad port PCI coordinates\n");
-+ PARSE_ERR_RET("Bad bridge PCI coordinates\n");
- uart->pb_bdf_enable = true;
- break;
- #endif
---
-2.40.0
-
diff --git a/0056-bump-default-SeaBIOS-version-to-1.16.0.patch b/0056-bump-default-SeaBIOS-version-to-1.16.0.patch
deleted file mode 100644
index 37d9b67..0000000
--- a/0056-bump-default-SeaBIOS-version-to-1.16.0.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 2a4d327387601b60c9844a5b0cc44de28792ea52 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Fri, 6 May 2022 14:46:52 +0200
-Subject: [PATCH 56/61] bump default SeaBIOS version to 1.16.0
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Julien Grall <jgrall@amazon.com>
-(cherry picked from commit 944e389daa133dd310d87c4eebacba9f6da76018)
----
- Config.mk | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/Config.mk b/Config.mk
-index 1215c2725b..073715c28d 100644
---- a/Config.mk
-+++ b/Config.mk
-@@ -241,7 +241,7 @@ OVMF_UPSTREAM_REVISION ?= 7b4a99be8a39c12d3a7fc4b8db9f0eab4ac688d5
- QEMU_UPSTREAM_REVISION ?= qemu-xen-4.16.3
- MINIOS_UPSTREAM_REVISION ?= xen-RELEASE-4.16.3
-
--SEABIOS_UPSTREAM_REVISION ?= rel-1.14.0
-+SEABIOS_UPSTREAM_REVISION ?= rel-1.16.0
-
- ETHERBOOT_NICS ?= rtl8139 8086100e
-
---
-2.40.0
-
diff --git a/0057-CI-Drop-automation-configs.patch b/0057-CI-Drop-automation-configs.patch
deleted file mode 100644
index d726468..0000000
--- a/0057-CI-Drop-automation-configs.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From 657dc5f5f6269008fd7484ca7cca723e21455483 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 29 Dec 2022 15:39:13 +0000
-Subject: [PATCH 57/61] CI: Drop automation/configs/
-
-Having 3 extra hypervisor builds on the end of a full build is deeply
-confusing to debug if one of them fails, because the .config file presented in
-the artefacts is not the one which caused a build failure. Also, the log
-tends to be truncated in the UI.
-
-PV-only is tested as part of PV-Shim in a full build anyway, so doesn't need
-repeating. HVM-only and neither appear frequently in randconfig, so drop all
-the logic here to simplify things.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Michal Orzel <michal.orzel@amd.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-(cherry picked from commit 7b20009a812f26e74bdbde2ab96165376b3dad34)
----
- automation/configs/x86/hvm_only_config | 3 ---
- automation/configs/x86/no_hvm_pv_config | 3 ---
- automation/configs/x86/pv_only_config | 3 ---
- automation/scripts/build | 21 ---------------------
- 4 files changed, 30 deletions(-)
- delete mode 100644 automation/configs/x86/hvm_only_config
- delete mode 100644 automation/configs/x86/no_hvm_pv_config
- delete mode 100644 automation/configs/x86/pv_only_config
-
-diff --git a/automation/configs/x86/hvm_only_config b/automation/configs/x86/hvm_only_config
-deleted file mode 100644
-index 9efbddd535..0000000000
---- a/automation/configs/x86/hvm_only_config
-+++ /dev/null
-@@ -1,3 +0,0 @@
--CONFIG_HVM=y
--# CONFIG_PV is not set
--# CONFIG_DEBUG is not set
-diff --git a/automation/configs/x86/no_hvm_pv_config b/automation/configs/x86/no_hvm_pv_config
-deleted file mode 100644
-index 0bf6a8e468..0000000000
---- a/automation/configs/x86/no_hvm_pv_config
-+++ /dev/null
-@@ -1,3 +0,0 @@
--# CONFIG_HVM is not set
--# CONFIG_PV is not set
--# CONFIG_DEBUG is not set
-diff --git a/automation/configs/x86/pv_only_config b/automation/configs/x86/pv_only_config
-deleted file mode 100644
-index e9d8b4a7c7..0000000000
---- a/automation/configs/x86/pv_only_config
-+++ /dev/null
-@@ -1,3 +0,0 @@
--CONFIG_PV=y
--# CONFIG_HVM is not set
--# CONFIG_DEBUG is not set
-diff --git a/automation/scripts/build b/automation/scripts/build
-index 281f8b1fcc..2c807fa397 100755
---- a/automation/scripts/build
-+++ b/automation/scripts/build
-@@ -73,24 +73,3 @@ if [[ "${XEN_TARGET_ARCH}" != "x86_32" ]]; then
- cp -r dist binaries/
- fi
- fi
--
--if [[ "${hypervisor_only}" == "y" ]]; then
-- # If we are build testing a specific Kconfig exit now, there's no point in
-- # testing all the possible configs.
-- exit 0
--fi
--
--# Build all the configs we care about
--case ${XEN_TARGET_ARCH} in
-- x86_64) arch=x86 ;;
-- *) exit 0 ;;
--esac
--
--cfg_dir="automation/configs/${arch}"
--for cfg in `ls ${cfg_dir}`; do
-- echo "Building $cfg"
-- make -j$(nproc) -C xen clean
-- rm -f xen/.config
-- make -C xen KBUILD_DEFCONFIG=../../../../${cfg_dir}/${cfg} XEN_CONFIG_EXPERT=y defconfig
-- make -j$(nproc) -C xen XEN_CONFIG_EXPERT=y
--done
---
-2.40.0
-
diff --git a/0058-automation-Switch-arm32-cross-builds-to-run-on-arm64.patch b/0058-automation-Switch-arm32-cross-builds-to-run-on-arm64.patch
deleted file mode 100644
index 92d65ec..0000000
--- a/0058-automation-Switch-arm32-cross-builds-to-run-on-arm64.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From 37800cf8ab7806e506b96a13cad0fb395d86663a Mon Sep 17 00:00:00 2001
-From: Michal Orzel <michal.orzel@amd.com>
-Date: Tue, 14 Feb 2023 16:38:38 +0100
-Subject: [PATCH 58/61] automation: Switch arm32 cross builds to run on arm64
-
-Due to the limited x86 CI resources slowing down the whole pipeline,
-switch the arm32 cross builds to be executed on arm64 which is much more
-capable. For that, rename the existing debian container dockerfile
-from unstable-arm32-gcc to unstable-arm64v8-arm32-gcc and use
-arm64v8/debian:unstable as an image. Note, that we cannot use the same
-container name as we have to keep the backwards compatibility.
-Take the opportunity to remove extra empty line at the end of a file.
-
-Modify the tag of .arm32-cross-build-tmpl to arm64 and update the build
-jobs accordingly.
-
-Signed-off-by: Michal Orzel <michal.orzel@amd.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-(cherry picked from commit a35fccc8df93de7154dba87db6e7bcf391e9d51c)
----
- ...ockerfile => unstable-arm64v8-arm32-gcc.dockerfile} | 3 +--
- automation/gitlab-ci/build.yaml | 10 +++++-----
- 2 files changed, 6 insertions(+), 7 deletions(-)
- rename automation/build/debian/{unstable-arm32-gcc.dockerfile => unstable-arm64v8-arm32-gcc.dockerfile} (94%)
-
-diff --git a/automation/build/debian/unstable-arm32-gcc.dockerfile b/automation/build/debian/unstable-arm64v8-arm32-gcc.dockerfile
-similarity index 94%
-rename from automation/build/debian/unstable-arm32-gcc.dockerfile
-rename to automation/build/debian/unstable-arm64v8-arm32-gcc.dockerfile
-index b41a57f197..11860425a6 100644
---- a/automation/build/debian/unstable-arm32-gcc.dockerfile
-+++ b/automation/build/debian/unstable-arm64v8-arm32-gcc.dockerfile
-@@ -1,4 +1,4 @@
--FROM debian:unstable
-+FROM arm64v8/debian:unstable
- LABEL maintainer.name="The Xen Project" \
- maintainer.email="xen-devel@lists.xenproject.org"
-
-@@ -21,4 +21,3 @@ RUN apt-get update && \
- apt-get autoremove -y && \
- apt-get clean && \
- rm -rf /var/lib/apt/lists* /tmp/* /var/tmp/*
--
-diff --git a/automation/gitlab-ci/build.yaml b/automation/gitlab-ci/build.yaml
-index 06a75a8c5a..f66fbca8a7 100644
---- a/automation/gitlab-ci/build.yaml
-+++ b/automation/gitlab-ci/build.yaml
-@@ -123,7 +123,7 @@
- variables:
- XEN_TARGET_ARCH: arm32
- tags:
-- - x86_64
-+ - arm64
-
- .arm32-cross-build:
- extends: .arm32-cross-build-tmpl
-@@ -497,23 +497,23 @@ alpine-3.12-clang-debug:
- debian-unstable-gcc-arm32:
- extends: .gcc-arm32-cross-build
- variables:
-- CONTAINER: debian:unstable-arm32-gcc
-+ CONTAINER: debian:unstable-arm64v8-arm32-gcc
-
- debian-unstable-gcc-arm32-debug:
- extends: .gcc-arm32-cross-build-debug
- variables:
-- CONTAINER: debian:unstable-arm32-gcc
-+ CONTAINER: debian:unstable-arm64v8-arm32-gcc
-
- debian-unstable-gcc-arm32-randconfig:
- extends: .gcc-arm32-cross-build
- variables:
-- CONTAINER: debian:unstable-arm32-gcc
-+ CONTAINER: debian:unstable-arm64v8-arm32-gcc
- RANDCONFIG: y
-
- debian-unstable-gcc-arm32-debug-randconfig:
- extends: .gcc-arm32-cross-build-debug
- variables:
-- CONTAINER: debian:unstable-arm32-gcc
-+ CONTAINER: debian:unstable-arm64v8-arm32-gcc
- RANDCONFIG: y
-
- # Arm builds
---
-2.40.0
-
diff --git a/0059-automation-Remove-CentOS-7.2-containers-and-builds.patch b/0059-automation-Remove-CentOS-7.2-containers-and-builds.patch
deleted file mode 100644
index 8d58eea..0000000
--- a/0059-automation-Remove-CentOS-7.2-containers-and-builds.patch
+++ /dev/null
@@ -1,145 +0,0 @@
-From a4d901580b2ab3133bca13159b790914c217b0e2 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Tue, 21 Feb 2023 16:55:36 +0000
-Subject: [PATCH 59/61] automation: Remove CentOS 7.2 containers and builds
-
-We already have a container which track the latest CentOS 7, no need
-for this one as well.
-
-Also, 7.2 have outdated root certificate which prevent connection to
-website which use Let's Encrypt.
-
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit ba512629f76dfddb39ea9133ee51cdd9e392a927)
----
- automation/build/centos/7.2.dockerfile | 52 -------------------------
- automation/build/centos/CentOS-7.2.repo | 35 -----------------
- automation/gitlab-ci/build.yaml | 10 -----
- 3 files changed, 97 deletions(-)
- delete mode 100644 automation/build/centos/7.2.dockerfile
- delete mode 100644 automation/build/centos/CentOS-7.2.repo
-
-diff --git a/automation/build/centos/7.2.dockerfile b/automation/build/centos/7.2.dockerfile
-deleted file mode 100644
-index 4baa097e31..0000000000
---- a/automation/build/centos/7.2.dockerfile
-+++ /dev/null
-@@ -1,52 +0,0 @@
--FROM centos:7.2.1511
--LABEL maintainer.name="The Xen Project" \
-- maintainer.email="xen-devel@lists.xenproject.org"
--
--# ensure we only get bits from the vault for
--# the version we want
--COPY CentOS-7.2.repo /etc/yum.repos.d/CentOS-Base.repo
--
--# install EPEL for dev86, xz-devel and possibly other packages
--RUN yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \
-- yum clean all
--
--RUN mkdir /build
--WORKDIR /build
--
--# work around https://github.com/moby/moby/issues/10180
--# and install Xen depends
--RUN rpm --rebuilddb && \
-- yum -y install \
-- yum-plugin-ovl \
-- gcc \
-- gcc-c++ \
-- ncurses-devel \
-- zlib-devel \
-- openssl-devel \
-- python-devel \
-- libuuid-devel \
-- pkgconfig \
-- # gettext for Xen < 4.13
-- gettext \
-- flex \
-- bison \
-- libaio-devel \
-- glib2-devel \
-- yajl-devel \
-- pixman-devel \
-- glibc-devel \
-- # glibc-devel.i686 for Xen < 4.15
-- glibc-devel.i686 \
-- make \
-- binutils \
-- git \
-- wget \
-- acpica-tools \
-- python-markdown \
-- patch \
-- checkpolicy \
-- dev86 \
-- xz-devel \
-- bzip2 \
-- nasm \
-- && yum clean all
-diff --git a/automation/build/centos/CentOS-7.2.repo b/automation/build/centos/CentOS-7.2.repo
-deleted file mode 100644
-index 4da27faeb5..0000000000
---- a/automation/build/centos/CentOS-7.2.repo
-+++ /dev/null
-@@ -1,35 +0,0 @@
--# CentOS-Base.repo
--#
--# This is a replacement file that pins things to just use CentOS 7.2
--# from the CentOS Vault.
--#
--
--[base]
--name=CentOS-7.2.1511 - Base
--baseurl=http://vault.centos.org/7.2.1511/os/$basearch/
--gpgcheck=1
--gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
--
--#released updates
--[updates]
--name=CentOS-7.2.1511 - Updates
--baseurl=http://vault.centos.org/7.2.1511/updates/$basearch/
--gpgcheck=1
--gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
--
--#additional packages that may be useful
--[extras]
--name=CentOS-7.2.1511 - Extras
--baseurl=http://vault.centos.org/7.2.1511/extras/$basearch/
--gpgcheck=1
--gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
--
--#additional packages that extend functionality of existing packages
--[centosplus]
--name=CentOS-7.2.1511 - Plus
--baseurl=http://vault.centos.org/7.2.1511/centosplus/$basearch/
--gpgcheck=1
--gpgcheck=1
--enabled=0
--gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
--
-diff --git a/automation/gitlab-ci/build.yaml b/automation/gitlab-ci/build.yaml
-index f66fbca8a7..bc1a732069 100644
---- a/automation/gitlab-ci/build.yaml
-+++ b/automation/gitlab-ci/build.yaml
-@@ -184,16 +184,6 @@ archlinux-gcc-debug:
- variables:
- CONTAINER: archlinux:current
-
--centos-7-2-gcc:
-- extends: .gcc-x86-64-build
-- variables:
-- CONTAINER: centos:7.2
--
--centos-7-2-gcc-debug:
-- extends: .gcc-x86-64-build-debug
-- variables:
-- CONTAINER: centos:7.2
--
- centos-7-gcc:
- extends: .gcc-x86-64-build
- variables:
---
-2.40.0
-
diff --git a/0060-automation-Remove-non-debug-x86_32-build-jobs.patch b/0060-automation-Remove-non-debug-x86_32-build-jobs.patch
deleted file mode 100644
index c5516be..0000000
--- a/0060-automation-Remove-non-debug-x86_32-build-jobs.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-From 27974fde92850419e385ad0355997c54d78046f2 Mon Sep 17 00:00:00 2001
-From: Anthony PERARD <anthony.perard@citrix.com>
-Date: Fri, 24 Feb 2023 17:29:15 +0000
-Subject: [PATCH 60/61] automation: Remove non-debug x86_32 build jobs
-
-In the interest of having less jobs, we remove the x86_32 build jobs
-that do release build. Debug build is very likely to be enough to find
-32bit build issues.
-
-Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-(cherry picked from commit 7b66792ea7f77fb9e587e1e9c530a7c869eecba1)
----
- automation/gitlab-ci/build.yaml | 20 --------------------
- 1 file changed, 20 deletions(-)
-
-diff --git a/automation/gitlab-ci/build.yaml b/automation/gitlab-ci/build.yaml
-index bc1a732069..4b51ad9e34 100644
---- a/automation/gitlab-ci/build.yaml
-+++ b/automation/gitlab-ci/build.yaml
-@@ -264,21 +264,11 @@ debian-stretch-gcc-debug:
- variables:
- CONTAINER: debian:stretch
-
--debian-stretch-32-clang:
-- extends: .clang-x86-32-build
-- variables:
-- CONTAINER: debian:stretch-i386
--
- debian-stretch-32-clang-debug:
- extends: .clang-x86-32-build-debug
- variables:
- CONTAINER: debian:stretch-i386
-
--debian-stretch-32-gcc:
-- extends: .gcc-x86-32-build
-- variables:
-- CONTAINER: debian:stretch-i386
--
- debian-stretch-32-gcc-debug:
- extends: .gcc-x86-32-build-debug
- variables:
-@@ -316,21 +306,11 @@ debian-unstable-gcc-debug-randconfig:
- CONTAINER: debian:unstable
- RANDCONFIG: y
-
--debian-unstable-32-clang:
-- extends: .clang-x86-32-build
-- variables:
-- CONTAINER: debian:unstable-i386
--
- debian-unstable-32-clang-debug:
- extends: .clang-x86-32-build-debug
- variables:
- CONTAINER: debian:unstable-i386
-
--debian-unstable-32-gcc:
-- extends: .gcc-x86-32-build
-- variables:
-- CONTAINER: debian:unstable-i386
--
- debian-unstable-32-gcc-debug:
- extends: .gcc-x86-32-build-debug
- variables:
---
-2.40.0
-
diff --git a/0061-CI-Remove-llvm-8-from-the-Debian-Stretch-container.patch b/0061-CI-Remove-llvm-8-from-the-Debian-Stretch-container.patch
deleted file mode 100644
index 9170382..0000000
--- a/0061-CI-Remove-llvm-8-from-the-Debian-Stretch-container.patch
+++ /dev/null
@@ -1,103 +0,0 @@
-From 31627a059c2e186f4ad12d171d964b09abe8a4a9 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 24 Mar 2023 17:59:56 +0000
-Subject: [PATCH 61/61] CI: Remove llvm-8 from the Debian Stretch container
-
-For similar reasons to c/s a6b1e2b80fe20. While this container is still
-build-able for now, all the other problems with explicitly-versioned compilers
-remain.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-(cherry picked from commit 7a298375721636290a57f31bb0f7c2a5a38956a4)
----
- automation/build/debian/stretch-llvm-8.list | 3 ---
- automation/build/debian/stretch.dockerfile | 12 ---------
- automation/gitlab-ci/build.yaml | 27 ---------------------
- 3 files changed, 42 deletions(-)
- delete mode 100644 automation/build/debian/stretch-llvm-8.list
-
-diff --git a/automation/build/debian/stretch-llvm-8.list b/automation/build/debian/stretch-llvm-8.list
-deleted file mode 100644
-index 09fe843fb2..0000000000
---- a/automation/build/debian/stretch-llvm-8.list
-+++ /dev/null
-@@ -1,3 +0,0 @@
--# Strech LLVM 8 repos
--deb http://apt.llvm.org/stretch/ llvm-toolchain-stretch-8 main
--deb-src http://apt.llvm.org/stretch/ llvm-toolchain-stretch-8 main
-diff --git a/automation/build/debian/stretch.dockerfile b/automation/build/debian/stretch.dockerfile
-index da6aa874dd..9861acbcc3 100644
---- a/automation/build/debian/stretch.dockerfile
-+++ b/automation/build/debian/stretch.dockerfile
-@@ -53,15 +53,3 @@ RUN apt-get update && \
- apt-get autoremove -y && \
- apt-get clean && \
- rm -rf /var/lib/apt/lists* /tmp/* /var/tmp/*
--
--RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
--COPY stretch-llvm-8.list /etc/apt/sources.list.d/
--
--RUN apt-get update && \
-- apt-get --quiet --yes install \
-- clang-8 \
-- lld-8 \
-- && \
-- apt-get autoremove -y && \
-- apt-get clean && \
-- rm -rf /var/lib/apt/lists* /tmp/* /var/tmp/*
-diff --git a/automation/gitlab-ci/build.yaml b/automation/gitlab-ci/build.yaml
-index 4b51ad9e34..fd8034b429 100644
---- a/automation/gitlab-ci/build.yaml
-+++ b/automation/gitlab-ci/build.yaml
-@@ -27,13 +27,6 @@
- CXX: clang++
- clang: y
-
--.clang-8-tmpl:
-- variables: &clang-8
-- CC: clang-8
-- CXX: clang++-8
-- LD: ld.lld-8
-- clang: y
--
- .x86-64-build-tmpl:
- <<: *build
- variables:
-@@ -98,16 +91,6 @@
- variables:
- <<: *clang
-
--.clang-8-x86-64-build:
-- extends: .x86-64-build
-- variables:
-- <<: *clang-8
--
--.clang-8-x86-64-build-debug:
-- extends: .x86-64-build-debug
-- variables:
-- <<: *clang-8
--
- .clang-x86-32-build:
- extends: .x86-32-build
- variables:
-@@ -244,16 +227,6 @@ debian-stretch-clang-debug:
- variables:
- CONTAINER: debian:stretch
-
--debian-stretch-clang-8:
-- extends: .clang-8-x86-64-build
-- variables:
-- CONTAINER: debian:stretch
--
--debian-stretch-clang-8-debug:
-- extends: .clang-8-x86-64-build-debug
-- variables:
-- CONTAINER: debian:stretch
--
- debian-stretch-gcc:
- extends: .gcc-x86-64-build
- variables:
---
-2.40.0
-
diff --git a/info.txt b/info.txt
index c92b6d7..6fb1378 100644
--- a/info.txt
+++ b/info.txt
@@ -1,6 +1,6 @@
-Xen upstream patchset #0 for 4.16.4-pre
+Xen upstream patchset #0 for 4.16.6-pre
Containing patches from
-RELEASE-4.16.3 (08c42cec2f3dbb8d1df62c2ad4945d127b418fd6)
+RELEASE-4.16.5 (177c7edf16099ff4d804f4ed4d698233b372f334)
to
-staging-4.16 (4ad5975d4e35635f03d2cb9e86292c0daeabd75f)
+staging-4.16 (29efce0f8f10e381417a61f2f9988b40d4f6bcf0)