summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/2.6.22/20019_15181-dma-tracking.patch1')
-rw-r--r--trunk/2.6.22/20019_15181-dma-tracking.patch1551
1 files changed, 551 insertions, 0 deletions
diff --git a/trunk/2.6.22/20019_15181-dma-tracking.patch1 b/trunk/2.6.22/20019_15181-dma-tracking.patch1
new file mode 100644
index 0000000..2bf8906
--- /dev/null
+++ b/trunk/2.6.22/20019_15181-dma-tracking.patch1
@@ -0,0 +1,551 @@
+# HG changeset 15181+33+41 patch
+# User kfraser@localhost.localdomain
+# Date 1180518373 -3600
+# Node ID 45f939d0c72493d237783419996bbca0132551df
+# Parent 1f7a6456c330272a3cec13b31fc1ba9b4db898ec
+Subject: gnttab: Add basic DMA tracking
+
+This patch adds basic tracking of outstanding DMA requests on
+grant table entries marked as PageForeign.
+
+When a PageForeign struct page is about to be mapped for DMA,
+we set its map count to 1 (or zero in actual value). This is
+then checked for when we need to free a grant table entry early
+to ensure that we don't free an entry that's currently used for
+DMA.
+
+So any entry that has been marked for DMA will not be freed early.
+
+If the unmapping API had a struct page (which exists for the sg
+case) then we could do this properly.
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+
+(added the interfacing bits from 15180)
+
+Subject: gnttab: Fix copy_grant_page race with seqlock
+
+Previously gnttab_copy_grant_page would always unmap the grant table
+entry, even if DMA operations were outstanding. This would allow a
+hostile guest to return a page still in use for DMA to the hypervisor.
+
+This patch fixes this by making sure that we don't free the grant
+table entry if a DMA operation has taken place. To achieve this a
+seqlock is used to synchronise the DMA operations and
+copy_grant_page.
+
+The DMA operations use the read side of the seqlock so performance
+should be largely unaffected.
+
+Thanks to Isaku Yamahata for noticing the race condition.
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+
+Subject: Make dma address conversion logic of gnttab dma arch specific.
+
+gnttab_dma_map_page() and gnttab_dma_unmap_page() use machine addresses
+and dma addresses interchangeably. However this doesn't work with auto
+translated mode enabled (i.e. on ia64) because
+
+- bus address space(dma_addr_t) is different from machine address
+ space(maddr_t).
+ With the terminology in xen/include/public/mm.h,
+ dma_addr_t is maddr and maddr_t is gmaddr.
+ So they should be handled differently with auto translated physmap
+ mode
+ enabled.
+
+- dma address conversion depends on dma api implementation and
+ its paravirtualization.
+  "pfn_valid(mfn_to_local_pfn(maddr >> PAGE_SHIFT))" check in
+ gnttab_dma_map_page() doesn't make sense with auto translate physmap
+ mode enabled.
+
+To address those issues, split those logic from gnttab_dma_map_page()
+and gnttab_dma_unmap_page(), and put it into arch specific files.
+This patch doesn't change the already existing x86 logic.
+
+Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
+
+Acked-by: jbeulich@novell.com
+
+---
+ arch/i386/kernel/pci-dma-xen.c | 19 +++-
+ arch/i386/kernel/swiotlb.c | 27 ++++-
+ drivers/xen/core/gnttab.c | 124 +++++++++++++++++++++++++++
+ include/asm-i386/mach-xen/asm/gnttab_dma.h | 41 ++++++++
+ include/asm-x86_64/mach-xen/asm/gnttab_dma.h | 1
+ include/xen/gnttab.h | 28 ++++++
+ include/xen/interface/grant_table.h | 23 +++++
+ 7 files changed, 252 insertions(+), 11 deletions(-)
+
+--- a/arch/i386/kernel/pci-dma-xen.c 2007-08-27 14:01:24.000000000 -0400
++++ b/arch/i386/kernel/pci-dma-xen.c 2007-08-27 14:02:07.000000000 -0400
+@@ -15,9 +15,11 @@
+ #include <linux/version.h>
+ #include <asm/io.h>
+ #include <xen/balloon.h>
++#include <xen/gnttab.h>
+ #include <asm/swiotlb.h>
+ #include <asm/tlbflush.h>
+ #include <asm-i386/mach-xen/asm/swiotlb.h>
++#include <asm-i386/mach-xen/asm/gnttab_dma.h>
+ #include <asm/bug.h>
+
+ #ifdef __x86_64__
+@@ -90,7 +92,7 @@ dma_map_sg(struct device *hwdev, struct
+ } else {
+ for (i = 0; i < nents; i++ ) {
+ sg[i].dma_address =
+- page_to_bus(sg[i].page) + sg[i].offset;
++ gnttab_dma_map_page(sg[i].page) + sg[i].offset;
+ sg[i].dma_length = sg[i].length;
+ BUG_ON(!sg[i].page);
+ IOMMU_BUG_ON(address_needs_mapping(
+@@ -108,9 +110,15 @@ void
+ dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+ {
++ int i;
++
+ BUG_ON(direction == DMA_NONE);
+ if (swiotlb)
+ swiotlb_unmap_sg(hwdev, sg, nents, direction);
++ else {
++ for (i = 0; i < nents; i++ )
++ gnttab_dma_unmap_page(sg[i].dma_address);
++ }
+ }
+ EXPORT_SYMBOL(dma_unmap_sg);
+
+@@ -127,7 +135,7 @@ dma_map_page(struct device *dev, struct
+ dma_addr = swiotlb_map_page(
+ dev, page, offset, size, direction);
+ } else {
+- dma_addr = page_to_bus(page) + offset;
++ dma_addr = gnttab_dma_map_page(page) + offset;
+ IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
+ }
+
+@@ -142,6 +150,8 @@ dma_unmap_page(struct device *dev, dma_a
+ BUG_ON(direction == DMA_NONE);
+ if (swiotlb)
+ swiotlb_unmap_page(dev, dma_address, size, direction);
++ else
++ gnttab_dma_unmap_page(dma_address);
+ }
+ EXPORT_SYMBOL(dma_unmap_page);
+ #endif /* CONFIG_HIGHMEM */
+@@ -326,7 +336,8 @@ dma_map_single(struct device *dev, void
+ if (swiotlb) {
+ dma = swiotlb_map_single(dev, ptr, size, direction);
+ } else {
+- dma = virt_to_bus(ptr);
++ dma = gnttab_dma_map_page(virt_to_page(ptr)) +
++ offset_in_page(ptr);
+ IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
+ IOMMU_BUG_ON(address_needs_mapping(dev, dma));
+ }
+@@ -344,6 +355,8 @@ dma_unmap_single(struct device *dev, dma
+ BUG();
+ if (swiotlb)
+ swiotlb_unmap_single(dev, dma_addr, size, direction);
++ else
++ gnttab_dma_unmap_page(dma_addr);
+ }
+ EXPORT_SYMBOL(dma_unmap_single);
+
+--- a/arch/i386/kernel/swiotlb.c 2007-08-27 14:01:25.000000000 -0400
++++ b/arch/i386/kernel/swiotlb.c 2007-08-27 14:02:07.000000000 -0400
+@@ -25,15 +25,15 @@
+ #include <asm/pci.h>
+ #include <asm/dma.h>
+ #include <asm/uaccess.h>
++#include <xen/gnttab.h>
+ #include <xen/interface/memory.h>
++#include <asm-i386/mach-xen/asm/gnttab_dma.h>
+
+ int swiotlb;
+ EXPORT_SYMBOL(swiotlb);
+
+ #define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1)))
+
+-#define SG_ENT_PHYS_ADDRESS(sg) (page_to_bus((sg)->page) + (sg)->offset)
+-
+ /*
+ * Maximum allowable number of contiguous slabs to map,
+ * must be a power of 2. What is the appropriate value ?
+@@ -468,7 +468,8 @@ swiotlb_full(struct device *dev, size_t
+ dma_addr_t
+ swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
+ {
+- dma_addr_t dev_addr = virt_to_bus(ptr);
++ dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) +
++ offset_in_page(ptr);
+ void *map;
+ struct phys_addr buffer;
+
+@@ -486,6 +487,7 @@ swiotlb_map_single(struct device *hwdev,
+ /*
+ * Oh well, have to allocate and map a bounce buffer.
+ */
++ gnttab_dma_unmap_page(dev_addr);
+ buffer.page = virt_to_page(ptr);
+ buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
+ map = map_single(hwdev, buffer, size, dir);
+@@ -513,6 +515,8 @@ swiotlb_unmap_single(struct device *hwde
+ BUG_ON(dir == DMA_NONE);
+ if (in_swiotlb_aperture(dev_addr))
+ unmap_single(hwdev, bus_to_virt(dev_addr), size, dir);
++ else
++ gnttab_dma_unmap_page(dev_addr);
+ }
+
+ /*
+@@ -571,8 +575,10 @@ swiotlb_map_sg(struct device *hwdev, str
+ BUG_ON(dir == DMA_NONE);
+
+ for (i = 0; i < nelems; i++, sg++) {
+- dev_addr = SG_ENT_PHYS_ADDRESS(sg);
++ dev_addr = gnttab_dma_map_page(sg->page) + sg->offset;
++
+ if (address_needs_mapping(hwdev, dev_addr)) {
++ gnttab_dma_unmap_page(dev_addr);
+ buffer.page = sg->page;
+ buffer.offset = sg->offset;
+ map = map_single(hwdev, buffer, sg->length, dir);
+@@ -605,10 +611,12 @@ swiotlb_unmap_sg(struct device *hwdev, s
+ BUG_ON(dir == DMA_NONE);
+
+ for (i = 0; i < nelems; i++, sg++)
+- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
++ if (in_swiotlb_aperture(sg->dma_address))
+ unmap_single(hwdev,
+ (void *)bus_to_virt(sg->dma_address),
+ sg->dma_length, dir);
++ else
++ gnttab_dma_unmap_page(sg->dma_address);
+ }
+
+ /*
+@@ -627,7 +635,7 @@ swiotlb_sync_sg_for_cpu(struct device *h
+ BUG_ON(dir == DMA_NONE);
+
+ for (i = 0; i < nelems; i++, sg++)
+- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
++ if (in_swiotlb_aperture(sg->dma_address))
+ sync_single(hwdev,
+ (void *)bus_to_virt(sg->dma_address),
+ sg->dma_length, dir);
+@@ -642,7 +650,7 @@ swiotlb_sync_sg_for_device(struct device
+ BUG_ON(dir == DMA_NONE);
+
+ for (i = 0; i < nelems; i++, sg++)
+- if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
++ if (in_swiotlb_aperture(sg->dma_address))
+ sync_single(hwdev,
+ (void *)bus_to_virt(sg->dma_address),
+ sg->dma_length, dir);
+@@ -659,8 +667,9 @@ swiotlb_map_page(struct device *hwdev, s
+ dma_addr_t dev_addr;
+ char *map;
+
+- dev_addr = page_to_bus(page) + offset;
++ dev_addr = gnttab_dma_map_page(page) + offset;
+ if (address_needs_mapping(hwdev, dev_addr)) {
++ gnttab_dma_unmap_page(dev_addr);
+ buffer.page = page;
+ buffer.offset = offset;
+ map = map_single(hwdev, buffer, size, direction);
+@@ -681,6 +690,8 @@ swiotlb_unmap_page(struct device *hwdev,
+ BUG_ON(direction == DMA_NONE);
+ if (in_swiotlb_aperture(dma_address))
+ unmap_single(hwdev, bus_to_virt(dma_address), size, direction);
++ else
++ gnttab_dma_unmap_page(dma_address);
+ }
+
+ #endif
+--- a/drivers/xen/core/gnttab.c 2007-08-27 14:01:25.000000000 -0400
++++ b/drivers/xen/core/gnttab.c 2007-08-27 14:01:25.000000000 -0400
+@@ -34,6 +34,7 @@
+ #include <linux/module.h>
+ #include <linux/sched.h>
+ #include <linux/mm.h>
++#include <linux/seqlock.h>
+ #include <xen/interface/xen.h>
+ #include <xen/gnttab.h>
+ #include <asm/pgtable.h>
+@@ -42,6 +43,7 @@
+ #include <asm/io.h>
+ #include <xen/interface/memory.h>
+ #include <xen/driver_util.h>
++#include <asm/gnttab_dma.h>
+
+ #ifdef HAVE_XEN_PLATFORM_COMPAT_H
+ #include <xen/platform-compat.h>
+@@ -63,6 +65,8 @@ static struct grant_entry *shared;
+
+ static struct gnttab_free_callback *gnttab_free_callback_list;
+
++static DEFINE_SEQLOCK(gnttab_dma_lock);
++
+ static int gnttab_expand(unsigned int req_entries);
+
+ #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+@@ -490,6 +494,126 @@ static int gnttab_map(unsigned int start
+ return 0;
+ }
+
++static void gnttab_page_free(struct page *page)
++{
++ ClearPageForeign(page);
++ gnttab_reset_grant_page(page);
++ put_page(page);
++}
++
++/*
++ * Must not be called with IRQs off. This should only be used on the
++ * slow path.
++ *
++ * Copy a foreign granted page to local memory.
++ */
++int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
++{
++ struct gnttab_unmap_and_replace unmap;
++ mmu_update_t mmu;
++ struct page *page;
++ struct page *new_page;
++ void *new_addr;
++ void *addr;
++ paddr_t pfn;
++ maddr_t mfn;
++ maddr_t new_mfn;
++ int err;
++
++ page = *pagep;
++ if (!get_page_unless_zero(page))
++ return -ENOENT;
++
++ err = -ENOMEM;
++ new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
++ if (!new_page)
++ goto out;
++
++ new_addr = page_address(new_page);
++ addr = page_address(page);
++ memcpy(new_addr, addr, PAGE_SIZE);
++
++ pfn = page_to_pfn(page);
++ mfn = pfn_to_mfn(pfn);
++ new_mfn = virt_to_mfn(new_addr);
++
++ write_seqlock(&gnttab_dma_lock);
++
++ /* Make seq visible before checking page_mapped. */
++ smp_mb();
++
++ /* Has the page been DMA-mapped? */
++ if (unlikely(page_mapped(page))) {
++ write_sequnlock(&gnttab_dma_lock);
++ put_page(new_page);
++ err = -EBUSY;
++ goto out;
++ }
++
++ if (!xen_feature(XENFEAT_auto_translated_physmap))
++ set_phys_to_machine(pfn, new_mfn);
++
++ gnttab_set_replace_op(&unmap, (unsigned long)addr,
++ (unsigned long)new_addr, ref);
++
++ err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
++ &unmap, 1);
++ BUG_ON(err);
++ BUG_ON(unmap.status);
++
++ write_sequnlock(&gnttab_dma_lock);
++
++ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++ set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
++
++ mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
++ mmu.val = pfn;
++ err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
++ BUG_ON(err);
++ }
++
++ new_page->mapping = page->mapping;
++ new_page->index = page->index;
++ set_bit(PG_foreign, &new_page->flags);
++ *pagep = new_page;
++
++ SetPageForeign(page, gnttab_page_free);
++ page->mapping = NULL;
++
++out:
++ put_page(page);
++ return err;
++}
++EXPORT_SYMBOL(gnttab_copy_grant_page);
++
++/*
++ * Keep track of foreign pages marked as PageForeign so that we don't
++ * return them to the remote domain prematurely.
++ *
++ * PageForeign pages are pinned down by increasing their mapcount.
++ *
++ * All other pages are simply returned as is.
++ */
++void __gnttab_dma_map_page(struct page *page)
++{
++ unsigned int seq;
++
++ if (!is_running_on_xen() || !PageForeign(page))
++ return;
++
++ do {
++ seq = read_seqbegin(&gnttab_dma_lock);
++
++ if (gnttab_dma_local_pfn(page))
++ break;
++
++ atomic_set(&page->_mapcount, 0);
++
++ /* Make _mapcount visible before read_seqretry. */
++ smp_mb();
++ } while (unlikely(read_seqretry(&gnttab_dma_lock, seq)));
++}
++
+ int gnttab_resume(void)
+ {
+ if (max_nr_grant_frames() < nr_grant_frames)
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ b/include/asm-i386/mach-xen/asm/gnttab_dma.h 2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1,41 @@
++/*
++ * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
++ * Copyright (c) 2007 Isaku Yamahata <yamahata at valinux co jp>
++ * VA Linux Systems Japan K.K.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ */
++
++#ifndef _ASM_I386_GNTTAB_DMA_H
++#define _ASM_I386_GNTTAB_DMA_H
++
++static inline int gnttab_dma_local_pfn(struct page *page)
++{
++ /* Has it become a local MFN? */
++ return pfn_valid(mfn_to_local_pfn(pfn_to_mfn(page_to_pfn(page))));
++}
++
++static inline maddr_t gnttab_dma_map_page(struct page *page)
++{
++ __gnttab_dma_map_page(page);
++ return page_to_bus(page);
++}
++
++static inline void gnttab_dma_unmap_page(maddr_t maddr)
++{
++ __gnttab_dma_unmap_page(virt_to_page(bus_to_virt(maddr)));
++}
++
++#endif /* _ASM_I386_GNTTAB_DMA_H */
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ b/include/asm-x86_64/mach-xen/asm/gnttab_dma.h 2007-08-27 14:01:25.000000000 -0400
+@@ -0,0 +1 @@
++#include <asm-i386/mach-xen/asm/gnttab_dma.h>
+--- a/include/xen/gnttab.h 2007-08-27 14:01:25.000000000 -0400
++++ b/include/xen/gnttab.h 2007-08-27 14:01:25.000000000 -0400
+@@ -39,6 +39,7 @@
+
+ #include <asm/hypervisor.h>
+ #include <asm/maddr.h> /* maddr_t */
++#include <linux/mm.h>
+ #include <xen/interface/grant_table.h>
+ #include <xen/features.h>
+
+@@ -101,6 +102,18 @@ void gnttab_grant_foreign_access_ref(gra
+ void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
+ unsigned long pfn);
+
++int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep);
++void __gnttab_dma_map_page(struct page *page);
++static inline void __gnttab_dma_unmap_page(struct page *page)
++{
++}
++
++static inline void gnttab_reset_grant_page(struct page *page)
++{
++ init_page_count(page);
++ reset_page_mapcount(page);
++}
++
+ int gnttab_suspend(void);
+ int gnttab_resume(void);
+
+@@ -135,4 +148,19 @@ gnttab_set_unmap_op(struct gnttab_unmap_
+ unmap->dev_bus_addr = 0;
+ }
+
++static inline void
++gnttab_set_replace_op(struct gnttab_unmap_and_replace *unmap, maddr_t addr,
++ maddr_t new_addr, grant_handle_t handle)
++{
++ if (xen_feature(XENFEAT_auto_translated_physmap)) {
++ unmap->host_addr = __pa(addr);
++ unmap->new_addr = __pa(new_addr);
++ } else {
++ unmap->host_addr = addr;
++ unmap->new_addr = new_addr;
++ }
++
++ unmap->handle = handle;
++}
++
+ #endif /* __ASM_GNTTAB_H__ */
+--- a/include/xen/interface/grant_table.h 2007-08-27 14:01:25.000000000 -0400
++++ b/include/xen/interface/grant_table.h 2007-08-27 14:01:25.000000000 -0400
+@@ -328,6 +328,29 @@ struct gnttab_query_size {
+ typedef struct gnttab_query_size gnttab_query_size_t;
+ DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
+
++/*
++ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
++ * tracked by <handle> but atomically replace the page table entry with one
++ * pointing to the machine address under <new_addr>. <new_addr> will be
++ * redirected to the null entry.
++ * NOTES:
++ * 1. The call may fail in an undefined manner if either mapping is not
++ * tracked by <handle>.
++ * 2. After executing a batch of unmaps, it is guaranteed that no stale
++ * mappings will remain in the device or host TLBs.
++ */
++#define GNTTABOP_unmap_and_replace 7
++struct gnttab_unmap_and_replace {
++ /* IN parameters. */
++ uint64_t host_addr;
++ uint64_t new_addr;
++ grant_handle_t handle;
++ /* OUT parameters. */
++ int16_t status; /* GNTST_* */
++};
++typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
++DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
++
+
+ /*
+ * Bitfield values for update_pin_status.flags.