summaryrefslogblamecommitdiffstats
path: root/drivers/xen/platform-pci.c
blob: d4c50d63acbc14b03b8bcf3aac7d8c80fb1ea2d7 (plain) (tree)




























                                                                               
                             



                            
                        









                                                                           
                             











































                                                                               












                                                                      












                                                                   



                                                                       
                    
                                 

                                    


                               




                                             









                                                            

                                                    
                             
 

                                                    
                             



                                    

                              























                                                                                 
                                    
        
                                    
















                                                                


                                              







                                                     
/******************************************************************************
 * platform-pci.c
 *
 * Xen platform PCI device driver
 * Copyright (c) 2005, Intel Corporation.
 * Copyright (c) 2007, XenSource Inc.
 * Copyright (c) 2010, Citrix
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */


#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/pci.h>

#include <xen/platform_pci.h>
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/hvm.h>
#include <xen/xen-ops.h>

#define DRV_NAME    "xen-platform-pci"

MODULE_AUTHOR("ssmith@xensource.com and stefano.stabellini@eu.citrix.com");
MODULE_DESCRIPTION("Xen platform PCI device");
MODULE_LICENSE("GPL");

static unsigned long platform_mmio;
static unsigned long platform_mmio_alloc;
static unsigned long platform_mmiolen;
static uint64_t callback_via;

unsigned long alloc_xen_mmio(unsigned long len)
{
	unsigned long addr;

	addr = platform_mmio + platform_mmio_alloc;
	platform_mmio_alloc += len;
	BUG_ON(platform_mmio_alloc > platform_mmiolen);

	return addr;
}

static uint64_t get_callback_via(struct pci_dev *pdev)
{
	u8 pin;
	int irq;

	irq = pdev->irq;
	if (irq < 16)
		return irq; /* ISA IRQ */

	pin = pdev->pin;

	/* We don't know the GSI. Specify the PCI INTx line instead. */
	return ((uint64_t)0x01 << 56) | /* PCI INTx identifier */
		((uint64_t)pci_domain_nr(pdev->bus) << 32) |
		((uint64_t)pdev->bus->number << 16) |
		((uint64_t)(pdev->devfn & 0xff) << 8) |
		((uint64_t)(pin - 1) & 3);
}

static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id)
{
	xen_hvm_evtchn_do_upcall();
	return IRQ_HANDLED;
}

static int xen_allocate_irq(struct pci_dev *pdev)
{
	return request_irq(pdev->irq, do_hvm_evtchn_intr,
			IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TRIGGER_RISING,
			"xen-platform-pci", pdev);
}

static int platform_pci_resume(struct pci_dev *pdev)
{
	int err;
	if (xen_have_vector_callback)
		return 0;
	err = xen_set_callback_via(callback_via);
	if (err) {
		dev_err(&pdev->dev, "platform_pci_resume failure!\n");
		return err;
	}
	return 0;
}

static void __devinit prepare_shared_info(void)
{
#ifdef CONFIG_KEXEC
	unsigned long addr;
	struct shared_info *hvm_shared_info;

	addr = alloc_xen_mmio(PAGE_SIZE);
	hvm_shared_info = ioremap(addr, PAGE_SIZE);
	memset(hvm_shared_info, 0, PAGE_SIZE);
	xen_hvm_prepare_kexec(hvm_shared_info, addr >> PAGE_SHIFT);
#endif
}

static int __devinit platform_pci_init(struct pci_dev *pdev,
				       const struct pci_device_id *ent)
{
	int i, ret;
	long ioaddr;
	long mmio_addr, mmio_len;
	unsigned int max_nr_gframes;

	if (!xen_domain())
		return -ENODEV;

	i = pci_enable_device(pdev);
	if (i)
		return i;

	ioaddr = pci_resource_start(pdev, 0);

	mmio_addr = pci_resource_start(pdev, 1);
	mmio_len = pci_resource_len(pdev, 1);

	if (mmio_addr == 0 || ioaddr == 0) {
		dev_err(&pdev->dev, "no resources found\n");
		ret = -ENOENT;
		goto pci_out;
	}

	ret = pci_request_region(pdev, 1, DRV_NAME);
	if (ret < 0)
		goto pci_out;

	ret = pci_request_region(pdev, 0, DRV_NAME);
	if (ret < 0)
		goto mem_out;

	platform_mmio = mmio_addr;
	platform_mmiolen = mmio_len;

	prepare_shared_info();

	if (!xen_have_vector_callback) {
		ret = xen_allocate_irq(pdev);
		if (ret) {
			dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret);
			goto out;
		}
		callback_via = get_callback_via(pdev);
		ret = xen_set_callback_via(callback_via);
		if (ret) {
			dev_warn(&pdev->dev, "Unable to set the evtchn callback "
					 "err=%d\n", ret);
			goto out;
		}
	}

	max_nr_gframes = gnttab_max_grant_frames();
	xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
	ret = gnttab_init();
	if (ret)
		goto out;
	xenbus_probe(NULL);
	return 0;

out:
	pci_release_region(pdev, 0);
mem_out:
	pci_release_region(pdev, 1);
pci_out:
	pci_disable_device(pdev);
	return ret;
}

static struct pci_device_id platform_pci_tbl[] __devinitdata = {
	{PCI_VENDOR_ID_XEN, PCI_DEVICE_ID_XEN_PLATFORM,
		PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
	{0,}
};

MODULE_DEVICE_TABLE(pci, platform_pci_tbl);

static struct pci_driver platform_driver = {
	.name =           DRV_NAME,
	.probe =          platform_pci_init,
	.id_table =       platform_pci_tbl,
#ifdef CONFIG_PM
	.resume_early =   platform_pci_resume,
#endif
};

static int __init platform_pci_module_init(void)
{
	return pci_register_driver(&platform_driver);
}

module_init(platform_pci_module_init);
column5'>| | | | | | | | | | | | | | | | | | | | | | | | Upon timeout, we can just exit out of the loop, without the cost of the changing the task's state with an smp_store_mb call. Just exit out of the loop and be done - setting the task state afterwards will be, of course, redundant. [dave@stgolabs.net: forgotten fixlets] Link: http://lkml.kernel.org/r/20181109155258.jxcr4t2pnz6zqct3@linux-r8p5 Link: http://lkml.kernel.org/r/20181108051006.18751-7-dave@stgolabs.net Signed-off-by: Davidlohr Bueso <dave@stgolabs.net> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Davidlohr Bueso <dbueso@suse.de> Cc: Jason Baron <jbaron@akamai.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | * | | | | | | | fs/epoll: reduce the scope of wq lock in epoll_wait()Davidlohr Bueso2019-01-041-54/+60 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | This patch aims at reducing ep wq.lock hold times in epoll_wait(2). For the blocking case, there is no need to constantly take and drop the spinlock, which is only needed to manipulate the waitqueue. The call to ep_events_available() is now lockless, and only exposed to benign races. Here, if false positive (returns available events and does not see another thread deleting an epi from the list) we call into send_events and then the list's state is correctly seen. Otoh, if a false negative and we don't see a list_add_tail(), for example, from irq callback, then it is rechecked again before blocking, which will see the correct state. In order for more accuracy to see concurrent list_del_init(), use the list_empty_careful() variant -- of course, this won't be safe against insertions from wakeup. For the overflow list we obviously need to prevent load/store tearing as we don't want to see partial values while the ready list is disabled. [dave@stgolabs.net: forgotten fixlets] Link: http://lkml.kernel.org/r/20181109155258.jxcr4t2pnz6zqct3@linux-r8p5 Link: http://lkml.kernel.org/r/20181108051006.18751-6-dave@stgolabs.net Signed-off-by: Davidlohr Bueso <dbueso@suse.de> Suggested-by: Jason Baron <jbaron@akamai.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | * | | | | | | | fs/epoll: robustify ep->mtx held checksDavidlohr Bueso2019-01-041-0/+2 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Insted of just commenting how important it is, lets make it more robust and add a lockdep_assert_held() call. Link: http://lkml.kernel.org/r/20181108051006.18751-5-dave@stgolabs.net Signed-off-by: Davidlohr Bueso <dbueso@suse.de> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Jason Baron <jbaron@akamai.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | * | | | | | | | fs/epoll: drop ovflist branch predictionDavidlohr Bueso2019-01-041-1/+1 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The ep->ovflist is a secondary ready-list to temporarily store events that might occur when doing sproc without holding the ep->wq.lock. This accounts for every time we check for ready events and also send events back to userspace; both callbacks, particularly the latter because of copy_to_user, can account for a non-trivial time. As such, the unlikely() check to see if the pointer is being used, seems both misleading and sub-optimal. In fact, we go to an awful lot of trouble to sync both lists, and populating the ovflist is far from an uncommon scenario. For example, profiling a concurrent epoll_wait(2) benchmark, with CONFIG_PROFILE_ANNOTATED_BRANCHES shows that for a two threads a 33% incorrect rate was seen; and when incrementally increasing the number of epoll instances (which is used, for example for multiple queuing load balancing models), up to a 90% incorrect rate was seen. Similarly, by deleting the prediction, 3% throughput boost was seen across incremental threads. Link: http://lkml.kernel.org/r/20181108051006.18751-4-dave@stgolabs.net Signed-off-by: Davidlohr Bueso <dbueso@suse.de> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Jason Baron <jbaron@akamai.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | * | | | | | | | fs/epoll: simplify ep_send_events_proc() ready-list loopDavidlohr Bueso2019-01-041-36/+37 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The current logic is a bit convoluted. Lets simplify this with a standard list_for_each_entry_safe() loop instead and just break out after maxevents is reached. While at it, remove an unnecessary indentation level in the loop when there are in fact ready events. Link: http://lkml.kernel.org/r/20181108051006.18751-3-dave@stgolabs.net Signed-off-by: Davidlohr Bueso <dbueso@suse.de> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Jason Baron <jbaron@akamai.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | * | | | | | | | fs/epoll: remove max_nests argument from ep_call_nested()Davidlohr Bueso2019-01-04