From b3cbd9bf77cd1888114dbee1653e79aa23fd4068 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 2 Aug 2016 14:02:52 -0700 Subject: mm/kasan: get rid of ->state in struct kasan_alloc_meta The state of object currently tracked in two places - shadow memory, and the ->state field in struct kasan_alloc_meta. We can get rid of the latter. The will save us a little bit of memory. Also, this allow us to move free stack into struct kasan_alloc_meta, without increasing memory consumption. So now we should always know when the last time the object was freed. This may be useful for long delayed use-after-free bugs. As a side effect this fixes following UBSAN warning: UBSAN: Undefined behaviour in mm/kasan/quarantine.c:102:13 member access within misaligned address ffff88000d1efebc for type 'struct qlist_node' which requires 8 byte alignment Link: http://lkml.kernel.org/r/1470062715-14077-5-git-send-email-aryabinin@virtuozzo.com Reported-by: kernel test robot Signed-off-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index c9cf374445d8..d600303306eb 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -56,6 +56,7 @@ void kasan_cache_destroy(struct kmem_cache *cache); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); void kasan_poison_object_data(struct kmem_cache *cache, void *object); +void kasan_init_slab_obj(struct kmem_cache *cache, const void *object); void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); void kasan_kfree_large(const void *ptr); @@ -102,6 +103,8 @@ static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} static inline void kasan_poison_object_data(struct kmem_cache *cache, void *object) {} +static inline void kasan_init_slab_obj(struct kmem_cache *cache, + const void *object) {} static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} static inline void kasan_kfree_large(const void *ptr) {} -- cgit v1.2.3-55-g7522 From bd721ea73e1f965569b40620538c942001f76294 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 2 Aug 2016 14:03:33 -0700 Subject: treewide: replace obsolete _refok by __ref There was only one use of __initdata_refok and __exit_refok __init_refok was used 46 times against 82 for __ref. Those definitions are obsolete since commit 312b1485fb50 ("Introduce new section reference annotations tags: __ref, __refdata, __refconst") This patch removes the following compatibility definitions and replaces them treewide. /* compatibility defines */ #define __init_refok __ref #define __initdata_refok __refdata #define __exit_refok __ref I can also provide separate patches if necessary. (One patch per tree and check in 1 month or 2 to remove old definitions) [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1466796271-3043-1-git-send-email-fabf@skynet.be Signed-off-by: Fabian Frederick Cc: Ingo Molnar Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/machvec_impl.h | 2 +- arch/arc/mm/init.c | 2 +- arch/arm/mach-integrator/impd1.c | 4 ++-- arch/arm/mach-mv78xx0/common.c | 2 +- arch/blackfin/mm/init.c | 2 +- arch/hexagon/mm/init.c | 2 +- arch/ia64/kernel/mca.c | 2 +- arch/microblaze/mm/init.c | 4 ++-- arch/microblaze/mm/pgtable.c | 2 +- arch/mips/mm/init.c | 2 +- arch/mips/txx9/generic/pci.c | 2 +- arch/nios2/mm/init.c | 2 +- arch/openrisc/mm/ioremap.c | 4 ++-- arch/powerpc/lib/alloc.c | 2 +- arch/powerpc/mm/pgtable_32.c | 2 +- arch/powerpc/platforms/powermac/setup.c | 4 ++-- arch/powerpc/platforms/ps3/device-init.c | 2 +- arch/powerpc/sysdev/msi_bitmap.c | 2 +- arch/score/mm/init.c | 2 +- arch/sh/drivers/pci/pci.c | 4 ++-- arch/sh/mm/ioremap.c | 2 +- arch/x86/mm/init.c | 4 ++-- arch/x86/platform/efi/early_printk.c | 4 ++-- drivers/acpi/osl.c | 5 ++--- drivers/base/node.c | 2 +- drivers/clk/clkdev.c | 4 ++-- drivers/pci/xen-pcifront.c | 2 +- drivers/video/logo/logo.c | 4 ++-- include/acpi/acpi_io.h | 2 +- include/linux/init.h | 6 ------ include/net/net_namespace.h | 2 +- init/main.c | 2 +- mm/page_alloc.c | 4 ++-- mm/slab.c | 2 +- mm/sparse-vmemmap.c | 2 +- mm/sparse.c | 2 +- 36 files changed, 46 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/machvec_impl.h b/arch/alpha/kernel/machvec_impl.h index f54bdf658cd0..d3398f6ab74c 100644 --- a/arch/alpha/kernel/machvec_impl.h +++ b/arch/alpha/kernel/machvec_impl.h @@ -137,7 +137,7 @@ #define __initmv __initdata #define ALIAS_MV(x) #else -#define __initmv __initdata_refok +#define __initmv __refdata /* GCC actually has a syntax for defining aliases, but is under some delusion that you shouldn't be able to declare it extern somewhere diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 8be930394750..399e2f223d25 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -220,7 +220,7 @@ void __init mem_init(void) /* * free_initmem: Free all the __init memory. */ -void __init_refok free_initmem(void) +void __ref free_initmem(void) { free_initmem_default(-1); } diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c index 38b0da300dd5..ed9a01484030 100644 --- a/arch/arm/mach-integrator/impd1.c +++ b/arch/arm/mach-integrator/impd1.c @@ -320,11 +320,11 @@ static struct impd1_device impd1_devs[] = { #define IMPD1_VALID_IRQS 0x00000bffU /* - * As this module is bool, it is OK to have this as __init_refok() - no + * As this module is bool, it is OK to have this as __ref() - no * probe calls will be done after the initial system bootup, as devices * are discovered as part of the machine startup. */ -static int __init_refok impd1_probe(struct lm_device *dev) +static int __ref impd1_probe(struct lm_device *dev) { struct impd1_module *impd1; int irq_base; diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c index 45a05207b418..6af5430d0d97 100644 --- a/arch/arm/mach-mv78xx0/common.c +++ b/arch/arm/mach-mv78xx0/common.c @@ -343,7 +343,7 @@ void __init mv78xx0_init_early(void) DDR_WINDOW_CPU1_BASE, DDR_WINDOW_CPU_SZ); } -void __init_refok mv78xx0_timer_init(void) +void __ref mv78xx0_timer_init(void) { orion_time_init(BRIDGE_VIRT_BASE, BRIDGE_INT_TIMER1_CLR, IRQ_MV78XX0_TIMER_1, get_tclk()); diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c index 166842de3dc7..b59cd7c3261a 100644 --- a/arch/blackfin/mm/init.c +++ b/arch/blackfin/mm/init.c @@ -112,7 +112,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) } #endif -void __init_refok free_initmem(void) +void __ref free_initmem(void) { #if defined CONFIG_RAMKERNEL && !defined CONFIG_MPU free_initmem_default(-1); diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 88977e42af0a..192584d5ac2f 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -93,7 +93,7 @@ void __init mem_init(void) * Todo: free pages between __init_begin and __init_end; possibly * some devtree related stuff as well. */ -void __init_refok free_initmem(void) +void __ref free_initmem(void) { } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 07a4e32ae96a..eb9220cde76c 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, } /* Caller prevents this from being called after init */ -static void * __init_refok mca_bootmem(void) +static void * __ref mca_bootmem(void) { return __alloc_bootmem(sizeof(struct ia64_mca_cpu), KERNEL_STACK_SIZE, 0); diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 77bc7c7e6522..434639f9a3a6 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -414,7 +414,7 @@ void __init *early_get_page(void) #endif /* CONFIG_MMU */ -void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask) +void * __ref alloc_maybe_bootmem(size_t size, gfp_t mask) { if (mem_init_done) return kmalloc(size, mask); @@ -422,7 +422,7 @@ void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask) return alloc_bootmem(size); } -void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) +void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index eb99fcc76088..cc732fe357ad 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -234,7 +234,7 @@ unsigned long iopa(unsigned long addr) return pa; } -__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9b58eb5fd0d5..a5509e7dcad2 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -504,7 +504,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) void (*free_init_pages_eva)(void *begin, void *end) = NULL; -void __init_refok free_initmem(void) +void __ref free_initmem(void) { prom_free_prom_memory(); /* diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c index a77698ff2b6f..1f6bc9a3036c 100644 --- a/arch/mips/txx9/generic/pci.c +++ b/arch/mips/txx9/generic/pci.c @@ -268,7 +268,7 @@ static int txx9_i8259_irq_setup(int irq) return err; } -static void __init_refok quirk_slc90e66_bridge(struct pci_dev *dev) +static void __ref quirk_slc90e66_bridge(struct pci_dev *dev) { int irq; /* PCI/ISA Bridge interrupt */ u8 reg_64; diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index e75c75d249d6..c92fe4234009 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -89,7 +89,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) } #endif -void __init_refok free_initmem(void) +void __ref free_initmem(void) { free_initmem_default(-1); } diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 5b2a95116e8f..fa60b81aee3e 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -38,7 +38,7 @@ static unsigned int fixmaps_used __initdata; * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -void __iomem *__init_refok +void __iomem *__ref __ioremap(phys_addr_t addr, unsigned long size, pgprot_t prot) { phys_addr_t p; @@ -116,7 +116,7 @@ void iounmap(void *addr) * the memblock infrastructure. */ -pte_t __init_refok *pte_alloc_one_kernel(struct mm_struct *mm, +pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index 60b0b3fc8fc1..a58abe4afbd1 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -6,7 +6,7 @@ #include -void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) +void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 7f922f557936..0ae0572bc239 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -79,7 +79,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } -__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 3de4a7c85140..6b4e9d181126 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -353,12 +353,12 @@ static int pmac_late_init(void) machine_late_initcall(powermac, pmac_late_init); /* - * This is __init_refok because we check for "initializing" before + * This is __ref because we check for "initializing" before * touching any of the __init sensitive things and "initializing" * will be false after __init time. This can't be __init because it * can be called whenever a disk is first accessed. */ -void __init_refok note_bootable_part(dev_t dev, int part, int goodness) +void __ref note_bootable_part(dev_t dev, int part, int goodness) { char *p; diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index 3f175e8aedb4..57caaf11a83f 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -189,7 +189,7 @@ fail_malloc: return result; } -static int __init_refok ps3_setup_uhc_device( +static int __ref ps3_setup_uhc_device( const struct ps3_repository_device *repo, enum ps3_match_id match_id, enum ps3_interrupt_type interrupt_type, enum ps3_reg_type reg_type) { diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index ed5234ed8d3f..5ebd3f018295 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -112,7 +112,7 @@ int msi_bitmap_reserve_dt_hwirqs(struct msi_bitmap *bmp) return 0; } -int __init_refok msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count, +int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count, struct device_node *of_node) { int size; diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index 9fbce49ad3bd..444c26c0f750 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -91,7 +91,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -void __init_refok free_initmem(void) +void __ref free_initmem(void) { free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index d5462b7bc514..84563e39a5b8 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -221,7 +221,7 @@ pcibios_bus_report_status_early(struct pci_channel *hose, * We can't use pci_find_device() here since we are * called from interrupt context. */ -static void __init_refok +static void __ref pcibios_bus_report_status(struct pci_bus *bus, unsigned int status_mask, int warn) { @@ -256,7 +256,7 @@ pcibios_bus_report_status(struct pci_bus *bus, unsigned int status_mask, pcibios_bus_report_status(dev->subordinate, status_mask, warn); } -void __init_refok pcibios_report_status(unsigned int status_mask, int warn) +void __ref pcibios_report_status(unsigned int status_mask, int warn) { struct pci_channel *hose; diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c index 0c99ec2e7ed8..d09ddfe58fd8 100644 --- a/arch/sh/mm/ioremap.c +++ b/arch/sh/mm/ioremap.c @@ -34,7 +34,7 @@ * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -void __iomem * __init_refok +void __iomem * __ref __ioremap_caller(phys_addr_t phys_addr, unsigned long size, pgprot_t pgprot, void *caller) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fb4c1b42fc7e..620928903be3 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -208,7 +208,7 @@ static int __meminit save_mr(struct map_range *mr, int nr_range, * adjust the page_size_mask for small range to go with * big page size instead small one if nearby are ram too. */ -static void __init_refok adjust_range_page_size_mask(struct map_range *mr, +static void __ref adjust_range_page_size_mask(struct map_range *mr, int nr_range) { int i; @@ -396,7 +396,7 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) * This runs before bootmem is initialized and gets pages directly from * the physical memory. To access them they are temporarily mapped. */ -unsigned long __init_refok init_memory_mapping(unsigned long start, +unsigned long __ref init_memory_mapping(unsigned long start, unsigned long end) { struct map_range mr[NR_RANGE_MR]; diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c index 524142117296..5fdacb322ceb 100644 --- a/arch/x86/platform/efi/early_printk.c +++ b/arch/x86/platform/efi/early_printk.c @@ -44,7 +44,7 @@ early_initcall(early_efi_map_fb); * In case earlyprintk=efi,keep we have the whole framebuffer mapped already * so just return the offset efi_fb + start. */ -static __init_refok void *early_efi_map(unsigned long start, unsigned long len) +static __ref void *early_efi_map(unsigned long start, unsigned long len) { unsigned long base; @@ -56,7 +56,7 @@ static __init_refok void *early_efi_map(unsigned long start, unsigned long len) return early_ioremap(base + start, len); } -static __init_refok void early_efi_unmap(void *addr, unsigned long len) +static __ref void early_efi_unmap(void *addr, unsigned long len) { if (!efi_fb) early_iounmap(addr, len); diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index b108f1358a32..4305ee9db4b2 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -309,7 +309,7 @@ static void acpi_unmap(acpi_physical_address pg_off, void __iomem *vaddr) * During early init (when acpi_gbl_permanent_mmap has not been set yet) this * routine simply calls __acpi_map_table() to get the job done. */ -void __iomem *__init_refok +void __iomem *__ref acpi_os_map_iomem(acpi_physical_address phys, acpi_size size) { struct acpi_ioremap *map; @@ -362,8 +362,7 @@ out: } EXPORT_SYMBOL_GPL(acpi_os_map_iomem); -void *__init_refok -acpi_os_map_memory(acpi_physical_address phys, acpi_size size) +void *__ref acpi_os_map_memory(acpi_physical_address phys, acpi_size size) { return (void *)acpi_os_map_iomem(phys, size); } diff --git a/drivers/base/node.c b/drivers/base/node.c index 29cd96661b30..5548f9686016 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -370,7 +370,7 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE #define page_initialized(page) (page->lru.next) -static int __init_refok get_nid_for_pfn(unsigned long pfn) +static int __ref get_nid_for_pfn(unsigned long pfn) { struct page *page; diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index 89cc700fbc37..97ae60fa1584 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -250,7 +250,7 @@ struct clk_lookup_alloc { char con_id[MAX_CON_ID]; }; -static struct clk_lookup * __init_refok +static struct clk_lookup * __ref vclkdev_alloc(struct clk_hw *hw, const char *con_id, const char *dev_fmt, va_list ap) { @@ -287,7 +287,7 @@ vclkdev_create(struct clk_hw *hw, const char *con_id, const char *dev_fmt, return cl; } -struct clk_lookup * __init_refok +struct clk_lookup * __ref clkdev_alloc(struct clk *clk, const char *con_id, const char *dev_fmt, ...) { struct clk_lookup *cl; diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 5f70fee59a94..d6ff5e82377d 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -1086,7 +1086,7 @@ out: return err; } -static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev, +static void __ref pcifront_backend_changed(struct xenbus_device *xdev, enum xenbus_state be_state) { struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c index 10fbfd8ab963..b6bc4a0bda2a 100644 --- a/drivers/video/logo/logo.c +++ b/drivers/video/logo/logo.c @@ -36,11 +36,11 @@ static int __init fb_logo_late_init(void) late_initcall(fb_logo_late_init); -/* logo's are marked __initdata. Use __init_refok to tell +/* logo's are marked __initdata. Use __ref to tell * modpost that it is intended that this function uses data * marked __initdata. */ -const struct linux_logo * __init_refok fb_find_logo(int depth) +const struct linux_logo * __ref fb_find_logo(int depth) { const struct linux_logo *logo = NULL; diff --git a/include/acpi/acpi_io.h b/include/acpi/acpi_io.h index dd86c5fc102d..d7d0f495a34e 100644 --- a/include/acpi/acpi_io.h +++ b/include/acpi/acpi_io.h @@ -13,7 +13,7 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, } #endif -void __iomem *__init_refok +void __iomem *__ref acpi_os_map_iomem(acpi_physical_address phys, acpi_size size); void __ref acpi_os_unmap_iomem(void __iomem *virt, acpi_size size); void __iomem *acpi_os_get_iomem(acpi_physical_address phys, unsigned int size); diff --git a/include/linux/init.h b/include/linux/init.h index aedb254abc37..6935d02474aa 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -77,12 +77,6 @@ #define __refdata __section(.ref.data) #define __refconst __constsection(.ref.rodata) -/* compatibility defines */ -#define __init_refok __ref -#define __initdata_refok __refdata -#define __exit_refok __ref - - #ifdef MODULE #define __exitused #else diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 4089abc6e9c0..0933c7455a30 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -275,7 +275,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #define __net_initconst #else #define __net_init __init -#define __net_exit __exit_refok +#define __net_exit __ref #define __net_initdata __initdata #define __net_initconst __initconst #endif diff --git a/init/main.c b/init/main.c index eae02aa03c9e..e7345dcaaf05 100644 --- a/init/main.c +++ b/init/main.c @@ -380,7 +380,7 @@ static void __init setup_command_line(char *command_line) static __initdata DECLARE_COMPLETION(kthreadd_done); -static noinline void __init_refok rest_init(void) +static noinline void __ref rest_init(void) { int pid; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ea759b935360..39a372a2a1d6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5276,7 +5276,7 @@ void __init setup_per_cpu_pageset(void) setup_zone_pageset(zone); } -static noinline __init_refok +static noinline __ref int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) { int i; @@ -5903,7 +5903,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) } } -static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) +static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { unsigned long __maybe_unused start = 0; unsigned long __maybe_unused offset = 0; diff --git a/mm/slab.c b/mm/slab.c index ca135bd47c35..261147ba156f 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1877,7 +1877,7 @@ static struct array_cache __percpu *alloc_kmem_cache_cpus( return cpu_cache; } -static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) +static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) { if (slab_state >= FULL) return enable_cpucache(cachep, gfp); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 68885dcbaf40..574c67b663fe 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -36,7 +36,7 @@ * Uses the main allocators if they are available, else bootmem. */ -static void * __init_refok __earlyonly_bootmem_alloc(int node, +static void * __ref __earlyonly_bootmem_alloc(int node, unsigned long size, unsigned long align, unsigned long goal) diff --git a/mm/sparse.c b/mm/sparse.c index 36d7bbb80e49..1e168bf2779a 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -59,7 +59,7 @@ static inline void set_section_nid(unsigned long section_nr, int nid) #endif #ifdef CONFIG_SPARSEMEM_EXTREME -static struct mem_section noinline __init_refok *sparse_index_alloc(int nid) +static noinline struct mem_section __ref *sparse_index_alloc(int nid) { struct mem_section *section = NULL; unsigned long array_size = SECTIONS_PER_ROOT * -- cgit v1.2.3-55-g7522 From db3f60012482756f46cc4d7d9ad7d793ae30360c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 2 Aug 2016 14:03:36 -0700 Subject: uapi: move forward declarations of internal structures Don't user forward declarations of internal kernel structures in headers exported to userspace. Move "struct completion;". Move "struct task_struct;". Link: http://lkml.kernel.org/r/20160713215808.GA22486@p183.telecom.by Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/capability.h | 1 + include/linux/sysctl.h | 1 + include/uapi/linux/capability.h | 2 -- include/uapi/linux/sysctl.h | 2 -- 4 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index 5f3c63dde2d5..dbc21c719ce6 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -38,6 +38,7 @@ struct cpu_vfs_cap_data { struct file; struct inode; struct dentry; +struct task_struct; struct user_namespace; extern const kernel_cap_t __cap_empty_set; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index fa7bc29925c9..697e160c78d0 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -28,6 +28,7 @@ #include /* For the /proc/sys support */ +struct completion; struct ctl_table; struct nsproxy; struct ctl_table_root; diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 12c37a197d24..49bc06295398 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -15,8 +15,6 @@ #include -struct task_struct; - /* User-level do most of the mapping between kernel and user capabilities based on the version tag given by the kernel. The kernel might be somewhat backwards compatible, but don't bet on diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 0956373b56db..d2b12152e358 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -26,8 +26,6 @@ #include #include -struct completion; - #define CTL_MAXNAME 10 /* how many path components do we allow in a call to sysctl? In other words, what is the largest acceptable value for the nlen -- cgit v1.2.3-55-g7522 From 949bed2f5764435715e3d6dd3ab6dd4dbd890a71 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Tue, 2 Aug 2016 14:03:42 -0700 Subject: include: mman: use bool instead of int for the return value of arch_validate_prot For pure bool function's return value, bool is a little better more or less than int. Link: http://lkml.kernel.org/r/1469331815-2026-1-git-send-email-chengang@emindsoft.com.cn Signed-off-by: Chen Gang Acked-by: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/mman.h | 8 ++++---- include/linux/mman.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 2563c435a4b1..fc420cedecae 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -31,13 +31,13 @@ static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) -static inline int arch_validate_prot(unsigned long prot) +static inline bool arch_validate_prot(unsigned long prot) { if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO)) - return 0; + return false; if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO)) - return 0; - return 1; + return false; + return true; } #define arch_validate_prot(prot) arch_validate_prot(prot) diff --git a/include/linux/mman.h b/include/linux/mman.h index 33e17f6a327a..634c4c51fe3a 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -49,7 +49,7 @@ static inline void vm_unacct_memory(long pages) * * Returns true if the prot flags are valid */ -static inline int arch_validate_prot(unsigned long prot) +static inline bool arch_validate_prot(unsigned long prot) { return (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM)) == 0; } -- cgit v1.2.3-55-g7522 From 9d5059c959ac739dbf837cec14586e58e7a67292 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 2 Aug 2016 14:03:47 -0700 Subject: dynamic_debug: only add header when used kernel.h header doesn't directly use dynamic debug, instead we can include it in module.c (which used it via kernel.h). printk.h only uses it if CONFIG_DYNAMIC_DEBUG is on, changing the inclusion to only happen in that case. Link: http://lkml.kernel.org/r/1468429793-16917-1-git-send-email-luisbg@osg.samsung.com [luisbg@osg.samsung.com: include dynamic_debug.h in drb_int.h] Link: http://lkml.kernel.org/r/1468447828-18558-2-git-send-email-luisbg@osg.samsung.com Signed-off-by: Luis de Bethencourt Cc: Rusty Russell Cc: Hidehiro Kawai Cc: Borislav Petkov Cc: Michal Nazarewicz Cc: Rasmus Villemoes Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/drbd/drbd_actlog.c | 1 - drivers/block/drbd/drbd_int.h | 1 + include/linux/kernel.h | 1 - include/linux/printk.h | 3 ++- kernel/module.c | 1 + 5 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 0a1aaf8c24c4..2d3d50ab74bf 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "drbd_int.h" diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7b54354976a5..4cb8f21ff4ef 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/kernel.h b/include/linux/kernel.h index c42082112ec8..d96a6118d26a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/printk.h b/include/linux/printk.h index f136b22c7772..987c65ed34e5 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -289,10 +289,11 @@ extern asmlinkage void dump_stack(void) __cold; no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif -#include /* If you are writing a driver, please use dev_dbg instead */ #if defined(CONFIG_DYNAMIC_DEBUG) +#include + /* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */ #define pr_debug(fmt, ...) \ dynamic_pr_debug(fmt, ##__VA_ARGS__) diff --git a/kernel/module.c b/kernel/module.c index 5f71aa63ed2a..a0f48b8b00da 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include "module-internal.h" -- cgit v1.2.3-55-g7522 From 874f9c7da9a4acbc1b9e12ca722579fb50e4d142 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 2 Aug 2016 14:03:53 -0700 Subject: printk: create pr_ functions Using functions instead of macros can reduce overall code size by eliminating unnecessary "KERN_SOH" prefixes from format strings. defconfig x86-64: $ size vmlinux* text data bss dec hex filename 10193570 4331464 1105920 15630954 ee826a vmlinux.new 10192623 4335560 1105920 15634103 ee8eb7 vmlinux.old As the return value are unimportant and unused in the kernel tree, these new functions return void. Miscellanea: - change pr_ macros to call new __pr_ functions - change vprintk_nmi and vprintk_default to add LOGLEVEL_ argument [akpm@linux-foundation.org: fix LOGLEVEL_INFO, per Joe] Link: http://lkml.kernel.org/r/e16cc34479dfefcae37c98b481e6646f0f69efc3.1466718827.git.joe@perches.com Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 48 +++++++++++++++++++++++++++++++++--------------- kernel/printk/internal.h | 16 ++++++++++------ kernel/printk/nmi.c | 13 +++++++++++-- kernel/printk/printk.c | 27 ++++++++++++++++++++++++--- 4 files changed, 78 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 987c65ed34e5..c2158f0f1499 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -257,21 +257,39 @@ extern asmlinkage void dump_stack(void) __cold; * and other debug macros are compiled out unless either DEBUG is defined * or CONFIG_DYNAMIC_DEBUG is set. */ -#define pr_emerg(fmt, ...) \ - printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) -#define pr_alert(fmt, ...) \ - printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) -#define pr_crit(fmt, ...) \ - printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) -#define pr_err(fmt, ...) \ - printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) -#define pr_warning(fmt, ...) \ - printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) -#define pr_warn pr_warning -#define pr_notice(fmt, ...) \ - printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) -#define pr_info(fmt, ...) \ - printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) + +#ifdef CONFIG_PRINTK + +asmlinkage __printf(1, 2) __cold void __pr_emerg(const char *fmt, ...); +asmlinkage __printf(1, 2) __cold void __pr_alert(const char *fmt, ...); +asmlinkage __printf(1, 2) __cold void __pr_crit(const char *fmt, ...); +asmlinkage __printf(1, 2) __cold void __pr_err(const char *fmt, ...); +asmlinkage __printf(1, 2) __cold void __pr_warn(const char *fmt, ...); +asmlinkage __printf(1, 2) __cold void __pr_notice(const char *fmt, ...); +asmlinkage __printf(1, 2) __cold void __pr_info(const char *fmt, ...); + +#define pr_emerg(fmt, ...) __pr_emerg(pr_fmt(fmt), ##__VA_ARGS__) +#define pr_alert(fmt, ...) __pr_alert(pr_fmt(fmt), ##__VA_ARGS__) +#define pr_crit(fmt, ...) __pr_crit(pr_fmt(fmt), ##__VA_ARGS__) +#define pr_err(fmt, ...) __pr_err(pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warn(fmt, ...) __pr_warn(pr_fmt(fmt), ##__VA_ARGS__) +#define pr_notice(fmt, ...) __pr_notice(pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr_info(pr_fmt(fmt), ##__VA_ARGS__) + +#else + +#define pr_emerg(fmt, ...) printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +#define pr_alert(fmt, ...) printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_crit(fmt, ...) printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_err(fmt, ...) printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warn(fmt, ...) printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define pr_notice(fmt, ...) printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info(fmt, ...) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) + +#endif + +#define pr_warning pr_warn + /* * Like KERN_CONT, pr_cont() should only be used when continuing * a line with no newline ('\n') enclosed. Otherwise it defaults diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7fd2838fa417..5d4505f30083 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -16,9 +16,11 @@ */ #include -typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); +typedef __printf(2, 0) int (*printk_func_t)(int level, const char *fmt, + va_list args); -int __printf(1, 0) vprintk_default(const char *fmt, va_list args); +__printf(2, 0) +int vprintk_default(int level, const char *fmt, va_list args); #ifdef CONFIG_PRINTK_NMI @@ -31,9 +33,10 @@ extern raw_spinlock_t logbuf_lock; * via per-CPU variable. */ DECLARE_PER_CPU(printk_func_t, printk_func); -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) +__printf(2, 0) +static inline int vprintk_func(int level, const char *fmt, va_list args) { - return this_cpu_read(printk_func)(fmt, args); + return this_cpu_read(printk_func)(level, fmt, args); } extern atomic_t nmi_message_lost; @@ -44,9 +47,10 @@ static inline int get_nmi_message_lost(void) #else /* CONFIG_PRINTK_NMI */ -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) +__printf(2, 0) +static inline int vprintk_func(int level, const char *fmt, va_list args) { - return vprintk_default(fmt, args); + return vprintk_default(level, fmt, args); } static inline int get_nmi_message_lost(void) diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c index b69eb8a2876f..bc3eeb1ae6da 100644 --- a/kernel/printk/nmi.c +++ b/kernel/printk/nmi.c @@ -58,7 +58,7 @@ static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); * one writer running. But the buffer might get flushed from another * CPU, so we need to be careful. */ -static int vprintk_nmi(const char *fmt, va_list args) +static int vprintk_nmi(int level, const char *fmt, va_list args) { struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); int add = 0; @@ -79,7 +79,16 @@ again: if (!len) smp_rmb(); - add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); + if (level != LOGLEVEL_DEFAULT) { + add = snprintf(s->buffer + len, sizeof(s->buffer) - len, + KERN_SOH "%c", '0' + level); + add += vsnprintf(s->buffer + len + add, + sizeof(s->buffer) - len - add, + fmt, args); + } else { + add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, + fmt, args); + } /* * Do it once again if the buffer has been flushed in the meantime. diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 09af62e71fee..d2accf2f4448 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1801,7 +1801,28 @@ asmlinkage int printk_emit(int facility, int level, } EXPORT_SYMBOL(printk_emit); -int vprintk_default(const char *fmt, va_list args) +#ifdef CONFIG_PRINTK +#define define_pr_level(func, loglevel) \ +asmlinkage __visible void func(const char *fmt, ...) \ +{ \ + va_list args; \ + \ + va_start(args, fmt); \ + vprintk_default(loglevel, fmt, args); \ + va_end(args); \ +} \ +EXPORT_SYMBOL(func) + +define_pr_level(__pr_emerg, LOGLEVEL_EMERG); +define_pr_level(__pr_alert, LOGLEVEL_ALERT); +define_pr_level(__pr_crit, LOGLEVEL_CRIT); +define_pr_level(__pr_err, LOGLEVEL_ERR); +define_pr_level(__pr_warn, LOGLEVEL_WARNING); +define_pr_level(__pr_notice, LOGLEVEL_NOTICE); +define_pr_level(__pr_info, LOGLEVEL_INFO); +#endif + +int vprintk_default(int level, const char *fmt, va_list args) { int r; @@ -1811,7 +1832,7 @@ int vprintk_default(const char *fmt, va_list args) return r; } #endif - r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); + r = vprintk_emit(0, level, NULL, 0, fmt, args); return r; } @@ -1844,7 +1865,7 @@ asmlinkage __visible int printk(const char *fmt, ...) int r; va_start(args, fmt); - r = vprintk_func(fmt, args); + r = vprintk_func(LOGLEVEL_DEFAULT, fmt, args); va_end(args); return r; -- cgit v1.2.3-55-g7522 From 6b1d174b0c27b5de421eda55c2731f32b6bd9852 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 2 Aug 2016 14:04:04 -0700 Subject: ratelimit: extend to print suppressed messages on release Extend the ratelimiting facility to print the amount of suppressed lines when it is being released. This use case is aimed at short-termed, burst-like users for which we want to output the suppressed lines stats only once, after it has been disposed of. For an example, see /dev/kmsg usage in a follow-on patch. Also, change the printk() line we issue on release to not use "callbacks" as it is misleading: we're not suppressing callbacks but printk() calls. This has been separated from a previous patch by Linus. Link: http://lkml.kernel.org/r/20160716061745.15795-2-bp@alien8.de Signed-off-by: Borislav Petkov Cc: Dave Young Cc: Franck Bui Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Uwe Kleine-König Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ratelimit.h | 38 +++++++++++++++++++++++++++++++++----- lib/ratelimit.c | 10 ++++++---- 2 files changed, 39 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 18102529254e..57c9e0622a38 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -2,11 +2,15 @@ #define _LINUX_RATELIMIT_H #include +#include #include #define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) #define DEFAULT_RATELIMIT_BURST 10 +/* issue num suppressed message on exit */ +#define RATELIMIT_MSG_ON_RELEASE BIT(0) + struct ratelimit_state { raw_spinlock_t lock; /* protect the state */ @@ -15,6 +19,7 @@ struct ratelimit_state { int printed; int missed; unsigned long begin; + unsigned long flags; }; #define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \ @@ -34,12 +39,35 @@ struct ratelimit_state { static inline void ratelimit_state_init(struct ratelimit_state *rs, int interval, int burst) { + memset(rs, 0, sizeof(*rs)); + raw_spin_lock_init(&rs->lock); - rs->interval = interval; - rs->burst = burst; - rs->printed = 0; - rs->missed = 0; - rs->begin = 0; + rs->interval = interval; + rs->burst = burst; +} + +static inline void ratelimit_default_init(struct ratelimit_state *rs) +{ + return ratelimit_state_init(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); +} + +static inline void ratelimit_state_exit(struct ratelimit_state *rs) +{ + if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) + return; + + if (rs->missed) { + pr_warn("%s: %d output lines suppressed due to ratelimiting\n", + current->comm, rs->missed); + rs->missed = 0; + } +} + +static inline void +ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags) +{ + rs->flags = flags; } extern struct ratelimit_state printk_ratelimit_state; diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 2c5de86460c5..08f8043cac61 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -46,12 +46,14 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) rs->begin = jiffies; if (time_is_before_jiffies(rs->begin + rs->interval)) { - if (rs->missed) - printk(KERN_WARNING "%s: %d callbacks suppressed\n", - func, rs->missed); + if (rs->missed) { + if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) { + pr_warn("%s: %d callbacks suppressed\n", func, rs->missed); + rs->missed = 0; + } + } rs->begin = jiffies; rs->printed = 0; - rs->missed = 0; } if (rs->burst && rs->burst > rs->printed) { rs->printed++; -- cgit v1.2.3-55-g7522 From 750afe7babd117daabebf4855da18e4418ea845e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 2 Aug 2016 14:04:07 -0700 Subject: printk: add kernel parameter to control writes to /dev/kmsg Add a "printk.devkmsg" kernel command line parameter which controls how userspace writes into /dev/kmsg. It has three options: * ratelimit - ratelimit logging from userspace. * on - unlimited logging from userspace * off - logging from userspace gets ignored The default setting is to ratelimit the messages written to it. This changes the kernel default setting of "on" to "ratelimit" and we do that because we want to keep userspace spamming /dev/kmsg to sane levels. This is especially moot when a small kernel log buffer wraps around and messages get lost. So the ratelimiting setting should be a sane setting where kernel messages should have a bit higher chance of survival from all the spamming. It additionally does not limit logging to /dev/kmsg while the system is booting if we haven't disabled it on the command line. Furthermore, we can control the logging from a lower priority sysctl interface - kernel.printk_devkmsg. That interface will succeed only if printk.devkmsg *hasn't* been supplied on the command line. If it has, then printk.devkmsg is a one-time setting which remains for the duration of the system lifetime. This "locking" of the setting is to prevent userspace from changing the logging on us through sysctl(2). This patch is based on previous patches from Linus and Steven. [bp@suse.de: fixes] Link: http://lkml.kernel.org/r/20160719072344.GC25563@nazgul.tnic Link: http://lkml.kernel.org/r/20160716061745.15795-3-bp@alien8.de Signed-off-by: Borislav Petkov Cc: Dave Young Cc: Franck Bui Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Uwe Kleine-König Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 7 ++ Documentation/sysctl/kernel.txt | 14 ++++ include/linux/printk.h | 9 +++ kernel/printk/printk.c | 142 ++++++++++++++++++++++++++++++++++-- kernel/sysctl.c | 7 ++ 5 files changed, 171 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e24aa11e8f8a..b240540e49f2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3173,6 +3173,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Format: (1/Y/y=enable, 0/N/n=disable) default: disabled + printk.devkmsg={on,off,ratelimit} + Control writing to /dev/kmsg. + on - unlimited logging to /dev/kmsg from userspace + off - logging to /dev/kmsg disabled + ratelimit - ratelimit the logging + Default: ratelimit + printk.time= Show timing data prefixed to each printk message line Format: (1/Y/y=enable, 0/N/n=disable) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 33204604de6c..ffab8b5caa60 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -764,6 +764,20 @@ send before ratelimiting kicks in. ============================================================== +printk_devkmsg: + +Control the logging to /dev/kmsg from userspace: + +ratelimit: default, ratelimited +on: unlimited logging to /dev/kmsg from userspace +off: logging to /dev/kmsg disabled + +The kernel command line parameter printk.devkmsg= overrides this and is +a one-time setting until next reboot: once set, it cannot be changed by +this sysctl interface anymore. + +============================================================== + randomize_va_space: This option can be used to select the type of process address diff --git a/include/linux/printk.h b/include/linux/printk.h index c2158f0f1499..8dc155dab3ed 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -61,6 +61,11 @@ static inline void console_verbose(void) console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; } +/* strlen("ratelimit") + 1 */ +#define DEVKMSG_STR_MAX_SIZE 10 +extern char devkmsg_log_str[]; +struct ctl_table; + struct va_format { const char *fmt; va_list *va; @@ -175,6 +180,10 @@ extern int printk_delay_msec; extern int dmesg_restrict; extern int kptr_restrict; +extern int +devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void __user *buf, + size_t *lenp, loff_t *ppos); + extern void wake_up_klogd(void); char *log_buf_addr_get(void); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 70c66c5ba212..a5ef95ca18c9 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -85,6 +85,111 @@ static struct lockdep_map console_lock_dep_map = { }; #endif +enum devkmsg_log_bits { + __DEVKMSG_LOG_BIT_ON = 0, + __DEVKMSG_LOG_BIT_OFF, + __DEVKMSG_LOG_BIT_LOCK, +}; + +enum devkmsg_log_masks { + DEVKMSG_LOG_MASK_ON = BIT(__DEVKMSG_LOG_BIT_ON), + DEVKMSG_LOG_MASK_OFF = BIT(__DEVKMSG_LOG_BIT_OFF), + DEVKMSG_LOG_MASK_LOCK = BIT(__DEVKMSG_LOG_BIT_LOCK), +}; + +/* Keep both the 'on' and 'off' bits clear, i.e. ratelimit by default: */ +#define DEVKMSG_LOG_MASK_DEFAULT 0 + +static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; + +static int __control_devkmsg(char *str) +{ + if (!str) + return -EINVAL; + + if (!strncmp(str, "on", 2)) { + devkmsg_log = DEVKMSG_LOG_MASK_ON; + return 2; + } else if (!strncmp(str, "off", 3)) { + devkmsg_log = DEVKMSG_LOG_MASK_OFF; + return 3; + } else if (!strncmp(str, "ratelimit", 9)) { + devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT; + return 9; + } + return -EINVAL; +} + +static int __init control_devkmsg(char *str) +{ + if (__control_devkmsg(str) < 0) + return 1; + + /* + * Set sysctl string accordingly: + */ + if (devkmsg_log == DEVKMSG_LOG_MASK_ON) { + memset(devkmsg_log_str, 0, DEVKMSG_STR_MAX_SIZE); + strncpy(devkmsg_log_str, "on", 2); + } else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF) { + memset(devkmsg_log_str, 0, DEVKMSG_STR_MAX_SIZE); + strncpy(devkmsg_log_str, "off", 3); + } + /* else "ratelimit" which is set by default. */ + + /* + * Sysctl cannot change it anymore. The kernel command line setting of + * this parameter is to force the setting to be permanent throughout the + * runtime of the system. This is a precation measure against userspace + * trying to be a smarta** and attempting to change it up on us. + */ + devkmsg_log |= DEVKMSG_LOG_MASK_LOCK; + + return 0; +} +__setup("printk.devkmsg=", control_devkmsg); + +char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; + +int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char old_str[DEVKMSG_STR_MAX_SIZE]; + unsigned int old; + int err; + + if (write) { + if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK) + return -EINVAL; + + old = devkmsg_log; + strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE); + } + + err = proc_dostring(table, write, buffer, lenp, ppos); + if (err) + return err; + + if (write) { + err = __control_devkmsg(devkmsg_log_str); + + /* + * Do not accept an unknown string OR a known string with + * trailing crap... + */ + if (err < 0 || (err + 1 != *lenp)) { + + /* ... and restore old setting. */ + devkmsg_log = old; + strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE); + + return -EINVAL; + } + } + + return 0; +} + /* * Number of registered extended console drivers. * @@ -613,6 +718,7 @@ struct devkmsg_user { u64 seq; u32 idx; enum log_flags prev; + struct ratelimit_state rs; struct mutex lock; char buf[CONSOLE_EXT_LOG_MAX]; }; @@ -622,11 +728,24 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) char *buf, *line; int level = default_message_loglevel; int facility = 1; /* LOG_USER */ + struct file *file = iocb->ki_filp; + struct devkmsg_user *user = file->private_data; size_t len = iov_iter_count(from); ssize_t ret = len; - if (len > LOG_LINE_MAX) + if (!user || len > LOG_LINE_MAX) return -EINVAL; + + /* Ignore when user logging is disabled. */ + if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) + return len; + + /* Ratelimit when not explicitly enabled. */ + if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) { + if (!___ratelimit(&user->rs, current->comm)) + return ret; + } + buf = kmalloc(len+1, GFP_KERNEL); if (buf == NULL) return -ENOMEM; @@ -799,19 +918,24 @@ static int devkmsg_open(struct inode *inode, struct file *file) struct devkmsg_user *user; int err; - /* write-only does not need any file context */ - if ((file->f_flags & O_ACCMODE) == O_WRONLY) - return 0; + if (devkmsg_log & DEVKMSG_LOG_MASK_OFF) + return -EPERM; - err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, - SYSLOG_FROM_READER); - if (err) - return err; + /* write-only does not need any file context */ + if ((file->f_flags & O_ACCMODE) != O_WRONLY) { + err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, + SYSLOG_FROM_READER); + if (err) + return err; + } user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); if (!user) return -ENOMEM; + ratelimit_default_init(&user->rs); + ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE); + mutex_init(&user->lock); raw_spin_lock_irq(&logbuf_lock); @@ -830,6 +954,8 @@ static int devkmsg_release(struct inode *inode, struct file *file) if (!user) return 0; + ratelimit_state_exit(&user->rs); + mutex_destroy(&user->lock); kfree(user); return 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 53954631a4e1..b43d0b27c1fe 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -813,6 +813,13 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &ten_thousand, }, + { + .procname = "printk_devkmsg", + .data = devkmsg_log_str, + .maxlen = DEVKMSG_STR_MAX_SIZE, + .mode = 0644, + .proc_handler = devkmsg_sysctl_set_loglvl, + }, { .procname = "dmesg_restrict", .data = &dmesg_restrict, -- cgit v1.2.3-55-g7522 From a23216a2f1f8a30a3b6588c743681651e4a6aa94 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 2 Aug 2016 14:04:19 -0700 Subject: radix-tree: fix comment about "exceptional" bits The bottom two bits of radix tree entries are reserved for special use by the radix tree code itself. A comment detailing their usage was added by commit 3bcadd6fa6c4 ("radix-tree: free up the bottom bit of exceptional entries for reuse") This comment states that if the bottom two bits are '11', this means that this is a locked exceptional entry. It turns out that this bit combination was never actually used. Radix tree locking for DAX was indeed implemented, but it actually used the third LSB: /* We use lowest available exceptional entry bit for locking */ #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) This locking code was also made specific to the DAX code instead of being generally implemented in radix-tree.h. So, fix the comment. Link: http://lkml.kernel.org/r/1468997731-2155-1-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Johannes Thumshirn Cc: Dave Chinner Cc: Jan Kara Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index cbfee507c839..4c45105dece3 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -35,7 +35,7 @@ * 00 - data pointer * 01 - internal entry * 10 - exceptional entry - * 11 - locked exceptional entry + * 11 - this bit combination is currently unused/reserved * * The internal entry may be a pointer to the next level in the tree, a * sibling entry, or an indicator that the entry in this slot has been moved -- cgit v1.2.3-55-g7522 From a098ecd2fa7db8fa4fcc178a43627b29b226edb9 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 2 Aug 2016 14:04:28 -0700 Subject: firmware: support loading into a pre-allocated buffer Some systems are memory constrained but they need to load very large firmwares. The firmware subsystem allows drivers to request this firmware be loaded from the filesystem, but this requires that the entire firmware be loaded into kernel memory first before it's provided to the driver. This can lead to a situation where we map the firmware twice, once to load the firmware into kernel memory and once to copy the firmware into the final resting place. This creates needless memory pressure and delays loading because we have to copy from kernel memory to somewhere else. Let's add a request_firmware_into_buf() API that allows drivers to request firmware be loaded directly into a pre-allocated buffer. This skips the intermediate step of allocating a buffer in kernel memory to hold the firmware image while it's read from the filesystem. It also requires that drivers know how much memory they'll require before requesting the firmware and negates any benefits of firmware caching because the firmware layer doesn't manage the buffer lifetime. For a 16MB buffer, about half the time is spent performing a memcpy from the buffer to the final resting place. I see loading times go from 0.081171 seconds to 0.047696 seconds after applying this patch. Plus the vmalloc pressure is reduced. This is based on a patch from Vikram Mulukutla on codeaurora.org: https://www.codeaurora.org/cgit/quic/la/kernel/msm-3.18/commit/drivers/base/firmware_class.c?h=rel/msm-3.18&id=0a328c5f6cd999f5c591f172216835636f39bcb5 Link: http://lkml.kernel.org/r/20160607164741.31849-4-stephen.boyd@linaro.org Signed-off-by: Stephen Boyd Cc: Mimi Zohar Cc: Vikram Mulukutla Cc: Mark Brown Cc: Ming Lei Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/firmware_class.c | 125 +++++++++++++++++++++++++++++++++--------- fs/exec.c | 9 ++- include/linux/firmware.h | 8 +++ include/linux/fs.h | 1 + 4 files changed, 114 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 45ed20cefa10..22d1760a4278 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -46,7 +46,8 @@ MODULE_LICENSE("GPL"); extern struct builtin_fw __start_builtin_fw[]; extern struct builtin_fw __end_builtin_fw[]; -static bool fw_get_builtin_firmware(struct firmware *fw, const char *name) +static bool fw_get_builtin_firmware(struct firmware *fw, const char *name, + void *buf, size_t size) { struct builtin_fw *b_fw; @@ -54,6 +55,9 @@ static bool fw_get_builtin_firmware(struct firmware *fw, const char *name) if (strcmp(name, b_fw->name) == 0) { fw->size = b_fw->size; fw->data = b_fw->data; + + if (buf && fw->size <= size) + memcpy(buf, fw->data, fw->size); return true; } } @@ -74,7 +78,9 @@ static bool fw_is_builtin_firmware(const struct firmware *fw) #else /* Module case - no builtin firmware support */ -static inline bool fw_get_builtin_firmware(struct firmware *fw, const char *name) +static inline bool fw_get_builtin_firmware(struct firmware *fw, + const char *name, void *buf, + size_t size) { return false; } @@ -144,6 +150,7 @@ struct firmware_buf { unsigned long status; void *data; size_t size; + size_t allocated_size; #ifdef CONFIG_FW_LOADER_USER_HELPER bool is_paged_buf; bool need_uevent; @@ -179,7 +186,8 @@ static DEFINE_MUTEX(fw_lock); static struct firmware_cache fw_cache; static struct firmware_buf *__allocate_fw_buf(const char *fw_name, - struct firmware_cache *fwc) + struct firmware_cache *fwc, + void *dbuf, size_t size) { struct firmware_buf *buf; @@ -195,6 +203,8 @@ static struct firmware_buf *__allocate_fw_buf(const char *fw_name, kref_init(&buf->ref); buf->fwc = fwc; + buf->data = dbuf; + buf->allocated_size = size; init_completion(&buf->completion); #ifdef CONFIG_FW_LOADER_USER_HELPER INIT_LIST_HEAD(&buf->pending_list); @@ -218,7 +228,8 @@ static struct firmware_buf *__fw_lookup_buf(const char *fw_name) static int fw_lookup_and_allocate_buf(const char *fw_name, struct firmware_cache *fwc, - struct firmware_buf **buf) + struct firmware_buf **buf, void *dbuf, + size_t size) { struct firmware_buf *tmp; @@ -230,7 +241,7 @@ static int fw_lookup_and_allocate_buf(const char *fw_name, *buf = tmp; return 1; } - tmp = __allocate_fw_buf(fw_name, fwc); + tmp = __allocate_fw_buf(fw_name, fwc, dbuf, size); if (tmp) list_add(&tmp->list, &fwc->head); spin_unlock(&fwc->lock); @@ -262,6 +273,7 @@ static void __fw_free_buf(struct kref *ref) vfree(buf->pages); } else #endif + if (!buf->allocated_size) vfree(buf->data); kfree_const(buf->fw_id); kfree(buf); @@ -302,13 +314,21 @@ static void fw_finish_direct_load(struct device *device, mutex_unlock(&fw_lock); } -static int fw_get_filesystem_firmware(struct device *device, - struct firmware_buf *buf) +static int +fw_get_filesystem_firmware(struct device *device, struct firmware_buf *buf) { loff_t size; int i, len; int rc = -ENOENT; char *path; + enum kernel_read_file_id id = READING_FIRMWARE; + size_t msize = INT_MAX; + + /* Already populated data member means we're loading into a buffer */ + if (buf->data) { + id = READING_FIRMWARE_PREALLOC_BUFFER; + msize = buf->allocated_size; + } path = __getname(); if (!path) @@ -327,8 +347,8 @@ static int fw_get_filesystem_firmware(struct device *device, } buf->size = 0; - rc = kernel_read_file_from_path(path, &buf->data, &size, - INT_MAX, READING_FIRMWARE); + rc = kernel_read_file_from_path(path, &buf->data, &size, msize, + id); if (rc) { if (rc == -ENOENT) dev_dbg(device, "loading %s failed with error %d\n", @@ -692,6 +712,15 @@ out: static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store); +static void firmware_rw_buf(struct firmware_buf *buf, char *buffer, + loff_t offset, size_t count, bool read) +{ + if (read) + memcpy(buffer, buf->data + offset, count); + else + memcpy(buf->data + offset, buffer, count); +} + static void firmware_rw(struct firmware_buf *buf, char *buffer, loff_t offset, size_t count, bool read) { @@ -739,7 +768,10 @@ static ssize_t firmware_data_read(struct file *filp, struct kobject *kobj, ret_count = count; - firmware_rw(buf, buffer, offset, count, true); + if (buf->data) + firmware_rw_buf(buf, buffer, offset, count, true); + else + firmware_rw(buf, buffer, offset, count, true); out: mutex_unlock(&fw_lock); @@ -815,12 +847,21 @@ static ssize_t firmware_data_write(struct file *filp, struct kobject *kobj, goto out; } - retval = fw_realloc_buffer(fw_priv, offset + count); - if (retval) - goto out; + if (buf->data) { + if (offset + count > buf->allocated_size) { + retval = -ENOMEM; + goto out; + } + firmware_rw_buf(buf, buffer, offset, count, false); + retval = count; + } else { + retval = fw_realloc_buffer(fw_priv, offset + count); + if (retval) + goto out; - retval = count; - firmware_rw(buf, buffer, offset, count, false); + retval = count; + firmware_rw(buf, buffer, offset, count, false); + } buf->size = max_t(size_t, offset + count, buf->size); out: @@ -890,7 +931,8 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, struct firmware_buf *buf = fw_priv->buf; /* fall back on userspace loading */ - buf->is_paged_buf = true; + if (!buf->data) + buf->is_paged_buf = true; dev_set_uevent_suppress(f_dev, true); @@ -925,7 +967,7 @@ static int _request_firmware_load(struct firmware_priv *fw_priv, if (is_fw_load_aborted(buf)) retval = -EAGAIN; - else if (!buf->data) + else if (buf->is_paged_buf && !buf->data) retval = -ENOMEM; device_del(f_dev); @@ -1008,7 +1050,7 @@ static int sync_cached_firmware_buf(struct firmware_buf *buf) */ static int _request_firmware_prepare(struct firmware **firmware_p, const char *name, - struct device *device) + struct device *device, void *dbuf, size_t size) { struct firmware *firmware; struct firmware_buf *buf; @@ -1021,12 +1063,12 @@ _request_firmware_prepare(struct firmware **firmware_p, const char *name, return -ENOMEM; } - if (fw_get_builtin_firmware(firmware, name)) { + if (fw_get_builtin_firmware(firmware, name, dbuf, size)) { dev_dbg(device, "using built-in %s\n", name); return 0; /* assigned */ } - ret = fw_lookup_and_allocate_buf(name, &fw_cache, &buf); + ret = fw_lookup_and_allocate_buf(name, &fw_cache, &buf, dbuf, size); /* * bind with 'buf' now to avoid warning in failure path @@ -1089,7 +1131,8 @@ static int assign_firmware_buf(struct firmware *fw, struct device *device, /* called from request_firmware() and request_firmware_work_func() */ static int _request_firmware(const struct firmware **firmware_p, const char *name, - struct device *device, unsigned int opt_flags) + struct device *device, void *buf, size_t size, + unsigned int opt_flags) { struct firmware *fw = NULL; long timeout; @@ -1103,7 +1146,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name, goto out; } - ret = _request_firmware_prepare(&fw, name, device); + ret = _request_firmware_prepare(&fw, name, device, buf, size); if (ret <= 0) /* error or already assigned */ goto out; @@ -1182,7 +1225,7 @@ request_firmware(const struct firmware **firmware_p, const char *name, /* Need to pin this module until return */ __module_get(THIS_MODULE); - ret = _request_firmware(firmware_p, name, device, + ret = _request_firmware(firmware_p, name, device, NULL, 0, FW_OPT_UEVENT | FW_OPT_FALLBACK); module_put(THIS_MODULE); return ret; @@ -1206,13 +1249,43 @@ int request_firmware_direct(const struct firmware **firmware_p, int ret; __module_get(THIS_MODULE); - ret = _request_firmware(firmware_p, name, device, + ret = _request_firmware(firmware_p, name, device, NULL, 0, FW_OPT_UEVENT | FW_OPT_NO_WARN); module_put(THIS_MODULE); return ret; } EXPORT_SYMBOL_GPL(request_firmware_direct); +/** + * request_firmware_into_buf - load firmware into a previously allocated buffer + * @firmware_p: pointer to firmware image + * @name: name of firmware file + * @device: device for which firmware is being loaded and DMA region allocated + * @buf: address of buffer to load firmware into + * @size: size of buffer + * + * This function works pretty much like request_firmware(), but it doesn't + * allocate a buffer to hold the firmware data. Instead, the firmware + * is loaded directly into the buffer pointed to by @buf and the @firmware_p + * data member is pointed at @buf. + * + * This function doesn't cache firmware either. + */ +int +request_firmware_into_buf(const struct firmware **firmware_p, const char *name, + struct device *device, void *buf, size_t size) +{ + int ret; + + __module_get(THIS_MODULE); + ret = _request_firmware(firmware_p, name, device, buf, size, + FW_OPT_UEVENT | FW_OPT_FALLBACK | + FW_OPT_NOCACHE); + module_put(THIS_MODULE); + return ret; +} +EXPORT_SYMBOL(request_firmware_into_buf); + /** * release_firmware: - release the resource associated with a firmware image * @fw: firmware resource to release @@ -1245,7 +1318,7 @@ static void request_firmware_work_func(struct work_struct *work) fw_work = container_of(work, struct firmware_work, work); - _request_firmware(&fw, fw_work->name, fw_work->device, + _request_firmware(&fw, fw_work->name, fw_work->device, NULL, 0, fw_work->opt_flags); fw_work->cont(fw, fw_work->context); put_device(fw_work->device); /* taken in request_firmware_nowait() */ @@ -1378,7 +1451,7 @@ static int uncache_firmware(const char *fw_name) pr_debug("%s: %s\n", __func__, fw_name); - if (fw_get_builtin_firmware(&fw, fw_name)) + if (fw_get_builtin_firmware(&fw, fw_name, NULL, 0)) return 0; buf = fw_lookup_buf(fw_name); diff --git a/fs/exec.c b/fs/exec.c index ca239fc86d8d..a1789cd684bf 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -866,7 +866,8 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, goto out; } - *buf = vmalloc(i_size); + if (id != READING_FIRMWARE_PREALLOC_BUFFER) + *buf = vmalloc(i_size); if (!*buf) { ret = -ENOMEM; goto out; @@ -897,8 +898,10 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size, out_free: if (ret < 0) { - vfree(*buf); - *buf = NULL; + if (id != READING_FIRMWARE_PREALLOC_BUFFER) { + vfree(*buf); + *buf = NULL; + } } out: diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 5c41c5e75b5c..b1f9f0ccb8ac 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -47,6 +47,8 @@ int request_firmware_nowait( void (*cont)(const struct firmware *fw, void *context)); int request_firmware_direct(const struct firmware **fw, const char *name, struct device *device); +int request_firmware_into_buf(const struct firmware **firmware_p, + const char *name, struct device *device, void *buf, size_t size); void release_firmware(const struct firmware *fw); #else @@ -75,5 +77,11 @@ static inline int request_firmware_direct(const struct firmware **fw, return -EINVAL; } +static inline int request_firmware_into_buf(const struct firmware **firmware_p, + const char *name, struct device *device, void *buf, size_t size) +{ + return -EINVAL; +} + #endif #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 577365a77b47..f3f0b4c8e8ac 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2652,6 +2652,7 @@ extern int do_pipe_flags(int *, int); #define __kernel_read_file_id(id) \ id(UNKNOWN, unknown) \ id(FIRMWARE, firmware) \ + id(FIRMWARE_PREALLOC_BUFFER, firmware) \ id(MODULE, kernel-module) \ id(KEXEC_IMAGE, kexec-image) \ id(KEXEC_INITRAMFS, kexec-initramfs) \ -- cgit v1.2.3-55-g7522 From e63e88bc53bac7e4c3f592f8126c51a7569be673 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 2 Aug 2016 14:05:30 -0700 Subject: nilfs2: move ioctl interface and disk layout to uapi separately The header file "include/linux/nilfs2_fs.h" is composed of parts for ioctl and disk format, and both are intended to be shared with user space programs. This moves them to the uapi directory "include/uapi/linux" splitting the file to "nilfs2_api.h" and "nilfs2_ondisk.h". The following minor changes are accompanied by this migration: - nilfs_direct_node struct in nilfs2/direct.h is converged to nilfs2_ondisk.h because it's an on-disk structure. - inline functions nilfs_rec_len_from_disk() and nilfs_rec_len_to_disk() are moved to nilfs2/dir.c. Link: http://lkml.kernel.org/r/1465825507-3407-4-git-send-email-konishi.ryusuke@lab.ntt.co.jp Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/nilfs2.txt | 3 +- Documentation/ioctl/ioctl-number.txt | 2 +- MAINTAINERS | 3 +- fs/nilfs2/bmap.h | 2 +- fs/nilfs2/btree.h | 2 +- fs/nilfs2/cpfile.c | 1 - fs/nilfs2/cpfile.h | 3 +- fs/nilfs2/dat.h | 1 + fs/nilfs2/dir.c | 22 + fs/nilfs2/direct.h | 10 - fs/nilfs2/ifile.h | 1 - fs/nilfs2/ioctl.c | 1 - fs/nilfs2/nilfs.h | 3 +- fs/nilfs2/segment.h | 1 - fs/nilfs2/sufile.c | 1 - fs/nilfs2/sufile.h | 1 - include/linux/nilfs2_fs.h | 934 ----------------------------------- include/uapi/linux/nilfs2_api.h | 292 +++++++++++ include/uapi/linux/nilfs2_ondisk.h | 650 ++++++++++++++++++++++++ 19 files changed, 976 insertions(+), 957 deletions(-) delete mode 100644 include/linux/nilfs2_fs.h create mode 100644 include/uapi/linux/nilfs2_api.h create mode 100644 include/uapi/linux/nilfs2_ondisk.h (limited to 'include') diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index 5b21ef76f751..c0727dc36271 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt @@ -267,7 +267,8 @@ among NILFS2 files can be depicted as follows: `-- file (ino=yy) ( regular file, directory, or symlink ) -For detail on the format of each file, please see include/linux/nilfs2_fs.h. +For detail on the format of each file, please see nilfs2_ondisk.h +located at include/uapi/linux directory. There are no patents or other intellectual property that we protect with regard to the design of NILFS2. It is allowed to replicate the diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 56af5e43e9c0..81c7f2bb7daf 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -248,7 +248,7 @@ Code Seq#(hex) Include File Comments 'm' 00 drivers/scsi/megaraid/megaraid_ioctl.h conflict! 'm' 00-1F net/irda/irmod.h conflict! 'n' 00-7F linux/ncp_fs.h and fs/ncpfs/ioctl.c -'n' 80-8F linux/nilfs2_fs.h NILFS2 +'n' 80-8F uapi/linux/nilfs2_api.h NILFS2 'n' E0-FF linux/matroxfb.h matroxfb 'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2 'o' 00-03 mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps) diff --git a/MAINTAINERS b/MAINTAINERS index bb51bbbc9e1d..e9eacacf0f08 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8258,8 +8258,9 @@ T: git git://github.com/konis/nilfs2.git S: Supported F: Documentation/filesystems/nilfs2.txt F: fs/nilfs2/ -F: include/linux/nilfs2_fs.h F: include/trace/events/nilfs2.h +F: include/uapi/linux/nilfs2_api.h +F: include/uapi/linux/nilfs2_ondisk.h NINJA SCSI-3 / NINJA SCSI-32Bi (16bit/CardBus) PCMCIA SCSI HOST ADAPTER DRIVER M: YOKOTA Hiroshi diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index b6a4c8f93ac8..2b6ffbe5997a 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -22,7 +22,7 @@ #include #include #include -#include +#include /* nilfs_binfo, nilfs_inode, etc */ #include "alloc.h" #include "dat.h" diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index df1a25faa83b..2184e47fa4bf 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -22,7 +22,7 @@ #include #include #include -#include +#include /* nilfs_btree_node */ #include "btnode.h" #include "bmap.h" diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 19d9f4ae8347..a15a1601e931 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -21,7 +21,6 @@ #include #include #include -#include #include "mdt.h" #include "cpfile.h" diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index 0249744ae234..6eca972f9673 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -21,7 +21,8 @@ #include #include -#include +#include /* nilfs_cpstat */ +#include /* nilfs_inode, nilfs_checkpoint */ int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int, diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index abbfdabcabea..57dc6cf466d0 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -22,6 +22,7 @@ #include #include #include +#include /* nilfs_inode, nilfs_checkpoint */ struct nilfs_palloc_req; diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 746956d2937a..908ebbf0ac7e 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -42,6 +42,28 @@ #include "nilfs.h" #include "page.h" +static inline unsigned int nilfs_rec_len_from_disk(__le16 dlen) +{ + unsigned int len = le16_to_cpu(dlen); + +#if (PAGE_SIZE >= 65536) + if (len == NILFS_MAX_REC_LEN) + return 1 << 16; +#endif + return len; +} + +static inline __le16 nilfs_rec_len_to_disk(unsigned int len) +{ +#if (PAGE_SIZE >= 65536) + if (len == (1 << 16)) + return cpu_to_le16(NILFS_MAX_REC_LEN); + + BUG_ON(len > (1 << 16)); +#endif + return cpu_to_le16(len); +} + /* * nilfs uses block-sized chunks. Arguably, sector-sized ones would be * more robust, but we have what we have diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h index 3015a6e78724..cfe85e848bba 100644 --- a/fs/nilfs2/direct.h +++ b/fs/nilfs2/direct.h @@ -24,16 +24,6 @@ #include "bmap.h" -/** - * struct nilfs_direct_node - direct node - * @dn_flags: flags - * @dn_pad: padding - */ -struct nilfs_direct_node { - __u8 dn_flags; - __u8 pad[7]; -}; - #define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1) #define NILFS_DIRECT_KEY_MIN 0 #define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 23ad2f091e76..188b94fe0ec5 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -23,7 +23,6 @@ #include #include -#include #include "mdt.h" #include "alloc.h" diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 827283fe9525..f1d7989459fd 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -25,7 +25,6 @@ #include /* compat_ptr() */ #include /* mnt_want_write_file(), mnt_drop_write_file() */ #include -#include #include "nilfs.h" #include "segment.h" #include "bmap.h" diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 2ba8a146af1f..33f8c8fc96e8 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -23,7 +23,8 @@ #include #include #include -#include +#include +#include #include "the_nilfs.h" #include "bmap.h" diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 6565c10b7b76..1060949d7dd2 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -23,7 +23,6 @@ #include #include #include -#include #include "nilfs.h" struct nilfs_root; diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 12d11de93602..1541a1e9221a 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -22,7 +22,6 @@ #include #include #include -#include #include "mdt.h" #include "sufile.h" diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 46e89872294c..158a9190c8ec 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -21,7 +21,6 @@ #include #include -#include #include "mdt.h" diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h deleted file mode 100644 index 5988dd57ba66..000000000000 --- a/include/linux/nilfs2_fs.h +++ /dev/null @@ -1,934 +0,0 @@ -/* - * nilfs2_fs.h - NILFS2 on-disk structures and common declarations. - * - * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * Written by Koji Sato and Ryusuke Konishi. - */ -/* - * linux/include/linux/ext2_fs.h - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/include/linux/minix_fs.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#ifndef _LINUX_NILFS_FS_H -#define _LINUX_NILFS_FS_H - -#include -#include -#include -#include - - -#define NILFS_INODE_BMAP_SIZE 7 -/** - * struct nilfs_inode - structure of an inode on disk - * @i_blocks: blocks count - * @i_size: size in bytes - * @i_ctime: creation time (seconds) - * @i_mtime: modification time (seconds) - * @i_ctime_nsec: creation time (nano seconds) - * @i_mtime_nsec: modification time (nano seconds) - * @i_uid: user id - * @i_gid: group id - * @i_mode: file mode - * @i_links_count: links count - * @i_flags: file flags - * @i_bmap: block mapping - * @i_xattr: extended attributes - * @i_generation: file generation (for NFS) - * @i_pad: padding - */ -struct nilfs_inode { - __le64 i_blocks; - __le64 i_size; - __le64 i_ctime; - __le64 i_mtime; - __le32 i_ctime_nsec; - __le32 i_mtime_nsec; - __le32 i_uid; - __le32 i_gid; - __le16 i_mode; - __le16 i_links_count; - __le32 i_flags; - __le64 i_bmap[NILFS_INODE_BMAP_SIZE]; -#define i_device_code i_bmap[0] - __le64 i_xattr; - __le32 i_generation; - __le32 i_pad; -}; - -#define NILFS_MIN_INODE_SIZE 128 - -/** - * struct nilfs_super_root - structure of super root - * @sr_sum: check sum - * @sr_bytes: byte count of the structure - * @sr_flags: flags (reserved) - * @sr_nongc_ctime: write time of the last segment not for cleaner operation - * @sr_dat: DAT file inode - * @sr_cpfile: checkpoint file inode - * @sr_sufile: segment usage file inode - */ -struct nilfs_super_root { - __le32 sr_sum; - __le16 sr_bytes; - __le16 sr_flags; - __le64 sr_nongc_ctime; - struct nilfs_inode sr_dat; - struct nilfs_inode sr_cpfile; - struct nilfs_inode sr_sufile; -}; - -#define NILFS_SR_MDT_OFFSET(inode_size, i) \ - ((unsigned long)&((struct nilfs_super_root *)0)->sr_dat + \ - (inode_size) * (i)) -#define NILFS_SR_DAT_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 0) -#define NILFS_SR_CPFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 1) -#define NILFS_SR_SUFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 2) -#define NILFS_SR_BYTES(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 3) - -/* - * Maximal mount counts - */ -#define NILFS_DFL_MAX_MNT_COUNT 50 /* 50 mounts */ - -/* - * File system states (sbp->s_state, nilfs->ns_mount_state) - */ -#define NILFS_VALID_FS 0x0001 /* Unmounted cleanly */ -#define NILFS_ERROR_FS 0x0002 /* Errors detected */ -#define NILFS_RESIZE_FS 0x0004 /* Resize required */ - -/* - * Mount flags (sbi->s_mount_opt) - */ -#define NILFS_MOUNT_ERROR_MODE 0x0070 /* Error mode mask */ -#define NILFS_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ -#define NILFS_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ -#define NILFS_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ -#define NILFS_MOUNT_BARRIER 0x1000 /* Use block barriers */ -#define NILFS_MOUNT_STRICT_ORDER 0x2000 /* - * Apply strict in-order - * semantics also for data - */ -#define NILFS_MOUNT_NORECOVERY 0x4000 /* - * Disable write access during - * mount-time recovery - */ -#define NILFS_MOUNT_DISCARD 0x8000 /* Issue DISCARD requests */ - - -/** - * struct nilfs_super_block - structure of super block on disk - */ -struct nilfs_super_block { -/*00*/ __le32 s_rev_level; /* Revision level */ - __le16 s_minor_rev_level; /* minor revision level */ - __le16 s_magic; /* Magic signature */ - - __le16 s_bytes; /* - * Bytes count of CRC calculation - * for this structure. s_reserved - * is excluded. - */ - __le16 s_flags; /* flags */ - __le32 s_crc_seed; /* Seed value of CRC calculation */ -/*10*/ __le32 s_sum; /* Check sum of super block */ - - __le32 s_log_block_size; /* - * Block size represented as follows - * blocksize = - * 1 << (s_log_block_size + 10) - */ - __le64 s_nsegments; /* Number of segments in filesystem */ -/*20*/ __le64 s_dev_size; /* block device size in bytes */ - __le64 s_first_data_block; /* 1st seg disk block number */ -/*30*/ __le32 s_blocks_per_segment; /* number of blocks per full segment */ - __le32 s_r_segments_percentage; /* Reserved segments percentage */ - - __le64 s_last_cno; /* Last checkpoint number */ -/*40*/ __le64 s_last_pseg; /* disk block addr pseg written last */ - __le64 s_last_seq; /* seq. number of seg written last */ -/*50*/ __le64 s_free_blocks_count; /* Free blocks count */ - - __le64 s_ctime; /* - * Creation time (execution time of - * newfs) - */ -/*60*/ __le64 s_mtime; /* Mount time */ - __le64 s_wtime; /* Write time */ -/*70*/ __le16 s_mnt_count; /* Mount count */ - __le16 s_max_mnt_count; /* Maximal mount count */ - __le16 s_state; /* File system state */ - __le16 s_errors; /* Behaviour when detecting errors */ - __le64 s_lastcheck; /* time of last check */ - -/*80*/ __le32 s_checkinterval; /* max. time between checks */ - __le32 s_creator_os; /* OS */ - __le16 s_def_resuid; /* Default uid for reserved blocks */ - __le16 s_def_resgid; /* Default gid for reserved blocks */ - __le32 s_first_ino; /* First non-reserved inode */ - -/*90*/ __le16 s_inode_size; /* Size of an inode */ - __le16 s_dat_entry_size; /* Size of a dat entry */ - __le16 s_checkpoint_size; /* Size of a checkpoint */ - __le16 s_segment_usage_size; /* Size of a segment usage */ - -/*98*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ -/*A8*/ char s_volume_name[80]; /* volume name */ - -/*F8*/ __le32 s_c_interval; /* Commit interval of segment */ - __le32 s_c_block_max; /* - * Threshold of data amount for - * the segment construction - */ -/*100*/ __le64 s_feature_compat; /* Compatible feature set */ - __le64 s_feature_compat_ro; /* Read-only compatible feature set */ - __le64 s_feature_incompat; /* Incompatible feature set */ - __u32 s_reserved[186]; /* padding to the end of the block */ -}; - -/* - * Codes for operating systems - */ -#define NILFS_OS_LINUX 0 -/* Codes from 1 to 4 are reserved to keep compatibility with ext2 creator-OS */ - -/* - * Revision levels - */ -#define NILFS_CURRENT_REV 2 /* current major revision */ -#define NILFS_MINOR_REV 0 /* minor revision */ -#define NILFS_MIN_SUPP_REV 2 /* minimum supported revision */ - -/* - * Feature set definitions - * - * If there is a bit set in the incompatible feature set that the kernel - * doesn't know about, it should refuse to mount the filesystem. - */ -#define NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT 0x00000001ULL - -#define NILFS_FEATURE_COMPAT_SUPP 0ULL -#define NILFS_FEATURE_COMPAT_RO_SUPP NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT -#define NILFS_FEATURE_INCOMPAT_SUPP 0ULL - -/* - * Bytes count of super_block for CRC-calculation - */ -#define NILFS_SB_BYTES \ - ((long)&((struct nilfs_super_block *)0)->s_reserved) - -/* - * Special inode number - */ -#define NILFS_ROOT_INO 2 /* Root file inode */ -#define NILFS_DAT_INO 3 /* DAT file */ -#define NILFS_CPFILE_INO 4 /* checkpoint file */ -#define NILFS_SUFILE_INO 5 /* segment usage file */ -#define NILFS_IFILE_INO 6 /* ifile */ -#define NILFS_ATIME_INO 7 /* Atime file (reserved) */ -#define NILFS_XATTR_INO 8 /* Xattribute file (reserved) */ -#define NILFS_SKETCH_INO 10 /* Sketch file */ -#define NILFS_USER_INO 11 /* Fisrt user's file inode number */ - -#define NILFS_SB_OFFSET_BYTES 1024 /* byte offset of nilfs superblock */ - -#define NILFS_SEG_MIN_BLOCKS 16 /* - * Minimum number of blocks in - * a full segment - */ -#define NILFS_PSEG_MIN_BLOCKS 2 /* - * Minimum number of blocks in - * a partial segment - */ -#define NILFS_MIN_NRSVSEGS 8 /* - * Minimum number of reserved - * segments - */ - -/* - * We call DAT, cpfile, and sufile root metadata files. Inodes of - * these files are written in super root block instead of ifile, and - * garbage collector doesn't keep any past versions of these files. - */ -#define NILFS_ROOT_METADATA_FILE(ino) \ - ((ino) >= NILFS_DAT_INO && (ino) <= NILFS_SUFILE_INO) - -/* - * bytes offset of secondary super block - */ -#define NILFS_SB2_OFFSET_BYTES(devsize) ((((devsize) >> 12) - 1) << 12) - -/* - * Maximal count of links to a file - */ -#define NILFS_LINK_MAX 32000 - -/* - * Structure of a directory entry - * (Same as ext2) - */ - -#define NILFS_NAME_LEN 255 - -/* - * Block size limitations - */ -#define NILFS_MIN_BLOCK_SIZE 1024 -#define NILFS_MAX_BLOCK_SIZE 65536 - -/* - * The new version of the directory entry. Since V0 structures are - * stored in intel byte order, and the name_len field could never be - * bigger than 255 chars, it's safe to reclaim the extra byte for the - * file_type field. - */ -struct nilfs_dir_entry { - __le64 inode; /* Inode number */ - __le16 rec_len; /* Directory entry length */ - __u8 name_len; /* Name length */ - __u8 file_type; /* Dir entry type (file, dir, etc) */ - char name[NILFS_NAME_LEN]; /* File name */ - char pad; -}; - -/* - * NILFS directory file types. Only the low 3 bits are used. The - * other bits are reserved for now. - */ -enum { - NILFS_FT_UNKNOWN, - NILFS_FT_REG_FILE, - NILFS_FT_DIR, - NILFS_FT_CHRDEV, - NILFS_FT_BLKDEV, - NILFS_FT_FIFO, - NILFS_FT_SOCK, - NILFS_FT_SYMLINK, - NILFS_FT_MAX -}; - -/* - * NILFS_DIR_PAD defines the directory entries boundaries - * - * NOTE: It must be a multiple of 8 - */ -#define NILFS_DIR_PAD 8 -#define NILFS_DIR_ROUND (NILFS_DIR_PAD - 1) -#define NILFS_DIR_REC_LEN(name_len) (((name_len) + 12 + NILFS_DIR_ROUND) & \ - ~NILFS_DIR_ROUND) -#define NILFS_MAX_REC_LEN ((1<<16)-1) - -static inline unsigned int nilfs_rec_len_from_disk(__le16 dlen) -{ - unsigned int len = le16_to_cpu(dlen); - -#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536) - if (len == NILFS_MAX_REC_LEN) - return 1 << 16; -#endif - return len; -} - -static inline __le16 nilfs_rec_len_to_disk(unsigned int len) -{ -#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536) - if (len == (1 << 16)) - return cpu_to_le16(NILFS_MAX_REC_LEN); - else if (len > (1 << 16)) - BUG(); -#endif - return cpu_to_le16(len); -} - -/** - * struct nilfs_finfo - file information - * @fi_ino: inode number - * @fi_cno: checkpoint number - * @fi_nblocks: number of blocks (including intermediate blocks) - * @fi_ndatablk: number of file data blocks - */ -struct nilfs_finfo { - __le64 fi_ino; - __le64 fi_cno; - __le32 fi_nblocks; - __le32 fi_ndatablk; - /* array of virtual block numbers */ -}; - -/** - * struct nilfs_binfo_v - information for the block to which a virtual block number is assigned - * @bi_vblocknr: virtual block number - * @bi_blkoff: block offset - */ -struct nilfs_binfo_v { - __le64 bi_vblocknr; - __le64 bi_blkoff; -}; - -/** - * struct nilfs_binfo_dat - information for the block which belongs to the DAT file - * @bi_blkoff: block offset - * @bi_level: level - * @bi_pad: padding - */ -struct nilfs_binfo_dat { - __le64 bi_blkoff; - __u8 bi_level; - __u8 bi_pad[7]; -}; - -/** - * union nilfs_binfo: block information - * @bi_v: nilfs_binfo_v structure - * @bi_dat: nilfs_binfo_dat structure - */ -union nilfs_binfo { - struct nilfs_binfo_v bi_v; - struct nilfs_binfo_dat bi_dat; -}; - -/** - * struct nilfs_segment_summary - segment summary header - * @ss_datasum: checksum of data - * @ss_sumsum: checksum of segment summary - * @ss_magic: magic number - * @ss_bytes: size of this structure in bytes - * @ss_flags: flags - * @ss_seq: sequence number - * @ss_create: creation timestamp - * @ss_next: next segment - * @ss_nblocks: number of blocks - * @ss_nfinfo: number of finfo structures - * @ss_sumbytes: total size of segment summary in bytes - * @ss_pad: padding - * @ss_cno: checkpoint number - */ -struct nilfs_segment_summary { - __le32 ss_datasum; - __le32 ss_sumsum; - __le32 ss_magic; - __le16 ss_bytes; - __le16 ss_flags; - __le64 ss_seq; - __le64 ss_create; - __le64 ss_next; - __le32 ss_nblocks; - __le32 ss_nfinfo; - __le32 ss_sumbytes; - __le32 ss_pad; - __le64 ss_cno; - /* array of finfo structures */ -}; - -#define NILFS_SEGSUM_MAGIC 0x1eaffa11 /* segment summary magic number */ - -/* - * Segment summary flags - */ -#define NILFS_SS_LOGBGN 0x0001 /* begins a logical segment */ -#define NILFS_SS_LOGEND 0x0002 /* ends a logical segment */ -#define NILFS_SS_SR 0x0004 /* has super root */ -#define NILFS_SS_SYNDT 0x0008 /* includes data only updates */ -#define NILFS_SS_GC 0x0010 /* segment written for cleaner operation */ - -/** - * struct nilfs_btree_node - B-tree node - * @bn_flags: flags - * @bn_level: level - * @bn_nchildren: number of children - * @bn_pad: padding - */ -struct nilfs_btree_node { - __u8 bn_flags; - __u8 bn_level; - __le16 bn_nchildren; - __le32 bn_pad; -}; - -/* flags */ -#define NILFS_BTREE_NODE_ROOT 0x01 - -/* level */ -#define NILFS_BTREE_LEVEL_DATA 0 -#define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) -#define NILFS_BTREE_LEVEL_MAX 14 /* Max level (exclusive) */ - -/** - * struct nilfs_palloc_group_desc - block group descriptor - * @pg_nfrees: number of free entries in block group - */ -struct nilfs_palloc_group_desc { - __le32 pg_nfrees; -}; - -/** - * struct nilfs_dat_entry - disk address translation entry - * @de_blocknr: block number - * @de_start: start checkpoint number - * @de_end: end checkpoint number - * @de_rsv: reserved for future use - */ -struct nilfs_dat_entry { - __le64 de_blocknr; - __le64 de_start; - __le64 de_end; - __le64 de_rsv; -}; - -#define NILFS_MIN_DAT_ENTRY_SIZE 32 - -/** - * struct nilfs_snapshot_list - snapshot list - * @ssl_next: next checkpoint number on snapshot list - * @ssl_prev: previous checkpoint number on snapshot list - */ -struct nilfs_snapshot_list { - __le64 ssl_next; - __le64 ssl_prev; -}; - -/** - * struct nilfs_checkpoint - checkpoint structure - * @cp_flags: flags - * @cp_checkpoints_count: checkpoints count in a block - * @cp_snapshot_list: snapshot list - * @cp_cno: checkpoint number - * @cp_create: creation timestamp - * @cp_nblk_inc: number of blocks incremented by this checkpoint - * @cp_inodes_count: inodes count - * @cp_blocks_count: blocks count - * @cp_ifile_inode: inode of ifile - */ -struct nilfs_checkpoint { - __le32 cp_flags; - __le32 cp_checkpoints_count; - struct nilfs_snapshot_list cp_snapshot_list; - __le64 cp_cno; - __le64 cp_create; - __le64 cp_nblk_inc; - __le64 cp_inodes_count; - __le64 cp_blocks_count; - - /* - * Do not change the byte offset of ifile inode. - * To keep the compatibility of the disk format, - * additional fields should be added behind cp_ifile_inode. - */ - struct nilfs_inode cp_ifile_inode; -}; - -#define NILFS_MIN_CHECKPOINT_SIZE (64 + NILFS_MIN_INODE_SIZE) - -/* checkpoint flags */ -enum { - NILFS_CHECKPOINT_SNAPSHOT, - NILFS_CHECKPOINT_INVALID, - NILFS_CHECKPOINT_SKETCH, - NILFS_CHECKPOINT_MINOR, -}; - -#define NILFS_CHECKPOINT_FNS(flag, name) \ -static inline void \ -nilfs_checkpoint_set_##name(struct nilfs_checkpoint *cp) \ -{ \ - cp->cp_flags = cpu_to_le32(le32_to_cpu(cp->cp_flags) | \ - (1UL << NILFS_CHECKPOINT_##flag)); \ -} \ -static inline void \ -nilfs_checkpoint_clear_##name(struct nilfs_checkpoint *cp) \ -{ \ - cp->cp_flags = cpu_to_le32(le32_to_cpu(cp->cp_flags) & \ - ~(1UL << NILFS_CHECKPOINT_##flag)); \ -} \ -static inline int \ -nilfs_checkpoint_##name(const struct nilfs_checkpoint *cp) \ -{ \ - return !!(le32_to_cpu(cp->cp_flags) & \ - (1UL << NILFS_CHECKPOINT_##flag)); \ -} - -NILFS_CHECKPOINT_FNS(SNAPSHOT, snapshot) -NILFS_CHECKPOINT_FNS(INVALID, invalid) -NILFS_CHECKPOINT_FNS(MINOR, minor) - -/** - * struct nilfs_cpinfo - checkpoint information - * @ci_flags: flags - * @ci_pad: padding - * @ci_cno: checkpoint number - * @ci_create: creation timestamp - * @ci_nblk_inc: number of blocks incremented by this checkpoint - * @ci_inodes_count: inodes count - * @ci_blocks_count: blocks count - * @ci_next: next checkpoint number in snapshot list - */ -struct nilfs_cpinfo { - __u32 ci_flags; - __u32 ci_pad; - __u64 ci_cno; - __u64 ci_create; - __u64 ci_nblk_inc; - __u64 ci_inodes_count; - __u64 ci_blocks_count; - __u64 ci_next; -}; - -#define NILFS_CPINFO_FNS(flag, name) \ -static inline int \ -nilfs_cpinfo_##name(const struct nilfs_cpinfo *cpinfo) \ -{ \ - return !!(cpinfo->ci_flags & (1UL << NILFS_CHECKPOINT_##flag)); \ -} - -NILFS_CPINFO_FNS(SNAPSHOT, snapshot) -NILFS_CPINFO_FNS(INVALID, invalid) -NILFS_CPINFO_FNS(MINOR, minor) - - -/** - * struct nilfs_cpfile_header - checkpoint file header - * @ch_ncheckpoints: number of checkpoints - * @ch_nsnapshots: number of snapshots - * @ch_snapshot_list: snapshot list - */ -struct nilfs_cpfile_header { - __le64 ch_ncheckpoints; - __le64 ch_nsnapshots; - struct nilfs_snapshot_list ch_snapshot_list; -}; - -#define NILFS_CPFILE_FIRST_CHECKPOINT_OFFSET \ - ((sizeof(struct nilfs_cpfile_header) + \ - sizeof(struct nilfs_checkpoint) - 1) / \ - sizeof(struct nilfs_checkpoint)) - -/** - * struct nilfs_segment_usage - segment usage - * @su_lastmod: last modified timestamp - * @su_nblocks: number of blocks in segment - * @su_flags: flags - */ -struct nilfs_segment_usage { - __le64 su_lastmod; - __le32 su_nblocks; - __le32 su_flags; -}; - -#define NILFS_MIN_SEGMENT_USAGE_SIZE 16 - -/* segment usage flag */ -enum { - NILFS_SEGMENT_USAGE_ACTIVE, - NILFS_SEGMENT_USAGE_DIRTY, - NILFS_SEGMENT_USAGE_ERROR, - - /* ... */ -}; - -#define NILFS_SEGMENT_USAGE_FNS(flag, name) \ -static inline void \ -nilfs_segment_usage_set_##name(struct nilfs_segment_usage *su) \ -{ \ - su->su_flags = cpu_to_le32(le32_to_cpu(su->su_flags) | \ - (1UL << NILFS_SEGMENT_USAGE_##flag));\ -} \ -static inline void \ -nilfs_segment_usage_clear_##name(struct nilfs_segment_usage *su) \ -{ \ - su->su_flags = \ - cpu_to_le32(le32_to_cpu(su->su_flags) & \ - ~(1UL << NILFS_SEGMENT_USAGE_##flag)); \ -} \ -static inline int \ -nilfs_segment_usage_##name(const struct nilfs_segment_usage *su) \ -{ \ - return !!(le32_to_cpu(su->su_flags) & \ - (1UL << NILFS_SEGMENT_USAGE_##flag)); \ -} - -NILFS_SEGMENT_USAGE_FNS(ACTIVE, active) -NILFS_SEGMENT_USAGE_FNS(DIRTY, dirty) -NILFS_SEGMENT_USAGE_FNS(ERROR, error) - -static inline void -nilfs_segment_usage_set_clean(struct nilfs_segment_usage *su) -{ - su->su_lastmod = cpu_to_le64(0); - su->su_nblocks = cpu_to_le32(0); - su->su_flags = cpu_to_le32(0); -} - -static inline int -nilfs_segment_usage_clean(const struct nilfs_segment_usage *su) -{ - return !le32_to_cpu(su->su_flags); -} - -/** - * struct nilfs_sufile_header - segment usage file header - * @sh_ncleansegs: number of clean segments - * @sh_ndirtysegs: number of dirty segments - * @sh_last_alloc: last allocated segment number - */ -struct nilfs_sufile_header { - __le64 sh_ncleansegs; - __le64 sh_ndirtysegs; - __le64 sh_last_alloc; - /* ... */ -}; - -#define NILFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET \ - ((sizeof(struct nilfs_sufile_header) + \ - sizeof(struct nilfs_segment_usage) - 1) / \ - sizeof(struct nilfs_segment_usage)) - -/** - * nilfs_suinfo - segment usage information - * @sui_lastmod: timestamp of last modification - * @sui_nblocks: number of written blocks in segment - * @sui_flags: segment usage flags - */ -struct nilfs_suinfo { - __u64 sui_lastmod; - __u32 sui_nblocks; - __u32 sui_flags; -}; - -#define NILFS_SUINFO_FNS(flag, name) \ -static inline int \ -nilfs_suinfo_##name(const struct nilfs_suinfo *si) \ -{ \ - return si->sui_flags & (1UL << NILFS_SEGMENT_USAGE_##flag); \ -} - -NILFS_SUINFO_FNS(ACTIVE, active) -NILFS_SUINFO_FNS(DIRTY, dirty) -NILFS_SUINFO_FNS(ERROR, error) - -static inline int nilfs_suinfo_clean(const struct nilfs_suinfo *si) -{ - return !si->sui_flags; -} - -/* ioctl */ -/** - * nilfs_suinfo_update - segment usage information update - * @sup_segnum: segment number - * @sup_flags: flags for which fields are active in sup_sui - * @sup_reserved: reserved necessary for alignment - * @sup_sui: segment usage information - */ -struct nilfs_suinfo_update { - __u64 sup_segnum; - __u32 sup_flags; - __u32 sup_reserved; - struct nilfs_suinfo sup_sui; -}; - -enum { - NILFS_SUINFO_UPDATE_LASTMOD, - NILFS_SUINFO_UPDATE_NBLOCKS, - NILFS_SUINFO_UPDATE_FLAGS, - __NR_NILFS_SUINFO_UPDATE_FIELDS, -}; - -#define NILFS_SUINFO_UPDATE_FNS(flag, name) \ -static inline void \ -nilfs_suinfo_update_set_##name(struct nilfs_suinfo_update *sup) \ -{ \ - sup->sup_flags |= 1UL << NILFS_SUINFO_UPDATE_##flag; \ -} \ -static inline void \ -nilfs_suinfo_update_clear_##name(struct nilfs_suinfo_update *sup) \ -{ \ - sup->sup_flags &= ~(1UL << NILFS_SUINFO_UPDATE_##flag); \ -} \ -static inline int \ -nilfs_suinfo_update_##name(const struct nilfs_suinfo_update *sup) \ -{ \ - return !!(sup->sup_flags & (1UL << NILFS_SUINFO_UPDATE_##flag));\ -} - -NILFS_SUINFO_UPDATE_FNS(LASTMOD, lastmod) -NILFS_SUINFO_UPDATE_FNS(NBLOCKS, nblocks) -NILFS_SUINFO_UPDATE_FNS(FLAGS, flags) - -enum { - NILFS_CHECKPOINT, - NILFS_SNAPSHOT, -}; - -/** - * struct nilfs_cpmode - change checkpoint mode structure - * @cm_cno: checkpoint number - * @cm_mode: mode of checkpoint - * @cm_pad: padding - */ -struct nilfs_cpmode { - __u64 cm_cno; - __u32 cm_mode; - __u32 cm_pad; -}; - -/** - * struct nilfs_argv - argument vector - * @v_base: pointer on data array from userspace - * @v_nmembs: number of members in data array - * @v_size: size of data array in bytes - * @v_flags: flags - * @v_index: start number of target data items - */ -struct nilfs_argv { - __u64 v_base; - __u32 v_nmembs; /* number of members */ - __u16 v_size; /* size of members */ - __u16 v_flags; - __u64 v_index; -}; - -/** - * struct nilfs_period - period of checkpoint numbers - * @p_start: start checkpoint number (inclusive) - * @p_end: end checkpoint number (exclusive) - */ -struct nilfs_period { - __u64 p_start; - __u64 p_end; -}; - -/** - * struct nilfs_cpstat - checkpoint statistics - * @cs_cno: checkpoint number - * @cs_ncps: number of checkpoints - * @cs_nsss: number of snapshots - */ -struct nilfs_cpstat { - __u64 cs_cno; - __u64 cs_ncps; - __u64 cs_nsss; -}; - -/** - * struct nilfs_sustat - segment usage statistics - * @ss_nsegs: number of segments - * @ss_ncleansegs: number of clean segments - * @ss_ndirtysegs: number of dirty segments - * @ss_ctime: creation time of the last segment - * @ss_nongc_ctime: creation time of the last segment not for GC - * @ss_prot_seq: least sequence number of segments which must not be reclaimed - */ -struct nilfs_sustat { - __u64 ss_nsegs; - __u64 ss_ncleansegs; - __u64 ss_ndirtysegs; - __u64 ss_ctime; - __u64 ss_nongc_ctime; - __u64 ss_prot_seq; -}; - -/** - * struct nilfs_vinfo - virtual block number information - * @vi_vblocknr: virtual block number - * @vi_start: start checkpoint number (inclusive) - * @vi_end: end checkpoint number (exclusive) - * @vi_blocknr: disk block number - */ -struct nilfs_vinfo { - __u64 vi_vblocknr; - __u64 vi_start; - __u64 vi_end; - __u64 vi_blocknr; -}; - -/** - * struct nilfs_vdesc - descriptor of virtual block number - * @vd_ino: inode number - * @vd_cno: checkpoint number - * @vd_vblocknr: virtual block number - * @vd_period: period of checkpoint numbers - * @vd_blocknr: disk block number - * @vd_offset: logical block offset inside a file - * @vd_flags: flags (data or node block) - * @vd_pad: padding - */ -struct nilfs_vdesc { - __u64 vd_ino; - __u64 vd_cno; - __u64 vd_vblocknr; - struct nilfs_period vd_period; - __u64 vd_blocknr; - __u64 vd_offset; - __u32 vd_flags; - __u32 vd_pad; -}; - -/** - * struct nilfs_bdesc - descriptor of disk block number - * @bd_ino: inode number - * @bd_oblocknr: disk block address (for skipping dead blocks) - * @bd_blocknr: disk block address - * @bd_offset: logical block offset inside a file - * @bd_level: level in the b-tree organization - * @bd_pad: padding - */ -struct nilfs_bdesc { - __u64 bd_ino; - __u64 bd_oblocknr; - __u64 bd_blocknr; - __u64 bd_offset; - __u32 bd_level; - __u32 bd_pad; -}; - -#define NILFS_IOCTL_IDENT 'n' - -#define NILFS_IOCTL_CHANGE_CPMODE \ - _IOW(NILFS_IOCTL_IDENT, 0x80, struct nilfs_cpmode) -#define NILFS_IOCTL_DELETE_CHECKPOINT \ - _IOW(NILFS_IOCTL_IDENT, 0x81, __u64) -#define NILFS_IOCTL_GET_CPINFO \ - _IOR(NILFS_IOCTL_IDENT, 0x82, struct nilfs_argv) -#define NILFS_IOCTL_GET_CPSTAT \ - _IOR(NILFS_IOCTL_IDENT, 0x83, struct nilfs_cpstat) -#define NILFS_IOCTL_GET_SUINFO \ - _IOR(NILFS_IOCTL_IDENT, 0x84, struct nilfs_argv) -#define NILFS_IOCTL_GET_SUSTAT \ - _IOR(NILFS_IOCTL_IDENT, 0x85, struct nilfs_sustat) -#define NILFS_IOCTL_GET_VINFO \ - _IOWR(NILFS_IOCTL_IDENT, 0x86, struct nilfs_argv) -#define NILFS_IOCTL_GET_BDESCS \ - _IOWR(NILFS_IOCTL_IDENT, 0x87, struct nilfs_argv) -#define NILFS_IOCTL_CLEAN_SEGMENTS \ - _IOW(NILFS_IOCTL_IDENT, 0x88, struct nilfs_argv[5]) -#define NILFS_IOCTL_SYNC \ - _IOR(NILFS_IOCTL_IDENT, 0x8A, __u64) -#define NILFS_IOCTL_RESIZE \ - _IOW(NILFS_IOCTL_IDENT, 0x8B, __u64) -#define NILFS_IOCTL_SET_ALLOC_RANGE \ - _IOW(NILFS_IOCTL_IDENT, 0x8C, __u64[2]) -#define NILFS_IOCTL_SET_SUINFO \ - _IOW(NILFS_IOCTL_IDENT, 0x8D, struct nilfs_argv) - -#endif /* _LINUX_NILFS_FS_H */ diff --git a/include/uapi/linux/nilfs2_api.h b/include/uapi/linux/nilfs2_api.h new file mode 100644 index 000000000000..ef4c1de89b11 --- /dev/null +++ b/include/uapi/linux/nilfs2_api.h @@ -0,0 +1,292 @@ +/* + * nilfs2_api.h - NILFS2 user space API + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + */ + +#ifndef _LINUX_NILFS2_API_H +#define _LINUX_NILFS2_API_H + +#include +#include + +/** + * struct nilfs_cpinfo - checkpoint information + * @ci_flags: flags + * @ci_pad: padding + * @ci_cno: checkpoint number + * @ci_create: creation timestamp + * @ci_nblk_inc: number of blocks incremented by this checkpoint + * @ci_inodes_count: inodes count + * @ci_blocks_count: blocks count + * @ci_next: next checkpoint number in snapshot list + */ +struct nilfs_cpinfo { + __u32 ci_flags; + __u32 ci_pad; + __u64 ci_cno; + __u64 ci_create; + __u64 ci_nblk_inc; + __u64 ci_inodes_count; + __u64 ci_blocks_count; + __u64 ci_next; +}; + +/* checkpoint flags */ +enum { + NILFS_CPINFO_SNAPSHOT, + NILFS_CPINFO_INVALID, + NILFS_CPINFO_SKETCH, + NILFS_CPINFO_MINOR, +}; + +#define NILFS_CPINFO_FNS(flag, name) \ +static inline int \ +nilfs_cpinfo_##name(const struct nilfs_cpinfo *cpinfo) \ +{ \ + return !!(cpinfo->ci_flags & (1UL << NILFS_CPINFO_##flag)); \ +} + +NILFS_CPINFO_FNS(SNAPSHOT, snapshot) +NILFS_CPINFO_FNS(INVALID, invalid) +NILFS_CPINFO_FNS(MINOR, minor) + +/** + * nilfs_suinfo - segment usage information + * @sui_lastmod: timestamp of last modification + * @sui_nblocks: number of written blocks in segment + * @sui_flags: segment usage flags + */ +struct nilfs_suinfo { + __u64 sui_lastmod; + __u32 sui_nblocks; + __u32 sui_flags; +}; + +/* segment usage flags */ +enum { + NILFS_SUINFO_ACTIVE, + NILFS_SUINFO_DIRTY, + NILFS_SUINFO_ERROR, +}; + +#define NILFS_SUINFO_FNS(flag, name) \ +static inline int \ +nilfs_suinfo_##name(const struct nilfs_suinfo *si) \ +{ \ + return si->sui_flags & (1UL << NILFS_SUINFO_##flag); \ +} + +NILFS_SUINFO_FNS(ACTIVE, active) +NILFS_SUINFO_FNS(DIRTY, dirty) +NILFS_SUINFO_FNS(ERROR, error) + +static inline int nilfs_suinfo_clean(const struct nilfs_suinfo *si) +{ + return !si->sui_flags; +} + +/** + * nilfs_suinfo_update - segment usage information update + * @sup_segnum: segment number + * @sup_flags: flags for which fields are active in sup_sui + * @sup_reserved: reserved necessary for alignment + * @sup_sui: segment usage information + */ +struct nilfs_suinfo_update { + __u64 sup_segnum; + __u32 sup_flags; + __u32 sup_reserved; + struct nilfs_suinfo sup_sui; +}; + +enum { + NILFS_SUINFO_UPDATE_LASTMOD, + NILFS_SUINFO_UPDATE_NBLOCKS, + NILFS_SUINFO_UPDATE_FLAGS, + __NR_NILFS_SUINFO_UPDATE_FIELDS, +}; + +#define NILFS_SUINFO_UPDATE_FNS(flag, name) \ +static inline void \ +nilfs_suinfo_update_set_##name(struct nilfs_suinfo_update *sup) \ +{ \ + sup->sup_flags |= 1UL << NILFS_SUINFO_UPDATE_##flag; \ +} \ +static inline void \ +nilfs_suinfo_update_clear_##name(struct nilfs_suinfo_update *sup) \ +{ \ + sup->sup_flags &= ~(1UL << NILFS_SUINFO_UPDATE_##flag); \ +} \ +static inline int \ +nilfs_suinfo_update_##name(const struct nilfs_suinfo_update *sup) \ +{ \ + return !!(sup->sup_flags & (1UL << NILFS_SUINFO_UPDATE_##flag));\ +} + +NILFS_SUINFO_UPDATE_FNS(LASTMOD, lastmod) +NILFS_SUINFO_UPDATE_FNS(NBLOCKS, nblocks) +NILFS_SUINFO_UPDATE_FNS(FLAGS, flags) + +enum { + NILFS_CHECKPOINT, + NILFS_SNAPSHOT, +}; + +/** + * struct nilfs_cpmode - change checkpoint mode structure + * @cm_cno: checkpoint number + * @cm_mode: mode of checkpoint + * @cm_pad: padding + */ +struct nilfs_cpmode { + __u64 cm_cno; + __u32 cm_mode; + __u32 cm_pad; +}; + +/** + * struct nilfs_argv - argument vector + * @v_base: pointer on data array from userspace + * @v_nmembs: number of members in data array + * @v_size: size of data array in bytes + * @v_flags: flags + * @v_index: start number of target data items + */ +struct nilfs_argv { + __u64 v_base; + __u32 v_nmembs; /* number of members */ + __u16 v_size; /* size of members */ + __u16 v_flags; + __u64 v_index; +}; + +/** + * struct nilfs_period - period of checkpoint numbers + * @p_start: start checkpoint number (inclusive) + * @p_end: end checkpoint number (exclusive) + */ +struct nilfs_period { + __u64 p_start; + __u64 p_end; +}; + +/** + * struct nilfs_cpstat - checkpoint statistics + * @cs_cno: checkpoint number + * @cs_ncps: number of checkpoints + * @cs_nsss: number of snapshots + */ +struct nilfs_cpstat { + __u64 cs_cno; + __u64 cs_ncps; + __u64 cs_nsss; +}; + +/** + * struct nilfs_sustat - segment usage statistics + * @ss_nsegs: number of segments + * @ss_ncleansegs: number of clean segments + * @ss_ndirtysegs: number of dirty segments + * @ss_ctime: creation time of the last segment + * @ss_nongc_ctime: creation time of the last segment not for GC + * @ss_prot_seq: least sequence number of segments which must not be reclaimed + */ +struct nilfs_sustat { + __u64 ss_nsegs; + __u64 ss_ncleansegs; + __u64 ss_ndirtysegs; + __u64 ss_ctime; + __u64 ss_nongc_ctime; + __u64 ss_prot_seq; +}; + +/** + * struct nilfs_vinfo - virtual block number information + * @vi_vblocknr: virtual block number + * @vi_start: start checkpoint number (inclusive) + * @vi_end: end checkpoint number (exclusive) + * @vi_blocknr: disk block number + */ +struct nilfs_vinfo { + __u64 vi_vblocknr; + __u64 vi_start; + __u64 vi_end; + __u64 vi_blocknr; +}; + +/** + * struct nilfs_vdesc - descriptor of virtual block number + * @vd_ino: inode number + * @vd_cno: checkpoint number + * @vd_vblocknr: virtual block number + * @vd_period: period of checkpoint numbers + * @vd_blocknr: disk block number + * @vd_offset: logical block offset inside a file + * @vd_flags: flags (data or node block) + * @vd_pad: padding + */ +struct nilfs_vdesc { + __u64 vd_ino; + __u64 vd_cno; + __u64 vd_vblocknr; + struct nilfs_period vd_period; + __u64 vd_blocknr; + __u64 vd_offset; + __u32 vd_flags; + __u32 vd_pad; +}; + +/** + * struct nilfs_bdesc - descriptor of disk block number + * @bd_ino: inode number + * @bd_oblocknr: disk block address (for skipping dead blocks) + * @bd_blocknr: disk block address + * @bd_offset: logical block offset inside a file + * @bd_level: level in the b-tree organization + * @bd_pad: padding + */ +struct nilfs_bdesc { + __u64 bd_ino; + __u64 bd_oblocknr; + __u64 bd_blocknr; + __u64 bd_offset; + __u32 bd_level; + __u32 bd_pad; +}; + +#define NILFS_IOCTL_IDENT 'n' + +#define NILFS_IOCTL_CHANGE_CPMODE \ + _IOW(NILFS_IOCTL_IDENT, 0x80, struct nilfs_cpmode) +#define NILFS_IOCTL_DELETE_CHECKPOINT \ + _IOW(NILFS_IOCTL_IDENT, 0x81, __u64) +#define NILFS_IOCTL_GET_CPINFO \ + _IOR(NILFS_IOCTL_IDENT, 0x82, struct nilfs_argv) +#define NILFS_IOCTL_GET_CPSTAT \ + _IOR(NILFS_IOCTL_IDENT, 0x83, struct nilfs_cpstat) +#define NILFS_IOCTL_GET_SUINFO \ + _IOR(NILFS_IOCTL_IDENT, 0x84, struct nilfs_argv) +#define NILFS_IOCTL_GET_SUSTAT \ + _IOR(NILFS_IOCTL_IDENT, 0x85, struct nilfs_sustat) +#define NILFS_IOCTL_GET_VINFO \ + _IOWR(NILFS_IOCTL_IDENT, 0x86, struct nilfs_argv) +#define NILFS_IOCTL_GET_BDESCS \ + _IOWR(NILFS_IOCTL_IDENT, 0x87, struct nilfs_argv) +#define NILFS_IOCTL_CLEAN_SEGMENTS \ + _IOW(NILFS_IOCTL_IDENT, 0x88, struct nilfs_argv[5]) +#define NILFS_IOCTL_SYNC \ + _IOR(NILFS_IOCTL_IDENT, 0x8A, __u64) +#define NILFS_IOCTL_RESIZE \ + _IOW(NILFS_IOCTL_IDENT, 0x8B, __u64) +#define NILFS_IOCTL_SET_ALLOC_RANGE \ + _IOW(NILFS_IOCTL_IDENT, 0x8C, __u64[2]) +#define NILFS_IOCTL_SET_SUINFO \ + _IOW(NILFS_IOCTL_IDENT, 0x8D, struct nilfs_argv) + +#endif /* _LINUX_NILFS2_API_H */ diff --git a/include/uapi/linux/nilfs2_ondisk.h b/include/uapi/linux/nilfs2_ondisk.h new file mode 100644 index 000000000000..2a8a3addb675 --- /dev/null +++ b/include/uapi/linux/nilfs2_ondisk.h @@ -0,0 +1,650 @@ +/* + * nilfs2_ondisk.h - NILFS2 on-disk structures + * + * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + */ +/* + * linux/include/linux/ext2_fs.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_NILFS2_ONDISK_H +#define _LINUX_NILFS2_ONDISK_H + +#include +#include + + +#define NILFS_INODE_BMAP_SIZE 7 + +/** + * struct nilfs_inode - structure of an inode on disk + * @i_blocks: blocks count + * @i_size: size in bytes + * @i_ctime: creation time (seconds) + * @i_mtime: modification time (seconds) + * @i_ctime_nsec: creation time (nano seconds) + * @i_mtime_nsec: modification time (nano seconds) + * @i_uid: user id + * @i_gid: group id + * @i_mode: file mode + * @i_links_count: links count + * @i_flags: file flags + * @i_bmap: block mapping + * @i_xattr: extended attributes + * @i_generation: file generation (for NFS) + * @i_pad: padding + */ +struct nilfs_inode { + __le64 i_blocks; + __le64 i_size; + __le64 i_ctime; + __le64 i_mtime; + __le32 i_ctime_nsec; + __le32 i_mtime_nsec; + __le32 i_uid; + __le32 i_gid; + __le16 i_mode; + __le16 i_links_count; + __le32 i_flags; + __le64 i_bmap[NILFS_INODE_BMAP_SIZE]; +#define i_device_code i_bmap[0] + __le64 i_xattr; + __le32 i_generation; + __le32 i_pad; +}; + +#define NILFS_MIN_INODE_SIZE 128 + +/** + * struct nilfs_super_root - structure of super root + * @sr_sum: check sum + * @sr_bytes: byte count of the structure + * @sr_flags: flags (reserved) + * @sr_nongc_ctime: write time of the last segment not for cleaner operation + * @sr_dat: DAT file inode + * @sr_cpfile: checkpoint file inode + * @sr_sufile: segment usage file inode + */ +struct nilfs_super_root { + __le32 sr_sum; + __le16 sr_bytes; + __le16 sr_flags; + __le64 sr_nongc_ctime; + struct nilfs_inode sr_dat; + struct nilfs_inode sr_cpfile; + struct nilfs_inode sr_sufile; +}; + +#define NILFS_SR_MDT_OFFSET(inode_size, i) \ + ((unsigned long)&((struct nilfs_super_root *)0)->sr_dat + \ + (inode_size) * (i)) +#define NILFS_SR_DAT_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 0) +#define NILFS_SR_CPFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 1) +#define NILFS_SR_SUFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 2) +#define NILFS_SR_BYTES(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 3) + +/* + * Maximal mount counts + */ +#define NILFS_DFL_MAX_MNT_COUNT 50 /* 50 mounts */ + +/* + * File system states (sbp->s_state, nilfs->ns_mount_state) + */ +#define NILFS_VALID_FS 0x0001 /* Unmounted cleanly */ +#define NILFS_ERROR_FS 0x0002 /* Errors detected */ +#define NILFS_RESIZE_FS 0x0004 /* Resize required */ + +/* + * Mount flags (sbi->s_mount_opt) + */ +#define NILFS_MOUNT_ERROR_MODE 0x0070 /* Error mode mask */ +#define NILFS_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ +#define NILFS_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ +#define NILFS_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ +#define NILFS_MOUNT_BARRIER 0x1000 /* Use block barriers */ +#define NILFS_MOUNT_STRICT_ORDER 0x2000 /* + * Apply strict in-order + * semantics also for data + */ +#define NILFS_MOUNT_NORECOVERY 0x4000 /* + * Disable write access during + * mount-time recovery + */ +#define NILFS_MOUNT_DISCARD 0x8000 /* Issue DISCARD requests */ + + +/** + * struct nilfs_super_block - structure of super block on disk + */ +struct nilfs_super_block { +/*00*/ __le32 s_rev_level; /* Revision level */ + __le16 s_minor_rev_level; /* minor revision level */ + __le16 s_magic; /* Magic signature */ + + __le16 s_bytes; /* + * Bytes count of CRC calculation + * for this structure. s_reserved + * is excluded. + */ + __le16 s_flags; /* flags */ + __le32 s_crc_seed; /* Seed value of CRC calculation */ +/*10*/ __le32 s_sum; /* Check sum of super block */ + + __le32 s_log_block_size; /* + * Block size represented as follows + * blocksize = + * 1 << (s_log_block_size + 10) + */ + __le64 s_nsegments; /* Number of segments in filesystem */ +/*20*/ __le64 s_dev_size; /* block device size in bytes */ + __le64 s_first_data_block; /* 1st seg disk block number */ +/*30*/ __le32 s_blocks_per_segment; /* number of blocks per full segment */ + __le32 s_r_segments_percentage; /* Reserved segments percentage */ + + __le64 s_last_cno; /* Last checkpoint number */ +/*40*/ __le64 s_last_pseg; /* disk block addr pseg written last */ + __le64 s_last_seq; /* seq. number of seg written last */ +/*50*/ __le64 s_free_blocks_count; /* Free blocks count */ + + __le64 s_ctime; /* + * Creation time (execution time of + * newfs) + */ +/*60*/ __le64 s_mtime; /* Mount time */ + __le64 s_wtime; /* Write time */ +/*70*/ __le16 s_mnt_count; /* Mount count */ + __le16 s_max_mnt_count; /* Maximal mount count */ + __le16 s_state; /* File system state */ + __le16 s_errors; /* Behaviour when detecting errors */ + __le64 s_lastcheck; /* time of last check */ + +/*80*/ __le32 s_checkinterval; /* max. time between checks */ + __le32 s_creator_os; /* OS */ + __le16 s_def_resuid; /* Default uid for reserved blocks */ + __le16 s_def_resgid; /* Default gid for reserved blocks */ + __le32 s_first_ino; /* First non-reserved inode */ + +/*90*/ __le16 s_inode_size; /* Size of an inode */ + __le16 s_dat_entry_size; /* Size of a dat entry */ + __le16 s_checkpoint_size; /* Size of a checkpoint */ + __le16 s_segment_usage_size; /* Size of a segment usage */ + +/*98*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ +/*A8*/ char s_volume_name[80]; /* volume name */ + +/*F8*/ __le32 s_c_interval; /* Commit interval of segment */ + __le32 s_c_block_max; /* + * Threshold of data amount for + * the segment construction + */ +/*100*/ __le64 s_feature_compat; /* Compatible feature set */ + __le64 s_feature_compat_ro; /* Read-only compatible feature set */ + __le64 s_feature_incompat; /* Incompatible feature set */ + __u32 s_reserved[186]; /* padding to the end of the block */ +}; + +/* + * Codes for operating systems + */ +#define NILFS_OS_LINUX 0 +/* Codes from 1 to 4 are reserved to keep compatibility with ext2 creator-OS */ + +/* + * Revision levels + */ +#define NILFS_CURRENT_REV 2 /* current major revision */ +#define NILFS_MINOR_REV 0 /* minor revision */ +#define NILFS_MIN_SUPP_REV 2 /* minimum supported revision */ + +/* + * Feature set definitions + * + * If there is a bit set in the incompatible feature set that the kernel + * doesn't know about, it should refuse to mount the filesystem. + */ +#define NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT 0x00000001ULL + +#define NILFS_FEATURE_COMPAT_SUPP 0ULL +#define NILFS_FEATURE_COMPAT_RO_SUPP NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT +#define NILFS_FEATURE_INCOMPAT_SUPP 0ULL + +/* + * Bytes count of super_block for CRC-calculation + */ +#define NILFS_SB_BYTES \ + ((long)&((struct nilfs_super_block *)0)->s_reserved) + +/* + * Special inode number + */ +#define NILFS_ROOT_INO 2 /* Root file inode */ +#define NILFS_DAT_INO 3 /* DAT file */ +#define NILFS_CPFILE_INO 4 /* checkpoint file */ +#define NILFS_SUFILE_INO 5 /* segment usage file */ +#define NILFS_IFILE_INO 6 /* ifile */ +#define NILFS_ATIME_INO 7 /* Atime file (reserved) */ +#define NILFS_XATTR_INO 8 /* Xattribute file (reserved) */ +#define NILFS_SKETCH_INO 10 /* Sketch file */ +#define NILFS_USER_INO 11 /* Fisrt user's file inode number */ + +#define NILFS_SB_OFFSET_BYTES 1024 /* byte offset of nilfs superblock */ + +#define NILFS_SEG_MIN_BLOCKS 16 /* + * Minimum number of blocks in + * a full segment + */ +#define NILFS_PSEG_MIN_BLOCKS 2 /* + * Minimum number of blocks in + * a partial segment + */ +#define NILFS_MIN_NRSVSEGS 8 /* + * Minimum number of reserved + * segments + */ + +/* + * We call DAT, cpfile, and sufile root metadata files. Inodes of + * these files are written in super root block instead of ifile, and + * garbage collector doesn't keep any past versions of these files. + */ +#define NILFS_ROOT_METADATA_FILE(ino) \ + ((ino) >= NILFS_DAT_INO && (ino) <= NILFS_SUFILE_INO) + +/* + * bytes offset of secondary super block + */ +#define NILFS_SB2_OFFSET_BYTES(devsize) ((((devsize) >> 12) - 1) << 12) + +/* + * Maximal count of links to a file + */ +#define NILFS_LINK_MAX 32000 + +/* + * Structure of a directory entry + * (Same as ext2) + */ + +#define NILFS_NAME_LEN 255 + +/* + * Block size limitations + */ +#define NILFS_MIN_BLOCK_SIZE 1024 +#define NILFS_MAX_BLOCK_SIZE 65536 + +/* + * The new version of the directory entry. Since V0 structures are + * stored in intel byte order, and the name_len field could never be + * bigger than 255 chars, it's safe to reclaim the extra byte for the + * file_type field. + */ +struct nilfs_dir_entry { + __le64 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; /* Dir entry type (file, dir, etc) */ + char name[NILFS_NAME_LEN]; /* File name */ + char pad; +}; + +/* + * NILFS directory file types. Only the low 3 bits are used. The + * other bits are reserved for now. + */ +enum { + NILFS_FT_UNKNOWN, + NILFS_FT_REG_FILE, + NILFS_FT_DIR, + NILFS_FT_CHRDEV, + NILFS_FT_BLKDEV, + NILFS_FT_FIFO, + NILFS_FT_SOCK, + NILFS_FT_SYMLINK, + NILFS_FT_MAX +}; + +/* + * NILFS_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 8 + */ +#define NILFS_DIR_PAD 8 +#define NILFS_DIR_ROUND (NILFS_DIR_PAD - 1) +#define NILFS_DIR_REC_LEN(name_len) (((name_len) + 12 + NILFS_DIR_ROUND) & \ + ~NILFS_DIR_ROUND) +#define NILFS_MAX_REC_LEN ((1 << 16) - 1) + +/** + * struct nilfs_finfo - file information + * @fi_ino: inode number + * @fi_cno: checkpoint number + * @fi_nblocks: number of blocks (including intermediate blocks) + * @fi_ndatablk: number of file data blocks + */ +struct nilfs_finfo { + __le64 fi_ino; + __le64 fi_cno; + __le32 fi_nblocks; + __le32 fi_ndatablk; +}; + +/** + * struct nilfs_binfo_v - information on a data block (except DAT) + * @bi_vblocknr: virtual block number + * @bi_blkoff: block offset + */ +struct nilfs_binfo_v { + __le64 bi_vblocknr; + __le64 bi_blkoff; +}; + +/** + * struct nilfs_binfo_dat - information on a DAT node block + * @bi_blkoff: block offset + * @bi_level: level + * @bi_pad: padding + */ +struct nilfs_binfo_dat { + __le64 bi_blkoff; + __u8 bi_level; + __u8 bi_pad[7]; +}; + +/** + * union nilfs_binfo: block information + * @bi_v: nilfs_binfo_v structure + * @bi_dat: nilfs_binfo_dat structure + */ +union nilfs_binfo { + struct nilfs_binfo_v bi_v; + struct nilfs_binfo_dat bi_dat; +}; + +/** + * struct nilfs_segment_summary - segment summary header + * @ss_datasum: checksum of data + * @ss_sumsum: checksum of segment summary + * @ss_magic: magic number + * @ss_bytes: size of this structure in bytes + * @ss_flags: flags + * @ss_seq: sequence number + * @ss_create: creation timestamp + * @ss_next: next segment + * @ss_nblocks: number of blocks + * @ss_nfinfo: number of finfo structures + * @ss_sumbytes: total size of segment summary in bytes + * @ss_pad: padding + * @ss_cno: checkpoint number + */ +struct nilfs_segment_summary { + __le32 ss_datasum; + __le32 ss_sumsum; + __le32 ss_magic; + __le16 ss_bytes; + __le16 ss_flags; + __le64 ss_seq; + __le64 ss_create; + __le64 ss_next; + __le32 ss_nblocks; + __le32 ss_nfinfo; + __le32 ss_sumbytes; + __le32 ss_pad; + __le64 ss_cno; + /* array of finfo structures */ +}; + +#define NILFS_SEGSUM_MAGIC 0x1eaffa11 /* segment summary magic number */ + +/* + * Segment summary flags + */ +#define NILFS_SS_LOGBGN 0x0001 /* begins a logical segment */ +#define NILFS_SS_LOGEND 0x0002 /* ends a logical segment */ +#define NILFS_SS_SR 0x0004 /* has super root */ +#define NILFS_SS_SYNDT 0x0008 /* includes data only updates */ +#define NILFS_SS_GC 0x0010 /* segment written for cleaner operation */ + +/** + * struct nilfs_btree_node - header of B-tree node block + * @bn_flags: flags + * @bn_level: level + * @bn_nchildren: number of children + * @bn_pad: padding + */ +struct nilfs_btree_node { + __u8 bn_flags; + __u8 bn_level; + __le16 bn_nchildren; + __le32 bn_pad; +}; + +/* flags */ +#define NILFS_BTREE_NODE_ROOT 0x01 + +/* level */ +#define NILFS_BTREE_LEVEL_DATA 0 +#define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) +#define NILFS_BTREE_LEVEL_MAX 14 /* Max level (exclusive) */ + +/** + * struct nilfs_direct_node - header of built-in bmap array + * @dn_flags: flags + * @dn_pad: padding + */ +struct nilfs_direct_node { + __u8 dn_flags; + __u8 pad[7]; +}; + +/** + * struct nilfs_palloc_group_desc - block group descriptor + * @pg_nfrees: number of free entries in block group + */ +struct nilfs_palloc_group_desc { + __le32 pg_nfrees; +}; + +/** + * struct nilfs_dat_entry - disk address translation entry + * @de_blocknr: block number + * @de_start: start checkpoint number + * @de_end: end checkpoint number + * @de_rsv: reserved for future use + */ +struct nilfs_dat_entry { + __le64 de_blocknr; + __le64 de_start; + __le64 de_end; + __le64 de_rsv; +}; + +#define NILFS_MIN_DAT_ENTRY_SIZE 32 + +/** + * struct nilfs_snapshot_list - snapshot list + * @ssl_next: next checkpoint number on snapshot list + * @ssl_prev: previous checkpoint number on snapshot list + */ +struct nilfs_snapshot_list { + __le64 ssl_next; + __le64 ssl_prev; +}; + +/** + * struct nilfs_checkpoint - checkpoint structure + * @cp_flags: flags + * @cp_checkpoints_count: checkpoints count in a block + * @cp_snapshot_list: snapshot list + * @cp_cno: checkpoint number + * @cp_create: creation timestamp + * @cp_nblk_inc: number of blocks incremented by this checkpoint + * @cp_inodes_count: inodes count + * @cp_blocks_count: blocks count + * @cp_ifile_inode: inode of ifile + */ +struct nilfs_checkpoint { + __le32 cp_flags; + __le32 cp_checkpoints_count; + struct nilfs_snapshot_list cp_snapshot_list; + __le64 cp_cno; + __le64 cp_create; + __le64 cp_nblk_inc; + __le64 cp_inodes_count; + __le64 cp_blocks_count; + + /* + * Do not change the byte offset of ifile inode. + * To keep the compatibility of the disk format, + * additional fields should be added behind cp_ifile_inode. + */ + struct nilfs_inode cp_ifile_inode; +}; + +#define NILFS_MIN_CHECKPOINT_SIZE (64 + NILFS_MIN_INODE_SIZE) + +/* checkpoint flags */ +enum { + NILFS_CHECKPOINT_SNAPSHOT, + NILFS_CHECKPOINT_INVALID, + NILFS_CHECKPOINT_SKETCH, + NILFS_CHECKPOINT_MINOR, +}; + +#define NILFS_CHECKPOINT_FNS(flag, name) \ +static inline void \ +nilfs_checkpoint_set_##name(struct nilfs_checkpoint *cp) \ +{ \ + cp->cp_flags = cpu_to_le32(le32_to_cpu(cp->cp_flags) | \ + (1UL << NILFS_CHECKPOINT_##flag)); \ +} \ +static inline void \ +nilfs_checkpoint_clear_##name(struct nilfs_checkpoint *cp) \ +{ \ + cp->cp_flags = cpu_to_le32(le32_to_cpu(cp->cp_flags) & \ + ~(1UL << NILFS_CHECKPOINT_##flag)); \ +} \ +static inline int \ +nilfs_checkpoint_##name(const struct nilfs_checkpoint *cp) \ +{ \ + return !!(le32_to_cpu(cp->cp_flags) & \ + (1UL << NILFS_CHECKPOINT_##flag)); \ +} + +NILFS_CHECKPOINT_FNS(SNAPSHOT, snapshot) +NILFS_CHECKPOINT_FNS(INVALID, invalid) +NILFS_CHECKPOINT_FNS(MINOR, minor) + +/** + * struct nilfs_cpfile_header - checkpoint file header + * @ch_ncheckpoints: number of checkpoints + * @ch_nsnapshots: number of snapshots + * @ch_snapshot_list: snapshot list + */ +struct nilfs_cpfile_header { + __le64 ch_ncheckpoints; + __le64 ch_nsnapshots; + struct nilfs_snapshot_list ch_snapshot_list; +}; + +#define NILFS_CPFILE_FIRST_CHECKPOINT_OFFSET \ + ((sizeof(struct nilfs_cpfile_header) + \ + sizeof(struct nilfs_checkpoint) - 1) / \ + sizeof(struct nilfs_checkpoint)) + +/** + * struct nilfs_segment_usage - segment usage + * @su_lastmod: last modified timestamp + * @su_nblocks: number of blocks in segment + * @su_flags: flags + */ +struct nilfs_segment_usage { + __le64 su_lastmod; + __le32 su_nblocks; + __le32 su_flags; +}; + +#define NILFS_MIN_SEGMENT_USAGE_SIZE 16 + +/* segment usage flag */ +enum { + NILFS_SEGMENT_USAGE_ACTIVE, + NILFS_SEGMENT_USAGE_DIRTY, + NILFS_SEGMENT_USAGE_ERROR, +}; + +#define NILFS_SEGMENT_USAGE_FNS(flag, name) \ +static inline void \ +nilfs_segment_usage_set_##name(struct nilfs_segment_usage *su) \ +{ \ + su->su_flags = cpu_to_le32(le32_to_cpu(su->su_flags) | \ + (1UL << NILFS_SEGMENT_USAGE_##flag));\ +} \ +static inline void \ +nilfs_segment_usage_clear_##name(struct nilfs_segment_usage *su) \ +{ \ + su->su_flags = \ + cpu_to_le32(le32_to_cpu(su->su_flags) & \ + ~(1UL << NILFS_SEGMENT_USAGE_##flag)); \ +} \ +static inline int \ +nilfs_segment_usage_##name(const struct nilfs_segment_usage *su) \ +{ \ + return !!(le32_to_cpu(su->su_flags) & \ + (1UL << NILFS_SEGMENT_USAGE_##flag)); \ +} + +NILFS_SEGMENT_USAGE_FNS(ACTIVE, active) +NILFS_SEGMENT_USAGE_FNS(DIRTY, dirty) +NILFS_SEGMENT_USAGE_FNS(ERROR, error) + +static inline void +nilfs_segment_usage_set_clean(struct nilfs_segment_usage *su) +{ + su->su_lastmod = cpu_to_le64(0); + su->su_nblocks = cpu_to_le32(0); + su->su_flags = cpu_to_le32(0); +} + +static inline int +nilfs_segment_usage_clean(const struct nilfs_segment_usage *su) +{ + return !le32_to_cpu(su->su_flags); +} + +/** + * struct nilfs_sufile_header - segment usage file header + * @sh_ncleansegs: number of clean segments + * @sh_ndirtysegs: number of dirty segments + * @sh_last_alloc: last allocated segment number + */ +struct nilfs_sufile_header { + __le64 sh_ncleansegs; + __le64 sh_ndirtysegs; + __le64 sh_last_alloc; + /* ... */ +}; + +#define NILFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET \ + ((sizeof(struct nilfs_sufile_header) + \ + sizeof(struct nilfs_segment_usage) - 1) / \ + sizeof(struct nilfs_segment_usage)) + +#endif /* _LINUX_NILFS2_ONDISK_H */ -- cgit v1.2.3-55-g7522 From 7e7814180b334dff97ef8f56c7c40c277ad4531c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 2 Aug 2016 14:05:36 -0700 Subject: signal: consolidate {TS,TLF}_RESTORE_SIGMASK code In general, there's no need for the "restore sigmask" flag to live in ti->flags. alpha, ia64, microblaze, powerpc, sh, sparc (64-bit only), tile, and x86 use essentially identical alternative implementations, placing the flag in ti->status. Replace those optimized implementations with an equally good common implementation that stores it in a bitfield in struct task_struct and drop the custom implementations. Additional architectures can opt in by removing their TIF_RESTORE_SIGMASK defines. Link: http://lkml.kernel.org/r/8a14321d64a28e40adfddc90e18a96c086a6d6f9.1468522723.git.luto@kernel.org Signed-off-by: Andy Lutomirski Tested-by: Michael Ellerman [powerpc] Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Tony Luck Cc: Fenghua Yu Cc: Michal Simek Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Yoshinori Sato Cc: Rich Felker Cc: "David S. Miller" Cc: Chris Metcalf Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Brian Gerst Cc: Dmitry Safonov Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/thread_info.h | 27 ------------- arch/ia64/include/asm/thread_info.h | 28 -------------- arch/microblaze/include/asm/thread_info.h | 27 ------------- arch/powerpc/include/asm/thread_info.h | 25 ------------ arch/sh/include/asm/thread_info.h | 26 ------------- arch/sparc/include/asm/thread_info_64.h | 24 ------------ arch/tile/include/asm/thread_info.h | 27 ------------- arch/x86/include/asm/thread_info.h | 24 ------------ include/linux/sched.h | 63 +++++++++++++++++++++++++++++++ include/linux/thread_info.h | 41 -------------------- 10 files changed, 63 insertions(+), 249 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 32e920a83ae5..e9e90bfa2b50 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -86,33 +86,6 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define TS_UAC_NOPRINT 0x0001 /* ! Preserve the following three */ #define TS_UAC_NOFIX 0x0002 /* ! flags as they match */ #define TS_UAC_SIGBUS 0x0004 /* ! userspace part of 'osf_sysinfo' */ -#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ - -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} -#endif #define SET_UNALIGN_CTL(task,value) ({ \ __u32 status = task_thread_info(task)->status & ~UAC_BITMASK; \ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index d1212b84fb83..29bd59790d6c 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -121,32 +121,4 @@ struct thread_info { /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) -#define TS_RESTORE_SIGMASK 2 /* restore signal mask in do_signal() */ - -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} -#endif /* !__ASSEMBLY__ */ - #endif /* _ASM_IA64_THREAD_INFO_H */ diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index 383f387b4eee..e7e8954e9815 100644 --- a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -148,33 +148,6 @@ static inline struct thread_info *current_thread_info(void) */ /* FPU was used by this task this quantum (SMP) */ #define TS_USEDFPU 0x0001 -#define TS_RESTORE_SIGMASK 0x0002 - -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} -#endif #endif /* __KERNEL__ */ #endif /* _ASM_MICROBLAZE_THREAD_INFO_H */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index b21bb1f72314..87e4b2d8dcd4 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -138,40 +138,15 @@ static inline struct thread_info *current_thread_info(void) /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ #define TLF_NAPPING 0 /* idle thread enabled NAP mode */ #define TLF_SLEEPING 1 /* suspend code enabled SLEEP mode */ -#define TLF_RESTORE_SIGMASK 2 /* Restore signal mask in do_signal */ #define TLF_LAZY_MMU 3 /* tlb_batch is active */ #define TLF_RUNLATCH 4 /* Is the runlatch enabled? */ #define _TLF_NAPPING (1 << TLF_NAPPING) #define _TLF_SLEEPING (1 << TLF_SLEEPING) -#define _TLF_RESTORE_SIGMASK (1 << TLF_RESTORE_SIGMASK) #define _TLF_LAZY_MMU (1 << TLF_LAZY_MMU) #define _TLF_RUNLATCH (1 << TLF_RUNLATCH) #ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->local_flags |= _TLF_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->local_flags & _TLF_RESTORE_SIGMASK)) - return false; - ti->local_flags &= ~_TLF_RESTORE_SIGMASK; - return true; -} static inline bool test_thread_local_flags(unsigned int flags) { diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 2afa321157be..6c65dcd470ab 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -151,19 +151,10 @@ extern void init_thread_xstate(void); * ever touches our thread-synchronous status, so we don't * have to worry about atomic accesses. */ -#define TS_RESTORE_SIGMASK 0x0001 /* restore signal mask in do_signal() */ #define TS_USEDFPU 0x0002 /* FPU used by this task this quantum */ #ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags)); -} - #define TI_FLAG_FAULT_CODE_SHIFT 24 /* @@ -182,23 +173,6 @@ static inline unsigned int get_thread_fault_code(void) return ti->flags >> TI_FLAG_FAULT_CODE_SHIFT; } -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} - #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index bde59825d06c..3d7b925f6516 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -222,32 +222,8 @@ register struct thread_info *current_thread_info_reg asm("g6"); * * Note that there are only 8 bits available. */ -#define TS_RESTORE_SIGMASK 0x0001 /* restore signal mask in do_signal() */ #ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} #define thread32_stack_is_64bit(__SP) (((__SP) & 0x1) != 0) #define test_thread_64bit_stack(__SP) \ diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index c1467ac59ce6..b7659b8f1117 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -166,32 +166,5 @@ extern void _cpu_idle(void); #ifdef __tilegx__ #define TS_COMPAT 0x0001 /* 32-bit compatibility mode */ #endif -#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal */ - -#ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} -#endif /* !__ASSEMBLY__ */ #endif /* _ASM_TILE_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 89bff044a6f5..b45ffdda3549 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -219,32 +219,8 @@ static inline unsigned long current_stack_pointer(void) * have to worry about atomic accesses. */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ -#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ #ifndef __ASSEMBLY__ -#define HAVE_SET_RESTORE_SIGMASK 1 -static inline void set_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - ti->status |= TS_RESTORE_SIGMASK; - WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags)); -} -static inline void clear_restore_sigmask(void) -{ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; -} -static inline bool test_restore_sigmask(void) -{ - return current_thread_info()->status & TS_RESTORE_SIGMASK; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - struct thread_info *ti = current_thread_info(); - if (!(ti->status & TS_RESTORE_SIGMASK)) - return false; - ti->status &= ~TS_RESTORE_SIGMASK; - return true; -} static inline bool in_ia32_syscall(void) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 553af2923824..62c68e513e39 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1547,6 +1547,9 @@ struct task_struct { /* unserialized, strictly 'current' */ unsigned in_execve:1; /* bit to tell LSMs we're in execve */ unsigned in_iowait:1; +#if !defined(TIF_RESTORE_SIGMASK) + unsigned restore_sigmask:1; +#endif #ifdef CONFIG_MEMCG unsigned memcg_may_oom:1; #ifndef CONFIG_SLOB @@ -2680,6 +2683,66 @@ extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); +#ifdef TIF_RESTORE_SIGMASK +/* + * Legacy restore_sigmask accessors. These are inefficient on + * SMP architectures because they require atomic operations. + */ + +/** + * set_restore_sigmask() - make sure saved_sigmask processing gets done + * + * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code + * will run before returning to user mode, to process the flag. For + * all callers, TIF_SIGPENDING is already set or it's no harm to set + * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the + * arch code will notice on return to user mode, in case those bits + * are scarce. We set TIF_SIGPENDING here to ensure that the arch + * signal code always gets run when TIF_RESTORE_SIGMASK is set. + */ +static inline void set_restore_sigmask(void) +{ + set_thread_flag(TIF_RESTORE_SIGMASK); + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + clear_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_restore_sigmask(void) +{ + return test_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_and_clear_restore_sigmask(void) +{ + return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); +} + +#else /* TIF_RESTORE_SIGMASK */ + +/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ +static inline void set_restore_sigmask(void) +{ + current->restore_sigmask = true; + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + current->restore_sigmask = false; +} +static inline bool test_restore_sigmask(void) +{ + return current->restore_sigmask; +} +static inline bool test_and_clear_restore_sigmask(void) +{ + if (!current->restore_sigmask) + return false; + current->restore_sigmask = false; + return true; +} +#endif + static inline void restore_saved_sigmask(void) { if (test_and_clear_restore_sigmask()) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index b4c2a485b28a..352b1542f5cc 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -105,47 +105,6 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) #define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) -#if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK -/* - * An arch can define its own version of set_restore_sigmask() to get the - * job done however works, with or without TIF_RESTORE_SIGMASK. - */ -#define HAVE_SET_RESTORE_SIGMASK 1 - -/** - * set_restore_sigmask() - make sure saved_sigmask processing gets done - * - * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code - * will run before returning to user mode, to process the flag. For - * all callers, TIF_SIGPENDING is already set or it's no harm to set - * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the - * arch code will notice on return to user mode, in case those bits - * are scarce. We set TIF_SIGPENDING here to ensure that the arch - * signal code always gets run when TIF_RESTORE_SIGMASK is set. - */ -static inline void set_restore_sigmask(void) -{ - set_thread_flag(TIF_RESTORE_SIGMASK); - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - clear_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_restore_sigmask(void) -{ - return test_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_and_clear_restore_sigmask(void) -{ - return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); -} -#endif /* TIF_RESTORE_SIGMASK && !HAVE_SET_RESTORE_SIGMASK */ - -#ifndef HAVE_SET_RESTORE_SIGMASK -#error "no set_restore_sigmask() provided and default one won't work" -#endif - #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ -- cgit v1.2.3-55-g7522 From b06fb415331a7beb841f3d20d0fe60f6f0787dba Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 2 Aug 2016 14:05:42 -0700 Subject: cpumask: fix code comment Fix code comment for cpumask_parse(). Link: http://lkml.kernel.org/r/71aae2c60ae5dae0cf554199ce6aea8f88c69347.1465380581.git.geliangtang@gmail.com Signed-off-by: Geliang Tang Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index e828cf65d7df..da7fbf1cdd56 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -579,7 +579,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, } /** - * cpumask_parse - extract a cpumask from from a string + * cpumask_parse - extract a cpumask from a string * @buf: the buffer to extract from * @dstp: the cpumask to set. * -- cgit v1.2.3-55-g7522 From dc5cccacf4272da4aba20a1fc0804d59d985ab32 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 2 Aug 2016 14:05:54 -0700 Subject: kexec: don't invoke OOM-killer for control page allocation If we are unable to find a suitable page when allocating the control page, do not invoke the OOM-killer: killing processes probably isn't going to help. Link: http://lkml.kernel.org/r/E1b8ko9-0004HG-R5@rmk-PC.armlinux.org.uk Signed-off-by: Russell King Reviewed-by: Pratyush Anand Acked-by: Baoquan He Cc: Keerthy Cc: Vitaly Andrianov Cc: Eric Biederman Cc: Dave Young Cc: Vivek Goyal Cc: Simon Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index e8acb2b43dd9..ce2fe197f583 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -41,7 +41,7 @@ #endif #ifndef KEXEC_CONTROL_MEMORY_GFP -#define KEXEC_CONTROL_MEMORY_GFP GFP_KERNEL +#define KEXEC_CONTROL_MEMORY_GFP (GFP_KERNEL | __GFP_NORETRY) #endif #ifndef KEXEC_CONTROL_PAGE_SIZE -- cgit v1.2.3-55-g7522 From dae28018f56645b61f5beb84d5831346d3c5e457 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 2 Aug 2016 14:06:00 -0700 Subject: kdump: arrange for paddr_vmcoreinfo_note() to return phys_addr_t On PAE systems (eg, ARM LPAE) the vmcore note may be located above 4GB physical on 32-bit architectures, so we need a wider type than "unsigned long" here. Arrange for paddr_vmcoreinfo_note() to return a phys_addr_t, thereby allowing it to be located above 4GB. This makes no difference for kexec-tools, as they already assume a 64-bit type when reading from this file. Link: http://lkml.kernel.org/r/E1b8koK-0004HS-K9@rmk-PC.armlinux.org.uk Signed-off-by: Russell King Reviewed-by: Pratyush Anand Acked-by: Baoquan He Cc: Keerthy Cc: Vitaly Andrianov Cc: Eric Biederman Cc: Dave Young Cc: Vivek Goyal Cc: Simon Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/machine_kexec.c | 2 +- include/linux/kexec.h | 2 +- kernel/kexec_core.c | 2 +- kernel/ksysfs.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index b72cd7a07222..599507bcec91 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -163,7 +163,7 @@ void arch_crash_save_vmcoreinfo(void) #endif } -unsigned long paddr_vmcoreinfo_note(void) +phys_addr_t paddr_vmcoreinfo_note(void) { return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note); } diff --git a/include/linux/kexec.h b/include/linux/kexec.h index ce2fe197f583..555227f0029f 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -233,7 +233,7 @@ void crash_save_vmcoreinfo(void); void arch_crash_save_vmcoreinfo(void); __printf(1, 2) void vmcoreinfo_append_str(const char *fmt, ...); -unsigned long paddr_vmcoreinfo_note(void); +phys_addr_t paddr_vmcoreinfo_note(void); #define VMCOREINFO_OSRELEASE(value) \ vmcoreinfo_append_str("OSRELEASE=%s\n", value) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 5a83b2a9d584..dab03f17be25 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1372,7 +1372,7 @@ void vmcoreinfo_append_str(const char *fmt, ...) void __weak arch_crash_save_vmcoreinfo(void) {} -unsigned long __weak paddr_vmcoreinfo_note(void) +phys_addr_t __weak paddr_vmcoreinfo_note(void) { return __pa((unsigned long)(char *)&vmcoreinfo_note); } diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 152da4a48867..9f1920d2d0c6 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -128,8 +128,8 @@ KERNEL_ATTR_RW(kexec_crash_size); static ssize_t vmcoreinfo_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%lx %x\n", - paddr_vmcoreinfo_note(), + phys_addr_t vmcore_base = paddr_vmcoreinfo_note(); + return sprintf(buf, "%pa %x\n", &vmcore_base, (unsigned int)sizeof(vmcoreinfo_note)); } KERNEL_ATTR_RO(vmcoreinfo); -- cgit v1.2.3-55-g7522 From 43546d8669d62d75fa69ca9a45d2f586665f56bd Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 2 Aug 2016 14:06:04 -0700 Subject: kexec: allow architectures to override boot mapping kexec physical addresses are the boot-time view of the system. For certain ARM systems (such as Keystone 2), the boot view of the system does not match the kernel's view of the system: the boot view uses a special alias in the lower 4GB of the physical address space. To cater for these kinds of setups, we need to translate between the boot view physical addresses and the normal kernel view physical addresses. This patch extracts the current transation points into linux/kexec.h, and allows an architecture to override the functions. Due to the translations required, we unfortunately end up with six translation functions, which are reduced down to four that the architecture can override. [akpm@linux-foundation.org: kexec.h needs asm/io.h for phys_to_virt()] Link: http://lkml.kernel.org/r/E1b8koP-0004HZ-Vf@rmk-PC.armlinux.org.uk Signed-off-by: Russell King Cc: Keerthy Cc: Pratyush Anand Cc: Vitaly Andrianov Cc: Eric Biederman Cc: Dave Young Cc: Baoquan He Cc: Vivek Goyal Cc: Simon Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 40 ++++++++++++++++++++++++++++++++++++++++ kernel/kexec.c | 3 ++- kernel/kexec_core.c | 26 +++++++++++++------------- 3 files changed, 55 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 555227f0029f..23e14a460cfb 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -14,6 +14,8 @@ #if !defined(__ASSEMBLY__) +#include + #include #ifdef CONFIG_KEXEC_CORE @@ -318,6 +320,44 @@ int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, void arch_kexec_protect_crashkres(void); void arch_kexec_unprotect_crashkres(void); +#ifndef page_to_boot_pfn +static inline unsigned long page_to_boot_pfn(struct page *page) +{ + return page_to_pfn(page); +} +#endif + +#ifndef boot_pfn_to_page +static inline struct page *boot_pfn_to_page(unsigned long boot_pfn) +{ + return pfn_to_page(boot_pfn); +} +#endif + +#ifndef phys_to_boot_phys +static inline unsigned long phys_to_boot_phys(phys_addr_t phys) +{ + return phys; +} +#endif + +#ifndef boot_phys_to_phys +static inline phys_addr_t boot_phys_to_phys(unsigned long boot_phys) +{ + return boot_phys; +} +#endif + +static inline unsigned long virt_to_boot_phys(void *addr) +{ + return phys_to_boot_phys(__pa((unsigned long)addr)); +} + +static inline void *boot_phys_to_virt(unsigned long entry) +{ + return phys_to_virt(boot_phys_to_phys(entry)); +} + #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; diff --git a/kernel/kexec.c b/kernel/kexec.c index 4384672d3245..980936a90ee6 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -48,7 +48,8 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, if (kexec_on_panic) { /* Verify we have a valid entry point */ - if ((entry < crashk_res.start) || (entry > crashk_res.end)) + if ((entry < phys_to_boot_phys(crashk_res.start)) || + (entry > phys_to_boot_phys(crashk_res.end))) return -EADDRNOTAVAIL; } diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index dab03f17be25..73d4c5f57dd8 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -225,8 +225,8 @@ int sanity_check_segment_list(struct kimage *image) mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz - 1; /* Ensure we are within the crash kernel limits */ - if ((mstart < crashk_res.start) || - (mend > crashk_res.end)) + if ((mstart < phys_to_boot_phys(crashk_res.start)) || + (mend > phys_to_boot_phys(crashk_res.end))) return -EADDRNOTAVAIL; } } @@ -350,7 +350,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); if (!pages) break; - pfn = page_to_pfn(pages); + pfn = page_to_boot_pfn(pages); epfn = pfn + count; addr = pfn << PAGE_SHIFT; eaddr = epfn << PAGE_SHIFT; @@ -476,7 +476,7 @@ static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) return -ENOMEM; ind_page = page_address(page); - *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; + *image->entry = virt_to_boot_phys(ind_page) | IND_INDIRECTION; image->entry = ind_page; image->last_entry = ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); @@ -531,13 +531,13 @@ void kimage_terminate(struct kimage *image) #define for_each_kimage_entry(image, ptr, entry) \ for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ ptr = (entry & IND_INDIRECTION) ? \ - phys_to_virt((entry & PAGE_MASK)) : ptr + 1) + boot_phys_to_virt((entry & PAGE_MASK)) : ptr + 1) static void kimage_free_entry(kimage_entry_t entry) { struct page *page; - page = pfn_to_page(entry >> PAGE_SHIFT); + page = boot_pfn_to_page(entry >> PAGE_SHIFT); kimage_free_pages(page); } @@ -631,7 +631,7 @@ static struct page *kimage_alloc_page(struct kimage *image, * have a match. */ list_for_each_entry(page, &image->dest_pages, lru) { - addr = page_to_pfn(page) << PAGE_SHIFT; + addr = page_to_boot_pfn(page) << PAGE_SHIFT; if (addr == destination) { list_del(&page->lru); return page; @@ -646,12 +646,12 @@ static struct page *kimage_alloc_page(struct kimage *image, if (!page) return NULL; /* If the page cannot be used file it away */ - if (page_to_pfn(page) > + if (page_to_boot_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { list_add(&page->lru, &image->unusable_pages); continue; } - addr = page_to_pfn(page) << PAGE_SHIFT; + addr = page_to_boot_pfn(page) << PAGE_SHIFT; /* If it is the destination page we want use it */ if (addr == destination) @@ -674,7 +674,7 @@ static struct page *kimage_alloc_page(struct kimage *image, struct page *old_page; old_addr = *old & PAGE_MASK; - old_page = pfn_to_page(old_addr >> PAGE_SHIFT); + old_page = boot_pfn_to_page(old_addr >> PAGE_SHIFT); copy_highpage(page, old_page); *old = addr | (*old & ~PAGE_MASK); @@ -730,7 +730,7 @@ static int kimage_load_normal_segment(struct kimage *image, result = -ENOMEM; goto out; } - result = kimage_add_page(image, page_to_pfn(page) + result = kimage_add_page(image, page_to_boot_pfn(page) << PAGE_SHIFT); if (result < 0) goto out; @@ -791,7 +791,7 @@ static int kimage_load_crash_segment(struct kimage *image, char *ptr; size_t uchunk, mchunk; - page = pfn_to_page(maddr >> PAGE_SHIFT); + page = boot_pfn_to_page(maddr >> PAGE_SHIFT); if (!page) { result = -ENOMEM; goto out; @@ -919,7 +919,7 @@ void __weak crash_free_reserved_phys_range(unsigned long begin, unsigned long addr; for (addr = begin; addr < end; addr += PAGE_SIZE) - free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); + free_reserved_page(boot_pfn_to_page(addr >> PAGE_SHIFT)); } int crash_shrink_memory(unsigned long new_size) -- cgit v1.2.3-55-g7522 From 21db79e8bb054d0351a6b1b464f1c9c47a2e6e8d Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 2 Aug 2016 14:06:16 -0700 Subject: kexec: add a kexec_crash_loaded() function Provide a wrapper function to be used by kernel code to check whether a crash kernel is loaded. It returns the same value that can be seen in /sys/kernel/kexec_crash_loaded by userspace programs. I'm exporting the function, because it will be used by Xen, and it is possible to compile Xen modules separately to enable the use of PV drivers with unmodified bare-metal kernels. Link: http://lkml.kernel.org/r/20160713121955.14969.69080.stgit@hananiah.suse.cz Signed-off-by: Petr Tesarik Cc: Juergen Gross Cc: Josh Triplett Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Eric Biederman Cc: "H. Peter Anvin" Cc: Boris Ostrovsky Cc: "Paul E. McKenney" Cc: Dave Young Cc: David Vrabel Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 2 ++ kernel/kexec_core.c | 6 ++++++ kernel/ksysfs.c | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 23e14a460cfb..d7437777baaa 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -230,6 +230,7 @@ extern void *kexec_purgatory_get_symbol_addr(struct kimage *image, extern void __crash_kexec(struct pt_regs *); extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); +int kexec_crash_loaded(void); void crash_save_cpu(struct pt_regs *regs, int cpu); void crash_save_vmcoreinfo(void); void arch_crash_save_vmcoreinfo(void); @@ -364,6 +365,7 @@ struct task_struct; static inline void __crash_kexec(struct pt_regs *regs) { } static inline void crash_kexec(struct pt_regs *regs) { } static inline int kexec_should_crash(struct task_struct *p) { return 0; } +static inline int kexec_crash_loaded(void) { return 0; } #define kexec_in_progress false #endif /* CONFIG_KEXEC_CORE */ diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 73d4c5f57dd8..704534029a00 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -95,6 +95,12 @@ int kexec_should_crash(struct task_struct *p) return 0; } +int kexec_crash_loaded(void) +{ + return !!kexec_crash_image; +} +EXPORT_SYMBOL_GPL(kexec_crash_loaded); + /* * When kexec transitions to the new kernel there is a one-to-one * mapping between physical and virtual addresses. On processors diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 9f1920d2d0c6..ee1bc1bb8feb 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -101,7 +101,7 @@ KERNEL_ATTR_RO(kexec_loaded); static ssize_t kexec_crash_loaded_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", !!kexec_crash_image); + return sprintf(buf, "%d\n", kexec_crash_loaded()); } KERNEL_ATTR_RO(kexec_crash_loaded); -- cgit v1.2.3-55-g7522 From b6e8d4aa1110306378af0f3472a6b85a1f039a16 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 2 Aug 2016 14:06:25 -0700 Subject: rapidio: add RapidIO channelized messaging driver Add channelized messaging driver to support native RapidIO messaging exchange between multiple senders/recipients on devices that use kernel RapidIO subsystem services. This device driver is the result of collaboration within the RapidIO.org Software Task Group (STG) between Texas Instruments, Prodrive Technologies, Nokia Networks, BAE and IDT. Additional input was received from other members of RapidIO.org. The objective was to create a character mode driver interface which exposes messaging capabilities of RapidIO endpoint devices (mports) directly to applications, in a manner that allows the numerous and varied RapidIO implementations to interoperate. This char mode device driver allows user-space applications to setup messaging communication channels using single shared RapidIO messaging mailbox. By default this driver uses RapidIO MBOX_1 (MBOX_0 is reserved for use by RIONET Ethernet emulation driver). [weiyj.lk@gmail.com: rapidio/rio_cm: fix return value check in riocm_init()] Link: http://lkml.kernel.org/r/1469198221-21970-1-git-send-email-alexandre.bounine@idt.com Link: http://lkml.kernel.org/r/1468952862-18056-1-git-send-email-alexandre.bounine@idt.com Signed-off-by: Alexandre Bounine Tested-by: Barry Wood Cc: Matt Porter Cc: Aurelien Jacquiot Cc: Andre van Herk Cc: Barry Wood Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/rapidio/rio_cm.txt | 119 ++ drivers/rapidio/Kconfig | 9 + drivers/rapidio/Makefile | 1 + drivers/rapidio/rio_cm.c | 2366 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/rio_cm_cdev.h | 78 ++ 6 files changed, 2574 insertions(+) create mode 100644 Documentation/rapidio/rio_cm.txt create mode 100644 drivers/rapidio/rio_cm.c create mode 100644 include/uapi/linux/rio_cm_cdev.h (limited to 'include') diff --git a/Documentation/rapidio/rio_cm.txt b/Documentation/rapidio/rio_cm.txt new file mode 100644 index 000000000000..27aa401f1126 --- /dev/null +++ b/Documentation/rapidio/rio_cm.txt @@ -0,0 +1,119 @@ +RapidIO subsystem Channelized Messaging character device driver (rio_cm.c) +========================================================================== + +Version History: +---------------- + 1.0.0 - Initial driver release. + +========================================================================== + +I. Overview + +This device driver is the result of collaboration within the RapidIO.org +Software Task Group (STG) between Texas Instruments, Prodrive Technologies, +Nokia Networks, BAE and IDT. Additional input was received from other members +of RapidIO.org. + +The objective was to create a character mode driver interface which exposes +messaging capabilities of RapidIO endpoint devices (mports) directly +to applications, in a manner that allows the numerous and varied RapidIO +implementations to interoperate. + +This driver (RIO_CM) provides to user-space applications shared access to +RapidIO mailbox messaging resources. + +RapidIO specification (Part 2) defines that endpoint devices may have up to four +messaging mailboxes in case of multi-packet message (up to 4KB) and +up to 64 mailboxes if single-packet messages (up to 256 B) are used. In addition +to protocol definition limitations, a particular hardware implementation can +have reduced number of messaging mailboxes. RapidIO aware applications must +therefore share the messaging resources of a RapidIO endpoint. + +Main purpose of this device driver is to provide RapidIO mailbox messaging +capability to large number of user-space processes by introducing socket-like +operations using a single messaging mailbox. This allows applications to +use the limited RapidIO messaging hardware resources efficiently. + +Most of device driver's operations are supported through 'ioctl' system calls. + +When loaded this device driver creates a single file system node named rio_cm +in /dev directory common for all registered RapidIO mport devices. + +Following ioctl commands are available to user-space applications: + +- RIO_CM_MPORT_GET_LIST : Returns to caller list of local mport devices that + support messaging operations (number of entries up to RIO_MAX_MPORTS). + Each list entry is combination of mport's index in the system and RapidIO + destination ID assigned to the port. +- RIO_CM_EP_GET_LIST_SIZE : Returns number of messaging capable remote endpoints + in a RapidIO network associated with the specified mport device. +- RIO_CM_EP_GET_LIST : Returns list of RapidIO destination IDs for messaging + capable remote endpoints (peers) available in a RapidIO network associated + with the specified mport device. +- RIO_CM_CHAN_CREATE : Creates RapidIO message exchange channel data structure + with channel ID assigned automatically or as requested by a caller. +- RIO_CM_CHAN_BIND : Binds the specified channel data structure to the specified + mport device. +- RIO_CM_CHAN_LISTEN : Enables listening for connection requests on the specified + channel. +- RIO_CM_CHAN_ACCEPT : Accepts a connection request from peer on the specified + channel. If wait timeout for this request is specified by a caller it is + a blocking call. If timeout set to 0 this is non-blocking call - ioctl + handler checks for a pending connection request and if one is not available + exits with -EGAIN error status immediately. +- RIO_CM_CHAN_CONNECT : Sends a connection request to a remote peer/channel. +- RIO_CM_CHAN_SEND : Sends a data message through the specified channel. + The handler for this request assumes that message buffer specified by + a caller includes the reserved space for a packet header required by + this driver. +- RIO_CM_CHAN_RECEIVE : Receives a data message through a connected channel. + If the channel does not have an incoming message ready to return this ioctl + handler will wait for new message until timeout specified by a caller + expires. If timeout value is set to 0, ioctl handler uses a default value + defined by MAX_SCHEDULE_TIMEOUT. +- RIO_CM_CHAN_CLOSE : Closes a specified channel and frees associated buffers. + If the specified channel is in the CONNECTED state, sends close notification + to the remote peer. + +The ioctl command codes and corresponding data structures intended for use by +user-space applications are defined in 'include/uapi/linux/rio_cm_cdev.h'. + +II. Hardware Compatibility + +This device driver uses standard interfaces defined by kernel RapidIO subsystem +and therefore it can be used with any mport device driver registered by RapidIO +subsystem with limitations set by available mport HW implementation of messaging +mailboxes. + +III. Module parameters + +- 'dbg_level' - This parameter allows to control amount of debug information + generated by this device driver. This parameter is formed by set of + bit masks that correspond to the specific functional block. + For mask definitions see 'drivers/rapidio/devices/rio_cm.c' + This parameter can be changed dynamically. + Use CONFIG_RAPIDIO_DEBUG=y to enable debug output at the top level. + +- 'cmbox' - Number of RapidIO mailbox to use (default value is 1). + This parameter allows to set messaging mailbox number that will be used + within entire RapidIO network. It can be used when default mailbox is + used by other device drivers or is not supported by some nodes in the + RapidIO network. + +- 'chstart' - Start channel number for dynamic assignment. Default value - 256. + Allows to exclude channel numbers below this parameter from dynamic + allocation to avoid conflicts with software components that use + reserved predefined channel numbers. + +IV. Known problems + + None. + +V. User-space Applications and API Library + +Messaging API library and applications that use this device driver are available +from RapidIO.org. + +VI. TODO List + +- Add support for system notification messages (reserved channel 0). diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig index b5a10d3c92c7..d6d2f20c4597 100644 --- a/drivers/rapidio/Kconfig +++ b/drivers/rapidio/Kconfig @@ -67,6 +67,15 @@ config RAPIDIO_ENUM_BASIC endchoice +config RAPIDIO_CHMAN + tristate "RapidIO Channelized Messaging driver" + depends on RAPIDIO + help + This option includes RapidIO channelized messaging driver which + provides socket-like interface to allow sharing of single RapidIO + messaging mailbox between multiple user-space applications. + See "Documentation/rapidio/rio_cm.txt" for driver description. + config RAPIDIO_MPORT_CDEV tristate "RapidIO /dev mport device driver" depends on RAPIDIO diff --git a/drivers/rapidio/Makefile b/drivers/rapidio/Makefile index 6271ada6993f..74dcea45ad49 100644 --- a/drivers/rapidio/Makefile +++ b/drivers/rapidio/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_RAPIDIO) += rapidio.o rapidio-y := rio.o rio-access.o rio-driver.o rio-sysfs.o obj-$(CONFIG_RAPIDIO_ENUM_BASIC) += rio-scan.o +obj-$(CONFIG_RAPIDIO_CHMAN) += rio_cm.o obj-$(CONFIG_RAPIDIO) += switches/ obj-$(CONFIG_RAPIDIO) += devices/ diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c new file mode 100644 index 000000000000..cecc15a880de --- /dev/null +++ b/drivers/rapidio/rio_cm.c @@ -0,0 +1,2366 @@ +/* + * rio_cm - RapidIO Channelized Messaging Driver + * + * Copyright 2013-2016 Integrated Device Technology, Inc. + * Copyright (c) 2015, Prodrive Technologies + * Copyright (c) 2015, RapidIO Trade Association + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS PROGRAM IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, + * BUT WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED WARRANTY OF + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE + * GNU GENERAL PUBLIC LICENSE FOR MORE DETAILS. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "rio_cm" +#define DRV_VERSION "1.0.0" +#define DRV_AUTHOR "Alexandre Bounine " +#define DRV_DESC "RapidIO Channelized Messaging Driver" +#define DEV_NAME "rio_cm" + +/* Debug output filtering masks */ +enum { + DBG_NONE = 0, + DBG_INIT = BIT(0), /* driver init */ + DBG_EXIT = BIT(1), /* driver exit */ + DBG_MPORT = BIT(2), /* mport add/remove */ + DBG_RDEV = BIT(3), /* RapidIO device add/remove */ + DBG_CHOP = BIT(4), /* channel operations */ + DBG_WAIT = BIT(5), /* waiting for events */ + DBG_TX = BIT(6), /* message TX */ + DBG_TX_EVENT = BIT(7), /* message TX event */ + DBG_RX_DATA = BIT(8), /* inbound data messages */ + DBG_RX_CMD = BIT(9), /* inbound REQ/ACK/NACK messages */ + DBG_ALL = ~0, +}; + +#ifdef DEBUG +#define riocm_debug(level, fmt, arg...) \ + do { \ + if (DBG_##level & dbg_level) \ + pr_debug(DRV_NAME ": %s " fmt "\n", \ + __func__, ##arg); \ + } while (0) +#else +#define riocm_debug(level, fmt, arg...) \ + no_printk(KERN_DEBUG pr_fmt(DRV_NAME fmt "\n"), ##arg) +#endif + +#define riocm_warn(fmt, arg...) \ + pr_warn(DRV_NAME ": %s WARNING " fmt "\n", __func__, ##arg) + +#define riocm_error(fmt, arg...) \ + pr_err(DRV_NAME ": %s ERROR " fmt "\n", __func__, ##arg) + + +static int cmbox = 1; +module_param(cmbox, int, S_IRUGO); +MODULE_PARM_DESC(cmbox, "RapidIO Mailbox number (default 1)"); + +static int chstart = 256; +module_param(chstart, int, S_IRUGO); +MODULE_PARM_DESC(chstart, + "Start channel number for dynamic allocation (default 256)"); + +#ifdef DEBUG +static u32 dbg_level = DBG_NONE; +module_param(dbg_level, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)"); +#endif + +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +#define RIOCM_TX_RING_SIZE 128 +#define RIOCM_RX_RING_SIZE 128 +#define RIOCM_CONNECT_TO 3 /* connect response TO (in sec) */ + +#define RIOCM_MAX_CHNUM 0xffff /* Use full range of u16 field */ +#define RIOCM_CHNUM_AUTO 0 +#define RIOCM_MAX_EP_COUNT 0x10000 /* Max number of endpoints */ + +enum rio_cm_state { + RIO_CM_IDLE, + RIO_CM_CONNECT, + RIO_CM_CONNECTED, + RIO_CM_DISCONNECT, + RIO_CM_CHAN_BOUND, + RIO_CM_LISTEN, + RIO_CM_DESTROYING, +}; + +enum rio_cm_pkt_type { + RIO_CM_SYS = 0xaa, + RIO_CM_CHAN = 0x55, +}; + +enum rio_cm_chop { + CM_CONN_REQ, + CM_CONN_ACK, + CM_CONN_CLOSE, + CM_DATA_MSG, +}; + +struct rio_ch_base_bhdr { + u32 src_id; + u32 dst_id; +#define RIO_HDR_LETTER_MASK 0xffff0000 +#define RIO_HDR_MBOX_MASK 0x0000ffff + u8 src_mbox; + u8 dst_mbox; + u8 type; +} __attribute__((__packed__)); + +struct rio_ch_chan_hdr { + struct rio_ch_base_bhdr bhdr; + u8 ch_op; + u16 dst_ch; + u16 src_ch; + u16 msg_len; + u16 rsrvd; +} __attribute__((__packed__)); + +struct tx_req { + struct list_head node; + struct rio_dev *rdev; + void *buffer; + size_t len; +}; + +struct cm_dev { + struct list_head list; + struct rio_mport *mport; + void *rx_buf[RIOCM_RX_RING_SIZE]; + int rx_slots; + struct mutex rx_lock; + + void *tx_buf[RIOCM_TX_RING_SIZE]; + int tx_slot; + int tx_cnt; + int tx_ack_slot; + struct list_head tx_reqs; + spinlock_t tx_lock; + + struct list_head peers; + u32 npeers; + struct workqueue_struct *rx_wq; + struct work_struct rx_work; +}; + +struct chan_rx_ring { + void *buf[RIOCM_RX_RING_SIZE]; + int head; + int tail; + int count; + + /* Tracking RX buffers reported to upper level */ + void *inuse[RIOCM_RX_RING_SIZE]; + int inuse_cnt; +}; + +struct rio_channel { + u16 id; /* local channel ID */ + struct kref ref; /* channel refcount */ + struct file *filp; + struct cm_dev *cmdev; /* associated CM device object */ + struct rio_dev *rdev; /* remote RapidIO device */ + enum rio_cm_state state; + int error; + spinlock_t lock; + void *context; + u32 loc_destid; /* local destID */ + u32 rem_destid; /* remote destID */ + u16 rem_channel; /* remote channel ID */ + struct list_head accept_queue; + struct list_head ch_node; + struct completion comp; + struct completion comp_close; + struct chan_rx_ring rx_ring; +}; + +struct cm_peer { + struct list_head node; + struct rio_dev *rdev; +}; + +struct rio_cm_work { + struct work_struct work; + struct cm_dev *cm; + void *data; +}; + +struct conn_req { + struct list_head node; + u32 destid; /* requester destID */ + u16 chan; /* requester channel ID */ + struct cm_dev *cmdev; +}; + +/* + * A channel_dev structure represents a CM_CDEV + * @cdev Character device + * @dev Associated device object + */ +struct channel_dev { + struct cdev cdev; + struct device *dev; +}; + +static struct rio_channel *riocm_ch_alloc(u16 ch_num); +static void riocm_ch_free(struct kref *ref); +static int riocm_post_send(struct cm_dev *cm, struct rio_dev *rdev, + void *buffer, size_t len); +static int riocm_ch_close(struct rio_channel *ch); + +static DEFINE_SPINLOCK(idr_lock); +static DEFINE_IDR(ch_idr); + +static LIST_HEAD(cm_dev_list); +static DECLARE_RWSEM(rdev_sem); + +static struct class *dev_class; +static unsigned int dev_major; +static unsigned int dev_minor_base; +static dev_t dev_number; +static struct channel_dev riocm_cdev; + +#define is_msg_capable(src_ops, dst_ops) \ + ((src_ops & RIO_SRC_OPS_DATA_MSG) && \ + (dst_ops & RIO_DST_OPS_DATA_MSG)) +#define dev_cm_capable(dev) \ + is_msg_capable(dev->src_ops, dev->dst_ops) + +static int riocm_cmp(struct rio_channel *ch, enum rio_cm_state cmp) +{ + int ret; + + spin_lock_bh(&ch->lock); + ret = (ch->state == cmp); + spin_unlock_bh(&ch->lock); + return ret; +} + +static int riocm_cmp_exch(struct rio_channel *ch, + enum rio_cm_state cmp, enum rio_cm_state exch) +{ + int ret; + + spin_lock_bh(&ch->lock); + ret = (ch->state == cmp); + if (ret) + ch->state = exch; + spin_unlock_bh(&ch->lock); + return ret; +} + +static enum rio_cm_state riocm_exch(struct rio_channel *ch, + enum rio_cm_state exch) +{ + enum rio_cm_state old; + + spin_lock_bh(&ch->lock); + old = ch->state; + ch->state = exch; + spin_unlock_bh(&ch->lock); + return old; +} + +static struct rio_channel *riocm_get_channel(u16 nr) +{ + struct rio_channel *ch; + + spin_lock_bh(&idr_lock); + ch = idr_find(&ch_idr, nr); + if (ch) + kref_get(&ch->ref); + spin_unlock_bh(&idr_lock); + return ch; +} + +static void riocm_put_channel(struct rio_channel *ch) +{ + kref_put(&ch->ref, riocm_ch_free); +} + +static void *riocm_rx_get_msg(struct cm_dev *cm) +{ + void *msg; + int i; + + msg = rio_get_inb_message(cm->mport, cmbox); + if (msg) { + for (i = 0; i < RIOCM_RX_RING_SIZE; i++) { + if (cm->rx_buf[i] == msg) { + cm->rx_buf[i] = NULL; + cm->rx_slots++; + break; + } + } + + if (i == RIOCM_RX_RING_SIZE) + riocm_warn("no record for buffer 0x%p", msg); + } + + return msg; +} + +/* + * riocm_rx_fill - fills a ring of receive buffers for given cm device + * @cm: cm_dev object + * @nent: max number of entries to fill + * + * Returns: none + */ +static void riocm_rx_fill(struct cm_dev *cm, int nent) +{ + int i; + + if (cm->rx_slots == 0) + return; + + for (i = 0; i < RIOCM_RX_RING_SIZE && cm->rx_slots && nent; i++) { + if (cm->rx_buf[i] == NULL) { + cm->rx_buf[i] = kmalloc(RIO_MAX_MSG_SIZE, GFP_KERNEL); + if (cm->rx_buf[i] == NULL) + break; + rio_add_inb_buffer(cm->mport, cmbox, cm->rx_buf[i]); + cm->rx_slots--; + nent--; + } + } +} + +/* + * riocm_rx_free - frees all receive buffers associated with given cm device + * @cm: cm_dev object + * + * Returns: none + */ +static void riocm_rx_free(struct cm_dev *cm) +{ + int i; + + for (i = 0; i < RIOCM_RX_RING_SIZE; i++) { + if (cm->rx_buf[i] != NULL) { + kfree(cm->rx_buf[i]); + cm->rx_buf[i] = NULL; + } + } +} + +/* + * riocm_req_handler - connection request handler + * @cm: cm_dev object + * @req_data: pointer to the request packet + * + * Returns: 0 if success, or + * -EINVAL if channel is not in correct state, + * -ENODEV if cannot find a channel with specified ID, + * -ENOMEM if unable to allocate memory to store the request + */ +static int riocm_req_handler(struct cm_dev *cm, void *req_data) +{ + struct rio_channel *ch; + struct conn_req *req; + struct rio_ch_chan_hdr *hh = req_data; + u16 chnum; + + chnum = ntohs(hh->dst_ch); + + ch = riocm_get_channel(chnum); + + if (!ch) + return -ENODEV; + + if (ch->state != RIO_CM_LISTEN) { + riocm_debug(RX_CMD, "channel %d is not in listen state", chnum); + riocm_put_channel(ch); + return -EINVAL; + } + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) { + riocm_put_channel(ch); + return -ENOMEM; + } + + req->destid = ntohl(hh->bhdr.src_id); + req->chan = ntohs(hh->src_ch); + req->cmdev = cm; + + spin_lock_bh(&ch->lock); + list_add_tail(&req->node, &ch->accept_queue); + spin_unlock_bh(&ch->lock); + complete(&ch->comp); + riocm_put_channel(ch); + + return 0; +} + +/* + * riocm_resp_handler - response to connection request handler + * @resp_data: pointer to the response packet + * + * Returns: 0 if success, or + * -EINVAL if channel is not in correct state, + * -ENODEV if cannot find a channel with specified ID, + */ +static int riocm_resp_handler(void *resp_data) +{ + struct rio_channel *ch; + struct rio_ch_chan_hdr *hh = resp_data; + u16 chnum; + + chnum = ntohs(hh->dst_ch); + ch = riocm_get_channel(chnum); + if (!ch) + return -ENODEV; + + if (ch->state != RIO_CM_CONNECT) { + riocm_put_channel(ch); + return -EINVAL; + } + + riocm_exch(ch, RIO_CM_CONNECTED); + ch->rem_channel = ntohs(hh->src_ch); + complete(&ch->comp); + riocm_put_channel(ch); + + return 0; +} + +/* + * riocm_close_handler - channel close request handler + * @req_data: pointer to the request packet + * + * Returns: 0 if success, or + * -ENODEV if cannot find a channel with specified ID, + * + error codes returned by riocm_ch_close. + */ +static int riocm_close_handler(void *data) +{ + struct rio_channel *ch; + struct rio_ch_chan_hdr *hh = data; + int ret; + + riocm_debug(RX_CMD, "for ch=%d", ntohs(hh->dst_ch)); + + spin_lock_bh(&idr_lock); + ch = idr_find(&ch_idr, ntohs(hh->dst_ch)); + if (!ch) { + spin_unlock_bh(&idr_lock); + return -ENODEV; + } + idr_remove(&ch_idr, ch->id); + spin_unlock_bh(&idr_lock); + + riocm_exch(ch, RIO_CM_DISCONNECT); + + ret = riocm_ch_close(ch); + if (ret) + riocm_debug(RX_CMD, "riocm_ch_close() returned %d", ret); + + return 0; +} + +/* + * rio_cm_handler - function that services request (non-data) packets + * @cm: cm_dev object + * @data: pointer to the packet + */ +static void rio_cm_handler(struct cm_dev *cm, void *data) +{ + struct rio_ch_chan_hdr *hdr; + + if (!rio_mport_is_running(cm->mport)) + goto out; + + hdr = data; + + riocm_debug(RX_CMD, "OP=%x for ch=%d from %d", + hdr->ch_op, ntohs(hdr->dst_ch), ntohs(hdr->src_ch)); + + switch (hdr->ch_op) { + case CM_CONN_REQ: + riocm_req_handler(cm, data); + break; + case CM_CONN_ACK: + riocm_resp_handler(data); + break; + case CM_CONN_CLOSE: + riocm_close_handler(data); + break; + default: + riocm_error("Invalid packet header"); + break; + } +out: + kfree(data); +} + +/* + * rio_rx_data_handler - received data packet handler + * @cm: cm_dev object + * @buf: data packet + * + * Returns: 0 if success, or + * -ENODEV if cannot find a channel with specified ID, + * -EIO if channel is not in CONNECTED state, + * -ENOMEM if channel RX queue is full (packet discarded) + */ +static int rio_rx_data_handler(struct cm_dev *cm, void *buf) +{ + struct rio_ch_chan_hdr *hdr; + struct rio_channel *ch; + + hdr = buf; + + riocm_debug(RX_DATA, "for ch=%d", ntohs(hdr->dst_ch)); + + ch = riocm_get_channel(ntohs(hdr->dst_ch)); + if (!ch) { + /* Discard data message for non-existing channel */ + kfree(buf); + return -ENODEV; + } + + /* Place pointer to the buffer into channel's RX queue */ + spin_lock(&ch->lock); + + if (ch->state != RIO_CM_CONNECTED) { + /* Channel is not ready to receive data, discard a packet */ + riocm_debug(RX_DATA, "ch=%d is in wrong state=%d", + ch->id, ch->state); + spin_unlock(&ch->lock); + kfree(buf); + riocm_put_channel(ch); + return -EIO; + } + + if (ch->rx_ring.count == RIOCM_RX_RING_SIZE) { + /* If RX ring is full, discard a packet */ + riocm_debug(RX_DATA, "ch=%d is full", ch->id); + spin_unlock(&ch->lock); + kfree(buf); + riocm_put_channel(ch); + return -ENOMEM; + } + + ch->rx_ring.buf[ch->rx_ring.head] = buf; + ch->rx_ring.head++; + ch->rx_ring.count++; + ch->rx_ring.head %= RIOCM_RX_RING_SIZE; + + complete(&ch->comp); + + spin_unlock(&ch->lock); + riocm_put_channel(ch); + + return 0; +} + +/* + * rio_ibmsg_handler - inbound message packet handler + */ +static void rio_ibmsg_handler(struct work_struct *work) +{ + struct cm_dev *cm = container_of(work, struct cm_dev, rx_work); + void *data; + struct rio_ch_chan_hdr *hdr; + + if (!rio_mport_is_running(cm->mport)) + return; + + while (1) { + mutex_lock(&cm->rx_lock); + data = riocm_rx_get_msg(cm); + if (data) + riocm_rx_fill(cm, 1); + mutex_unlock(&cm->rx_lock); + + if (data == NULL) + break; + + hdr = data; + + if (hdr->bhdr.type != RIO_CM_CHAN) { + /* For now simply discard packets other than channel */ + riocm_error("Unsupported TYPE code (0x%x). Msg dropped", + hdr->bhdr.type); + kfree(data); + continue; + } + + /* Process a channel message */ + if (hdr->ch_op == CM_DATA_MSG) + rio_rx_data_handler(cm, data); + else + rio_cm_handler(cm, data); + } +} + +static void riocm_inb_msg_event(struct rio_mport *mport, void *dev_id, + int mbox, int slot) +{ + struct cm_dev *cm = dev_id; + + if (rio_mport_is_running(cm->mport) && !work_pending(&cm->rx_work)) + queue_work(cm->rx_wq, &cm->rx_work); +} + +/* + * rio_txcq_handler - TX completion handler + * @cm: cm_dev object + * @slot: TX queue slot + * + * TX completion handler also ensures that pending request packets are placed + * into transmit queue as soon as a free slot becomes available. This is done + * to give higher priority to request packets during high intensity data flow. + */ +static void rio_txcq_handler(struct cm_dev *cm, int slot) +{ + int ack_slot; + + /* ATTN: Add TX completion notification if/when direct buffer + * transfer is implemented. At this moment only correct tracking + * of tx_count is important. + */ + riocm_debug(TX_EVENT, "for mport_%d slot %d tx_cnt %d", + cm->mport->id, slot, cm->tx_cnt); + + spin_lock(&cm->tx_lock); + ack_slot = cm->tx_ack_slot; + + if (ack_slot == slot) + riocm_debug(TX_EVENT, "slot == ack_slot"); + + while (cm->tx_cnt && ((ack_slot != slot) || + (cm->tx_cnt == RIOCM_TX_RING_SIZE))) { + + cm->tx_buf[ack_slot] = NULL; + ++ack_slot; + ack_slot &= (RIOCM_TX_RING_SIZE - 1); + cm->tx_cnt--; + } + + if (cm->tx_cnt < 0 || cm->tx_cnt > RIOCM_TX_RING_SIZE) + riocm_error("tx_cnt %d out of sync", cm->tx_cnt); + + WARN_ON((cm->tx_cnt < 0) || (cm->tx_cnt > RIOCM_TX_RING_SIZE)); + + cm->tx_ack_slot = ack_slot; + + /* + * If there are pending requests, insert them into transmit queue + */ + if (!list_empty(&cm->tx_reqs) && (cm->tx_cnt < RIOCM_TX_RING_SIZE)) { + struct tx_req *req, *_req; + int rc; + + list_for_each_entry_safe(req, _req, &cm->tx_reqs, node) { + list_del(&req->node); + cm->tx_buf[cm->tx_slot] = req->buffer; + rc = rio_add_outb_message(cm->mport, req->rdev, cmbox, + req->buffer, req->len); + kfree(req->buffer); + kfree(req); + + ++cm->tx_cnt; + ++cm->tx_slot; + cm->tx_slot &= (RIOCM_TX_RING_SIZE - 1); + if (cm->tx_cnt == RIOCM_TX_RING_SIZE) + break; + } + } + + spin_unlock(&cm->tx_lock); +} + +static void riocm_outb_msg_event(struct rio_mport *mport, void *dev_id, + int mbox, int slot) +{ + struct cm_dev *cm = dev_id; + + if (cm && rio_mport_is_running(cm->mport)) + rio_txcq_handler(cm, slot); +} + +static int riocm_queue_req(struct cm_dev *cm, struct rio_dev *rdev, + void *buffer, size_t len) +{ + unsigned long flags; + struct tx_req *treq; + + treq = kzalloc(sizeof(*treq), GFP_KERNEL); + if (treq == NULL) + return -ENOMEM; + + treq->rdev = rdev; + treq->buffer = buffer; + treq->len = len; + + spin_lock_irqsave(&cm->tx_lock, flags); + list_add_tail(&treq->node, &cm->tx_reqs); + spin_unlock_irqrestore(&cm->tx_lock, flags); + return 0; +} + +/* + * riocm_post_send - helper function that places packet into msg TX queue + * @cm: cm_dev object + * @rdev: target RapidIO device object (required by outbound msg interface) + * @buffer: pointer to a packet buffer to send + * @len: length of data to transfer + * @req: request priority flag + * + * Returns: 0 if success, or error code otherwise. + */ +static int riocm_post_send(struct cm_dev *cm, struct rio_dev *rdev, + void *buffer, size_t len) +{ + int rc; + unsigned long flags; + + spin_lock_irqsave(&cm->tx_lock, flags); + + if (cm->mport == NULL) { + rc = -ENODEV; + goto err_out; + } + + if (cm->tx_cnt == RIOCM_TX_RING_SIZE) { + riocm_debug(TX, "Tx Queue is full"); + rc = -EBUSY; + goto err_out; + } + + cm->tx_buf[cm->tx_slot] = buffer; + rc = rio_add_outb_message(cm->mport, rdev, cmbox, buffer, len); + + riocm_debug(TX, "Add buf@%p destid=%x tx_slot=%d tx_cnt=%d", + buffer, rdev->destid, cm->tx_slot, cm->tx_cnt); + + ++cm->tx_cnt; + ++cm->tx_slot; + cm->tx_slot &= (RIOCM_TX_RING_SIZE - 1); + +err_out: + spin_unlock_irqrestore(&cm->tx_lock, flags); + return rc; +} + +/* + * riocm_ch_send - sends a data packet to a remote device + * @ch_id: local channel ID + * @buf: pointer to a data buffer to send (including CM header) + * @len: length of data to transfer (including CM header) + * + * ATTN: ASSUMES THAT THE HEADER SPACE IS RESERVED PART OF THE DATA PACKET + * + * Returns: 0 if success, or + * -EINVAL if one or more input parameters is/are not valid, + * -ENODEV if cannot find a channel with specified ID, + * -EAGAIN if a channel is not in CONNECTED state, + * + error codes returned by HW send routine. + */ +static int riocm_ch_send(u16 ch_id, void *buf, int len) +{ + struct rio_channel *ch; + struct rio_ch_chan_hdr *hdr; + int ret; + + if (buf == NULL || ch_id == 0 || len == 0 || len > RIO_MAX_MSG_SIZE) + return -EINVAL; + + ch = riocm_get_channel(ch_id); + if (!ch) { + riocm_error("%s(%d) ch_%d not found", current->comm, + task_pid_nr(current), ch_id); + return -ENODEV; + } + + if (!riocm_cmp(ch, RIO_CM_CONNECTED)) { + ret = -EAGAIN; + goto err_out; + } + + /* + * Fill buffer header section with corresponding channel data + */ + hdr = buf; + + hdr->bhdr.src_id = htonl(ch->loc_destid); + hdr->bhdr.dst_id = htonl(ch->rem_destid); + hdr->bhdr.src_mbox = cmbox; + hdr->bhdr.dst_mbox = cmbox; + hdr->bhdr.type = RIO_CM_CHAN; + hdr->ch_op = CM_DATA_MSG; + hdr->dst_ch = htons(ch->rem_channel); + hdr->src_ch = htons(ch->id); + hdr->msg_len = htons((u16)len); + + /* ATTN: the function call below relies on the fact that underlying + * HW-specific add_outb_message() routine copies TX data into its own + * internal transfer buffer (true for all RIONET compatible mport + * drivers). Must be reviewed if mport driver uses the buffer directly. + */ + + ret = riocm_post_send(ch->cmdev, ch->rdev, buf, len); + if (ret) + riocm_debug(TX, "ch %d send_err=%d", ch->id, ret); +err_out: + riocm_put_channel(ch); + return ret; +} + +static int riocm_ch_free_rxbuf(struct rio_channel *ch, void *buf) +{ + int i, ret = -EINVAL; + + spin_lock_bh(&ch->lock); + + for (i = 0; i < RIOCM_RX_RING_SIZE; i++) { + if (ch->rx_ring.inuse[i] == buf) { + ch->rx_ring.inuse[i] = NULL; + ch->rx_ring.inuse_cnt--; + ret = 0; + break; + } + } + + spin_unlock_bh(&ch->lock); + + if (!ret) + kfree(buf); + + return ret; +} + +/* + * riocm_ch_receive - fetch a data packet received for the specified channel + * @ch: local channel ID + * @buf: pointer to a packet buffer + * @timeout: timeout to wait for incoming packet (in jiffies) + * + * Returns: 0 and valid buffer pointer if success, or NULL pointer and one of: + * -EAGAIN if a channel is not in CONNECTED state, + * -ENOMEM if in-use tracking queue is full, + * -ETIME if wait timeout expired, + * -EINTR if wait was interrupted. + */ +static int riocm_ch_receive(struct rio_channel *ch, void **buf, long timeout) +{ + void *rxmsg = NULL; + int i, ret = 0; + long wret; + + if (!riocm_cmp(ch, RIO_CM_CONNECTED)) { + ret = -EAGAIN; + goto out; + } + + if (ch->rx_ring.inuse_cnt == RIOCM_RX_RING_SIZE) { + /* If we do not have entries to track buffers given to upper + * layer, reject request. + */ + ret = -ENOMEM; + goto out; + } + + wret = wait_for_completion_interruptible_timeout(&ch->comp, timeout); + + riocm_debug(WAIT, "wait on %d returned %ld", ch->id, wret); + + if (!wret) + ret = -ETIME; + else if (wret == -ERESTARTSYS) + ret = -EINTR; + else + ret = riocm_cmp(ch, RIO_CM_CONNECTED) ? 0 : -ECONNRESET; + + if (ret) + goto out; + + spin_lock_bh(&ch->lock); + + rxmsg = ch->rx_ring.buf[ch->rx_ring.tail]; + ch->rx_ring.buf[ch->rx_ring.tail] = NULL; + ch->rx_ring.count--; + ch->rx_ring.tail++; + ch->rx_ring.tail %= RIOCM_RX_RING_SIZE; + ret = -ENOMEM; + + for (i = 0; i < RIOCM_RX_RING_SIZE; i++) { + if (ch->rx_ring.inuse[i] == NULL) { + ch->rx_ring.inuse[i] = rxmsg; + ch->rx_ring.inuse_cnt++; + ret = 0; + break; + } + } + + if (ret) { + /* We have no entry to store pending message: drop it */ + kfree(rxmsg); + rxmsg = NULL; + } + + spin_unlock_bh(&ch->lock); +out: + *buf = rxmsg; + return ret; +} + +/* + * riocm_ch_connect - sends a connect request to a remote device + * @loc_ch: local channel ID + * @cm: CM device to send connect request + * @peer: target RapidIO device + * @rem_ch: remote channel ID + * + * Returns: 0 if success, or + * -EINVAL if the channel is not in IDLE state, + * -EAGAIN if no connection request available immediately, + * -ETIME if ACK response timeout expired, + * -EINTR if wait for response was interrupted. + */ +static int riocm_ch_connect(u16 loc_ch, struct cm_dev *cm, + struct cm_peer *peer, u16 rem_ch) +{ + struct rio_channel *ch = NULL; + struct rio_ch_chan_hdr *hdr; + int ret; + long wret; + + ch = riocm_get_channel(loc_ch); + if (!ch) + return -ENODEV; + + if (!riocm_cmp_exch(ch, RIO_CM_IDLE, RIO_CM_CONNECT)) { + ret = -EINVAL; + goto conn_done; + } + + ch->cmdev = cm; + ch->rdev = peer->rdev; + ch->context = NULL; + ch->loc_destid = cm->mport->host_deviceid; + ch->rem_channel = rem_ch; + + /* + * Send connect request to the remote RapidIO device + */ + + hdr = kzalloc(sizeof(*hdr), GFP_KERNEL); + if (hdr == NULL) { + ret = -ENOMEM; + goto conn_done; + } + + hdr->bhdr.src_id = htonl(ch->loc_destid); + hdr->bhdr.dst_id = htonl(peer->rdev->destid); + hdr->bhdr.src_mbox = cmbox; + hdr->bhdr.dst_mbox = cmbox; + hdr->bhdr.type = RIO_CM_CHAN; + hdr->ch_op = CM_CONN_REQ; + hdr->dst_ch = htons(rem_ch); + hdr->src_ch = htons(loc_ch); + + /* ATTN: the function call below relies on the fact that underlying + * HW-specific add_outb_message() routine copies TX data into its + * internal transfer buffer. Must be reviewed if mport driver uses + * this buffer directly. + */ + ret = riocm_post_send(cm, peer->rdev, hdr, sizeof(*hdr)); + + if (ret != -EBUSY) { + kfree(hdr); + } else { + ret = riocm_queue_req(cm, peer->rdev, hdr, sizeof(*hdr)); + if (ret) + kfree(hdr); + } + + if (ret) { + riocm_cmp_exch(ch, RIO_CM_CONNECT, RIO_CM_IDLE); + goto conn_done; + } + + /* Wait for connect response from the remote device */ + wret = wait_for_completion_interruptible_timeout(&ch->comp, + RIOCM_CONNECT_TO * HZ); + riocm_debug(WAIT, "wait on %d returns %ld", ch->id, wret); + + if (!wret) + ret = -ETIME; + else if (wret == -ERESTARTSYS) + ret = -EINTR; + else + ret = riocm_cmp(ch, RIO_CM_CONNECTED) ? 0 : -1; + +conn_done: + riocm_put_channel(ch); + return ret; +} + +static int riocm_send_ack(struct rio_channel *ch) +{ + struct rio_ch_chan_hdr *hdr; + int ret; + + hdr = kzalloc(sizeof(*hdr), GFP_KERNEL); + if (hdr == NULL) + return -ENOMEM; + + hdr->bhdr.src_id = htonl(ch->loc_destid); + hdr->bhdr.dst_id = htonl(ch->rem_destid); + hdr->dst_ch = htons(ch->rem_channel); + hdr->src_ch = htons(ch->id); + hdr->bhdr.src_mbox = cmbox; + hdr->bhdr.dst_mbox = cmbox; + hdr->bhdr.type = RIO_CM_CHAN; + hdr->ch_op = CM_CONN_ACK; + + /* ATTN: the function call below relies on the fact that underlying + * add_outb_message() routine copies TX data into its internal transfer + * buffer. Review if switching to direct buffer version. + */ + ret = riocm_post_send(ch->cmdev, ch->rdev, hdr, sizeof(*hdr)); + + if (ret == -EBUSY && !riocm_queue_req(ch->cmdev, + ch->rdev, hdr, sizeof(*hdr))) + return 0; + kfree(hdr); + + if (ret) + riocm_error("send ACK to ch_%d on %s failed (ret=%d)", + ch->id, rio_name(ch->rdev), ret); + return ret; +} + +/* + * riocm_ch_accept - accept incoming connection request + * @ch_id: channel ID + * @new_ch_id: local mport device + * @timeout: wait timeout (if 0 non-blocking call, do not wait if connection + * request is not available). + * + * Returns: pointer to new channel struct if success, or error-valued pointer: + * -ENODEV - cannot find specified channel or mport, + * -EINVAL - the channel is not in IDLE state, + * -EAGAIN - no connection request available immediately (timeout=0), + * -ENOMEM - unable to allocate new channel, + * -ETIME - wait timeout expired, + * -EINTR - wait was interrupted. + */ +static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id, + long timeout) +{ + struct rio_channel *ch = NULL; + struct rio_channel *new_ch = NULL; + struct conn_req *req; + struct cm_peer *peer; + int found = 0; + int err = 0; + long wret; + + ch = riocm_get_channel(ch_id); + if (!ch) + return ERR_PTR(-EINVAL); + + if (!riocm_cmp(ch, RIO_CM_LISTEN)) { + err = -EINVAL; + goto err_put; + } + + /* Don't sleep if this is a non blocking call */ + if (!timeout) { + if (!try_wait_for_completion(&ch->comp)) { + err = -EAGAIN; + goto err_put; + } + } else { + riocm_debug(WAIT, "on %d", ch->id); + + wret = wait_for_completion_interruptible_timeout(&ch->comp, + timeout); + if (!wret) { + err = -ETIME; + goto err_put; + } else if (wret == -ERESTARTSYS) { + err = -EINTR; + goto err_put; + } + } + + spin_lock_bh(&ch->lock); + + if (ch->state != RIO_CM_LISTEN) { + err = -ECANCELED; + } else if (list_empty(&ch->accept_queue)) { + riocm_debug(WAIT, "on %d accept_queue is empty on completion", + ch->id); + err = -EIO; + } + + spin_unlock_bh(&ch->lock); + + if (err) { + riocm_debug(WAIT, "on %d returns %d", ch->id, err); + goto err_put; + } + + /* Create new channel for this connection */ + new_ch = riocm_ch_alloc(RIOCM_CHNUM_AUTO); + + if (IS_ERR(new_ch)) { + riocm_error("failed to get channel for new req (%ld)", + PTR_ERR(new_ch)); + err = -ENOMEM; + goto err_put; + } + + spin_lock_bh(&ch->lock); + + req = list_first_entry(&ch->accept_queue, struct conn_req, node); + list_del(&req->node); + new_ch->cmdev = ch->cmdev; + new_ch->loc_destid = ch->loc_destid; + new_ch->rem_destid = req->destid; + new_ch->rem_channel = req->chan; + + spin_unlock_bh(&ch->lock); + riocm_put_channel(ch); + kfree(req); + + down_read(&rdev_sem); + /* Find requester's device object */ + list_for_each_entry(peer, &new_ch->cmdev->peers, node) { + if (peer->rdev->destid == new_ch->rem_destid) { + riocm_debug(RX_CMD, "found matching device(%s)", + rio_name(peer->rdev)); + found = 1; + break; + } + } + up_read(&rdev_sem); + + if (!found) { + /* If peer device object not found, simply ignore the request */ + err = -ENODEV; + goto err_nodev; + } + + new_ch->rdev = peer->rdev; + new_ch->state = RIO_CM_CONNECTED; + spin_lock_init(&new_ch->lock); + + /* Acknowledge the connection request. */ + riocm_send_ack(new_ch); + + *new_ch_id = new_ch->id; + return new_ch; +err_put: + riocm_put_channel(ch); +err_nodev: + if (new_ch) { + spin_lock_bh(&idr_lock); + idr_remove(&ch_idr, new_ch->id); + spin_unlock_bh(&idr_lock); + riocm_put_channel(new_ch); + } + *new_ch_id = 0; + return ERR_PTR(err); +} + +/* + * riocm_ch_listen - puts a channel into LISTEN state + * @ch_id: channel ID + * + * Returns: 0 if success, or + * -EINVAL if the specified channel does not exists or + * is not in CHAN_BOUND state. + */ +static int riocm_ch_listen(u16 ch_id) +{ + struct rio_channel *ch = NULL; + int ret = 0; + + riocm_debug(CHOP, "(ch_%d)", ch_id); + + ch = riocm_get_channel(ch_id); + if (!ch || !riocm_cmp_exch(ch, RIO_CM_CHAN_BOUND, RIO_CM_LISTEN)) + ret = -EINVAL; + riocm_put_channel(ch); + return ret; +} + +/* + * riocm_ch_bind - associate a channel object and an mport device + * @ch_id: channel ID + * @mport_id: local mport device ID + * @context: pointer to the additional caller's context + * + * Returns: 0 if success, or + * -ENODEV if cannot find specified mport, + * -EINVAL if the specified channel does not exist or + * is not in IDLE state. + */ +static int riocm_ch_bind(u16 ch_id, u8 mport_id, void *context) +{ + struct rio_channel *ch = NULL; + struct cm_dev *cm; + int rc = -ENODEV; + + riocm_debug(CHOP, "ch_%d to mport_%d", ch_id, mport_id); + + /* Find matching cm_dev object */ + down_read(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if ((cm->mport->id == mport_id) && + rio_mport_is_running(cm->mport)) { + rc = 0; + break; + } + } + + if (rc) + goto exit; + + ch = riocm_get_channel(ch_id); + if (!ch) { + rc = -EINVAL; + goto exit; + } + + spin_lock_bh(&ch->lock); + if (ch->state != RIO_CM_IDLE) { + spin_unlock_bh(&ch->lock); + rc = -EINVAL; + goto err_put; + } + + ch->cmdev = cm; + ch->loc_destid = cm->mport->host_deviceid; + ch->context = context; + ch->state = RIO_CM_CHAN_BOUND; + spin_unlock_bh(&ch->lock); +err_put: + riocm_put_channel(ch); +exit: + up_read(&rdev_sem); + return rc; +} + +/* + * riocm_ch_alloc - channel object allocation helper routine + * @ch_num: channel ID (1 ... RIOCM_MAX_CHNUM, 0 = automatic) + * + * Return value: pointer to newly created channel object, + * or error-valued pointer + */ +static struct rio_channel *riocm_ch_alloc(u16 ch_num) +{ + int id; + int start, end; + struct rio_channel *ch; + + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) + return ERR_PTR(-ENOMEM); + + if (ch_num) { + /* If requested, try to obtain the specified channel ID */ + start = ch_num; + end = ch_num + 1; + } else { + /* Obtain channel ID from the dynamic allocation range */ + start = chstart; + end = RIOCM_MAX_CHNUM + 1; + } + + idr_preload(GFP_KERNEL); + spin_lock_bh(&idr_lock); + id = idr_alloc_cyclic(&ch_idr, ch, start, end, GFP_NOWAIT); + spin_unlock_bh(&idr_lock); + idr_preload_end(); + + if (id < 0) { + kfree(ch); + return ERR_PTR(id == -ENOSPC ? -EBUSY : id); + } + + ch->id = (u16)id; + ch->state = RIO_CM_IDLE; + spin_lock_init(&ch->lock); + INIT_LIST_HEAD(&ch->accept_queue); + INIT_LIST_HEAD(&ch->ch_node); + init_completion(&ch->comp); + init_completion(&ch->comp_close); + kref_init(&ch->ref); + ch->rx_ring.head = 0; + ch->rx_ring.tail = 0; + ch->rx_ring.count = 0; + ch->rx_ring.inuse_cnt = 0; + + return ch; +} + +/* + * riocm_ch_create - creates a new channel object and allocates ID for it + * @ch_num: channel ID (1 ... RIOCM_MAX_CHNUM, 0 = automatic) + * + * Allocates and initializes a new channel object. If the parameter ch_num > 0 + * and is within the valid range, riocm_ch_create tries to allocate the + * specified ID for the new channel. If ch_num = 0, channel ID will be assigned + * automatically from the range (chstart ... RIOCM_MAX_CHNUM). + * Module parameter 'chstart' defines start of an ID range available for dynamic + * allocation. Range below 'chstart' is reserved for pre-defined ID numbers. + * Available channel numbers are limited by 16-bit size of channel numbers used + * in the packet header. + * + * Return value: PTR to rio_channel structure if successful (with channel number + * updated via pointer) or error-valued pointer if error. + */ +static struct rio_channel *riocm_ch_create(u16 *ch_num) +{ + struct rio_channel *ch = NULL; + + ch = riocm_ch_alloc(*ch_num); + + if (IS_ERR(ch)) + riocm_debug(CHOP, "Failed to allocate channel %d (err=%ld)", + *ch_num, PTR_ERR(ch)); + else + *ch_num = ch->id; + + return ch; +} + +/* + * riocm_ch_free - channel object release routine + * @ref: pointer to a channel's kref structure + */ +static void riocm_ch_free(struct kref *ref) +{ + struct rio_channel *ch = container_of(ref, struct rio_channel, ref); + int i; + + riocm_debug(CHOP, "(ch_%d)", ch->id); + + if (ch->rx_ring.inuse_cnt) { + for (i = 0; + i < RIOCM_RX_RING_SIZE && ch->rx_ring.inuse_cnt; i++) { + if (ch->rx_ring.inuse[i] != NULL) { + kfree(ch->rx_ring.inuse[i]); + ch->rx_ring.inuse_cnt--; + } + } + } + + if (ch->rx_ring.count) + for (i = 0; i < RIOCM_RX_RING_SIZE && ch->rx_ring.count; i++) { + if (ch->rx_ring.buf[i] != NULL) { + kfree(ch->rx_ring.buf[i]); + ch->rx_ring.count--; + } + } + + complete(&ch->comp_close); +} + +static int riocm_send_close(struct rio_channel *ch) +{ + struct rio_ch_chan_hdr *hdr; + int ret; + + /* + * Send CH_CLOSE notification to the remote RapidIO device + */ + + hdr = kzalloc(sizeof(*hdr), GFP_KERNEL); + if (hdr == NULL) + return -ENOMEM; + + hdr->bhdr.src_id = htonl(ch->loc_destid); + hdr->bhdr.dst_id = htonl(ch->rem_destid); + hdr->bhdr.src_mbox = cmbox; + hdr->bhdr.dst_mbox = cmbox; + hdr->bhdr.type = RIO_CM_CHAN; + hdr->ch_op = CM_CONN_CLOSE; + hdr->dst_ch = htons(ch->rem_channel); + hdr->src_ch = htons(ch->id); + + /* ATTN: the function call below relies on the fact that underlying + * add_outb_message() routine copies TX data into its internal transfer + * buffer. Needs to be reviewed if switched to direct buffer mode. + */ + ret = riocm_post_send(ch->cmdev, ch->rdev, hdr, sizeof(*hdr)); + + if (ret == -EBUSY && !riocm_queue_req(ch->cmdev, ch->rdev, + hdr, sizeof(*hdr))) + return 0; + kfree(hdr); + + if (ret) + riocm_error("ch(%d) send CLOSE failed (ret=%d)", ch->id, ret); + + return ret; +} + +/* + * riocm_ch_close - closes a channel object with specified ID (by local request) + * @ch: channel to be closed + */ +static int riocm_ch_close(struct rio_channel *ch) +{ + unsigned long tmo = msecs_to_jiffies(3000); + enum rio_cm_state state; + long wret; + int ret = 0; + + riocm_debug(CHOP, "ch_%d by %s(%d)", + ch->id, current->comm, task_pid_nr(current)); + + state = riocm_exch(ch, RIO_CM_DESTROYING); + if (state == RIO_CM_CONNECTED) + riocm_send_close(ch); + + complete_all(&ch->comp); + + riocm_put_channel(ch); + wret = wait_for_completion_interruptible_timeout(&ch->comp_close, tmo); + + riocm_debug(WAIT, "wait on %d returns %ld", ch->id, wret); + + if (wret == 0) { + /* Timeout on wait occurred */ + riocm_debug(CHOP, "%s(%d) timed out waiting for ch %d", + current->comm, task_pid_nr(current), ch->id); + ret = -ETIMEDOUT; + } else if (wret == -ERESTARTSYS) { + /* Wait_for_completion was interrupted by a signal */ + riocm_debug(CHOP, "%s(%d) wait for ch %d was interrupted", + current->comm, task_pid_nr(current), ch->id); + ret = -EINTR; + } + + if (!ret) { + riocm_debug(CHOP, "ch_%d resources released", ch->id); + kfree(ch); + } else { + riocm_debug(CHOP, "failed to release ch_%d resources", ch->id); + } + + return ret; +} + +/* + * riocm_cdev_open() - Open character device + */ +static int riocm_cdev_open(struct inode *inode, struct file *filp) +{ + riocm_debug(INIT, "by %s(%d) filp=%p ", + current->comm, task_pid_nr(current), filp); + + if (list_empty(&cm_dev_list)) + return -ENODEV; + + return 0; +} + +/* + * riocm_cdev_release() - Release character device + */ +static int riocm_cdev_release(struct inode *inode, struct file *filp) +{ + struct rio_channel *ch, *_c; + unsigned int i; + LIST_HEAD(list); + + riocm_debug(EXIT, "by %s(%d) filp=%p", + current->comm, task_pid_nr(current), filp); + + /* Check if there are channels associated with this file descriptor */ + spin_lock_bh(&idr_lock); + idr_for_each_entry(&ch_idr, ch, i) { + if (ch && ch->filp == filp) { + riocm_debug(EXIT, "ch_%d not released by %s(%d)", + ch->id, current->comm, + task_pid_nr(current)); + idr_remove(&ch_idr, ch->id); + list_add(&ch->ch_node, &list); + } + } + spin_unlock_bh(&idr_lock); + + if (!list_empty(&list)) { + list_for_each_entry_safe(ch, _c, &list, ch_node) { + list_del(&ch->ch_node); + riocm_ch_close(ch); + } + } + + return 0; +} + +/* + * cm_ep_get_list_size() - Reports number of endpoints in the network + */ +static int cm_ep_get_list_size(void __user *arg) +{ + u32 __user *p = arg; + u32 mport_id; + u32 count = 0; + struct cm_dev *cm; + + if (get_user(mport_id, p)) + return -EFAULT; + if (mport_id >= RIO_MAX_MPORTS) + return -EINVAL; + + /* Find a matching cm_dev object */ + down_read(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if (cm->mport->id == mport_id) { + count = cm->npeers; + up_read(&rdev_sem); + if (copy_to_user(arg, &count, sizeof(u32))) + return -EFAULT; + return 0; + } + } + up_read(&rdev_sem); + + return -ENODEV; +} + +/* + * cm_ep_get_list() - Returns list of attached endpoints + */ +static int cm_ep_get_list(void __user *arg) +{ + struct cm_dev *cm; + struct cm_peer *peer; + u32 info[2]; + void *buf; + u32 nent; + u32 *entry_ptr; + u32 i = 0; + int ret = 0; + + if (copy_from_user(&info, arg, sizeof(info))) + return -EFAULT; + + if (info[1] >= RIO_MAX_MPORTS || info[0] > RIOCM_MAX_EP_COUNT) + return -EINVAL; + + /* Find a matching cm_dev object */ + down_read(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) + if (cm->mport->id == (u8)info[1]) + goto found; + + up_read(&rdev_sem); + return -ENODEV; + +found: + nent = min(info[0], cm->npeers); + buf = kcalloc(nent + 2, sizeof(u32), GFP_KERNEL); + if (!buf) { + up_read(&rdev_sem); + return -ENOMEM; + } + + entry_ptr = (u32 *)((uintptr_t)buf + 2*sizeof(u32)); + + list_for_each_entry(peer, &cm->peers, node) { + *entry_ptr = (u32)peer->rdev->destid; + entry_ptr++; + if (++i == nent) + break; + } + up_read(&rdev_sem); + + ((u32 *)buf)[0] = i; /* report an updated number of entries */ + ((u32 *)buf)[1] = info[1]; /* put back an mport ID */ + if (copy_to_user(arg, buf, sizeof(u32) * (info[0] + 2))) + ret = -EFAULT; + + kfree(buf); + return ret; +} + +/* + * cm_mport_get_list() - Returns list of available local mport devices + */ +static int cm_mport_get_list(void __user *arg) +{ + int ret = 0; + u32 entries; + void *buf; + struct cm_dev *cm; + u32 *entry_ptr; + int count = 0; + + if (copy_from_user(&entries, arg, sizeof(entries))) + return -EFAULT; + if (entries == 0 || entries > RIO_MAX_MPORTS) + return -EINVAL; + buf = kcalloc(entries + 1, sizeof(u32), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Scan all registered cm_dev objects */ + entry_ptr = (u32 *)((uintptr_t)buf + sizeof(u32)); + down_read(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if (count++ < entries) { + *entry_ptr = (cm->mport->id << 16) | + cm->mport->host_deviceid; + entry_ptr++; + } + } + up_read(&rdev_sem); + + *((u32 *)buf) = count; /* report a real number of entries */ + if (copy_to_user(arg, buf, sizeof(u32) * (count + 1))) + ret = -EFAULT; + + kfree(buf); + return ret; +} + +/* + * cm_chan_create() - Create a message exchange channel + */ +static int cm_chan_create(struct file *filp, void __user *arg) +{ + u16 __user *p = arg; + u16 ch_num; + struct rio_channel *ch; + + if (get_user(ch_num, p)) + return -EFAULT; + + riocm_debug(CHOP, "ch_%d requested by %s(%d)", + ch_num, current->comm, task_pid_nr(current)); + ch = riocm_ch_create(&ch_num); + if (IS_ERR(ch)) + return PTR_ERR(ch); + + ch->filp = filp; + riocm_debug(CHOP, "ch_%d created by %s(%d)", + ch_num, current->comm, task_pid_nr(current)); + return put_user(ch_num, p); +} + +/* + * cm_chan_close() - Close channel + * @filp: Pointer to file object + * @arg: Channel to close + */ +static int cm_chan_close(struct file *filp, void __user *arg) +{ + u16 __user *p = arg; + u16 ch_num; + struct rio_channel *ch; + + if (get_user(ch_num, p)) + return -EFAULT; + + riocm_debug(CHOP, "ch_%d by %s(%d)", + ch_num, current->comm, task_pid_nr(current)); + + spin_lock_bh(&idr_lock); + ch = idr_find(&ch_idr, ch_num); + if (!ch) { + spin_unlock_bh(&idr_lock); + return 0; + } + if (ch->filp != filp) { + spin_unlock_bh(&idr_lock); + return -EINVAL; + } + idr_remove(&ch_idr, ch->id); + spin_unlock_bh(&idr_lock); + + return riocm_ch_close(ch); +} + +/* + * cm_chan_bind() - Bind channel + * @arg: Channel number + */ +static int cm_chan_bind(void __user *arg) +{ + struct rio_cm_channel chan; + + if (copy_from_user(&chan, arg, sizeof(chan))) + return -EFAULT; + if (chan.mport_id >= RIO_MAX_MPORTS) + return -EINVAL; + + return riocm_ch_bind(chan.id, chan.mport_id, NULL); +} + +/* + * cm_chan_listen() - Listen on channel + * @arg: Channel number + */ +static int cm_chan_listen(void __user *arg) +{ + u16 __user *p = arg; + u16 ch_num; + + if (get_user(ch_num, p)) + return -EFAULT; + + return riocm_ch_listen(ch_num); +} + +/* + * cm_chan_accept() - Accept incoming connection + * @filp: Pointer to file object + * @arg: Channel number + */ +static int cm_chan_accept(struct file *filp, void __user *arg) +{ + struct rio_cm_accept param; + long accept_to; + struct rio_channel *ch; + + if (copy_from_user(¶m, arg, sizeof(param))) + return -EFAULT; + + riocm_debug(CHOP, "on ch_%d by %s(%d)", + param.ch_num, current->comm, task_pid_nr(current)); + + accept_to = param.wait_to ? + msecs_to_jiffies(param.wait_to) : 0; + + ch = riocm_ch_accept(param.ch_num, ¶m.ch_num, accept_to); + if (IS_ERR(ch)) + return PTR_ERR(ch); + ch->filp = filp; + + riocm_debug(CHOP, "new ch_%d for %s(%d)", + ch->id, current->comm, task_pid_nr(current)); + + if (copy_to_user(arg, ¶m, sizeof(param))) + return -EFAULT; + return 0; +} + +/* + * cm_chan_connect() - Connect on channel + * @arg: Channel information + */ +static int cm_chan_connect(void __user *arg) +{ + struct rio_cm_channel chan; + struct cm_dev *cm; + struct cm_peer *peer; + int ret = -ENODEV; + + if (copy_from_user(&chan, arg, sizeof(chan))) + return -EFAULT; + if (chan.mport_id >= RIO_MAX_MPORTS) + return -EINVAL; + + down_read(&rdev_sem); + + /* Find matching cm_dev object */ + list_for_each_entry(cm, &cm_dev_list, list) { + if (cm->mport->id == chan.mport_id) { + ret = 0; + break; + } + } + + if (ret) + goto err_out; + + if (chan.remote_destid >= RIO_ANY_DESTID(cm->mport->sys_size)) { + ret = -EINVAL; + goto err_out; + } + + /* Find corresponding RapidIO endpoint device object */ + ret = -ENODEV; + + list_for_each_entry(peer, &cm->peers, node) { + if (peer->rdev->destid == chan.remote_destid) { + ret = 0; + break; + } + } + + if (ret) + goto err_out; + + up_read(&rdev_sem); + + return riocm_ch_connect(chan.id, cm, peer, chan.remote_channel); +err_out: + up_read(&rdev_sem); + return ret; +} + +/* + * cm_chan_msg_send() - Send a message through channel + * @arg: Outbound message information + */ +static int cm_chan_msg_send(void __user *arg) +{ + struct rio_cm_msg msg; + void *buf; + int ret = 0; + + if (copy_from_user(&msg, arg, sizeof(msg))) + return -EFAULT; + if (msg.size > RIO_MAX_MSG_SIZE) + return -EINVAL; + + buf = kmalloc(msg.size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, (void __user *)(uintptr_t)msg.msg, msg.size)) { + ret = -EFAULT; + goto out; + } + + ret = riocm_ch_send(msg.ch_num, buf, msg.size); +out: + kfree(buf); + return ret; +} + +/* + * cm_chan_msg_rcv() - Receive a message through channel + * @arg: Inbound message information + */ +static int cm_chan_msg_rcv(void __user *arg) +{ + struct rio_cm_msg msg; + struct rio_channel *ch; + void *buf; + long rxto; + int ret = 0, msg_size; + + if (copy_from_user(&msg, arg, sizeof(msg))) + return -EFAULT; + + if (msg.ch_num == 0 || msg.size == 0) + return -EINVAL; + + ch = riocm_get_channel(msg.ch_num); + if (!ch) + return -ENODEV; + + rxto = msg.rxto ? msecs_to_jiffies(msg.rxto) : MAX_SCHEDULE_TIMEOUT; + + ret = riocm_ch_receive(ch, &buf, rxto); + if (ret) + goto out; + + msg_size = min(msg.size, (u16)(RIO_MAX_MSG_SIZE)); + + if (copy_to_user((void __user *)(uintptr_t)msg.msg, buf, msg_size)) + ret = -EFAULT; + + riocm_ch_free_rxbuf(ch, buf); +out: + riocm_put_channel(ch); + return ret; +} + +/* + * riocm_cdev_ioctl() - IOCTL requests handler + */ +static long +riocm_cdev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case RIO_CM_EP_GET_LIST_SIZE: + return cm_ep_get_list_size((void __user *)arg); + case RIO_CM_EP_GET_LIST: + return cm_ep_get_list((void __user *)arg); + case RIO_CM_CHAN_CREATE: + return cm_chan_create(filp, (void __user *)arg); + case RIO_CM_CHAN_CLOSE: + return cm_chan_close(filp, (void __user *)arg); + case RIO_CM_CHAN_BIND: + return cm_chan_bind((void __user *)arg); + case RIO_CM_CHAN_LISTEN: + return cm_chan_listen((void __user *)arg); + case RIO_CM_CHAN_ACCEPT: + return cm_chan_accept(filp, (void __user *)arg); + case RIO_CM_CHAN_CONNECT: + return cm_chan_connect((void __user *)arg); + case RIO_CM_CHAN_SEND: + return cm_chan_msg_send((void __user *)arg); + case RIO_CM_CHAN_RECEIVE: + return cm_chan_msg_rcv((void __user *)arg); + case RIO_CM_MPORT_GET_LIST: + return cm_mport_get_list((void __user *)arg); + default: + break; + } + + return -EINVAL; +} + +static const struct file_operations riocm_cdev_fops = { + .owner = THIS_MODULE, + .open = riocm_cdev_open, + .release = riocm_cdev_release, + .unlocked_ioctl = riocm_cdev_ioctl, +}; + +/* + * riocm_add_dev - add new remote RapidIO device into channel management core + * @dev: device object associated with RapidIO device + * @sif: subsystem interface + * + * Adds the specified RapidIO device (if applicable) into peers list of + * the corresponding channel management device (cm_dev). + */ +static int riocm_add_dev(struct device *dev, struct subsys_interface *sif) +{ + struct cm_peer *peer; + struct rio_dev *rdev = to_rio_dev(dev); + struct cm_dev *cm; + + /* Check if the remote device has capabilities required to support CM */ + if (!dev_cm_capable(rdev)) + return 0; + + riocm_debug(RDEV, "(%s)", rio_name(rdev)); + + peer = kmalloc(sizeof(*peer), GFP_KERNEL); + if (!peer) + return -ENOMEM; + + /* Find a corresponding cm_dev object */ + down_write(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if (cm->mport == rdev->net->hport) + goto found; + } + + up_write(&rdev_sem); + kfree(peer); + return -ENODEV; + +found: + peer->rdev = rdev; + list_add_tail(&peer->node, &cm->peers); + cm->npeers++; + + up_write(&rdev_sem); + return 0; +} + +/* + * riocm_remove_dev - remove remote RapidIO device from channel management core + * @dev: device object associated with RapidIO device + * @sif: subsystem interface + * + * Removes the specified RapidIO device (if applicable) from peers list of + * the corresponding channel management device (cm_dev). + */ +static void riocm_remove_dev(struct device *dev, struct subsys_interface *sif) +{ + struct rio_dev *rdev = to_rio_dev(dev); + struct cm_dev *cm; + struct cm_peer *peer; + struct rio_channel *ch, *_c; + unsigned int i; + bool found = false; + LIST_HEAD(list); + + /* Check if the remote device has capabilities required to support CM */ + if (!dev_cm_capable(rdev)) + return; + + riocm_debug(RDEV, "(%s)", rio_name(rdev)); + + /* Find matching cm_dev object */ + down_write(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if (cm->mport == rdev->net->hport) { + found = true; + break; + } + } + + if (!found) { + up_write(&rdev_sem); + return; + } + + /* Remove remote device from the list of peers */ + found = false; + list_for_each_entry(peer, &cm->peers, node) { + if (peer->rdev == rdev) { + riocm_debug(RDEV, "removing peer %s", rio_name(rdev)); + found = true; + list_del(&peer->node); + cm->npeers--; + kfree(peer); + break; + } + } + + up_write(&rdev_sem); + + if (!found) + return; + + /* + * Release channels associated with this peer + */ + + spin_lock_bh(&idr_lock); + idr_for_each_entry(&ch_idr, ch, i) { + if (ch && ch->rdev == rdev) { + if (atomic_read(&rdev->state) != RIO_DEVICE_SHUTDOWN) + riocm_exch(ch, RIO_CM_DISCONNECT); + idr_remove(&ch_idr, ch->id); + list_add(&ch->ch_node, &list); + } + } + spin_unlock_bh(&idr_lock); + + if (!list_empty(&list)) { + list_for_each_entry_safe(ch, _c, &list, ch_node) { + list_del(&ch->ch_node); + riocm_ch_close(ch); + } + } +} + +/* + * riocm_cdev_add() - Create rio_cm char device + * @devno: device number assigned to device (MAJ + MIN) + */ +static int riocm_cdev_add(dev_t devno) +{ + int ret; + + cdev_init(&riocm_cdev.cdev, &riocm_cdev_fops); + riocm_cdev.cdev.owner = THIS_MODULE; + ret = cdev_add(&riocm_cdev.cdev, devno, 1); + if (ret < 0) { + riocm_error("Cannot register a device with error %d", ret); + return ret; + } + + riocm_cdev.dev = device_create(dev_class, NULL, devno, NULL, DEV_NAME); + if (IS_ERR(riocm_cdev.dev)) { + cdev_del(&riocm_cdev.cdev); + return PTR_ERR(riocm_cdev.dev); + } + + riocm_debug(MPORT, "Added %s cdev(%d:%d)", + DEV_NAME, MAJOR(devno), MINOR(devno)); + + return 0; +} + +/* + * riocm_add_mport - add new local mport device into channel management core + * @dev: device object associated with mport + * @class_intf: class interface + * + * When a new mport device is added, CM immediately reserves inbound and + * outbound RapidIO mailboxes that will be used. + */ +static int riocm_add_mport(struct device *dev, + struct class_interface *class_intf) +{ + int rc; + int i; + struct cm_dev *cm; + struct rio_mport *mport = to_rio_mport(dev); + + riocm_debug(MPORT, "add mport %s", mport->name); + + cm = kzalloc(sizeof(*cm), GFP_KERNEL); + if (!cm) + return -ENOMEM; + + cm->mport = mport; + + rc = rio_request_outb_mbox(mport, cm, cmbox, + RIOCM_TX_RING_SIZE, riocm_outb_msg_event); + if (rc) { + riocm_error("failed to allocate OBMBOX_%d on %s", + cmbox, mport->name); + kfree(cm); + return -ENODEV; + } + + rc = rio_request_inb_mbox(mport, cm, cmbox, + RIOCM_RX_RING_SIZE, riocm_inb_msg_event); + if (rc) { + riocm_error("failed to allocate IBMBOX_%d on %s", + cmbox, mport->name); + rio_release_outb_mbox(mport, cmbox); + kfree(cm); + return -ENODEV; + } + + /* + * Allocate and register inbound messaging buffers to be ready + * to receive channel and system management requests + */ + for (i = 0; i < RIOCM_RX_RING_SIZE; i++) + cm->rx_buf[i] = NULL; + + cm->rx_slots = RIOCM_RX_RING_SIZE; + mutex_init(&cm->rx_lock); + riocm_rx_fill(cm, RIOCM_RX_RING_SIZE); + cm->rx_wq = create_workqueue(DRV_NAME "/rxq"); + INIT_WORK(&cm->rx_work, rio_ibmsg_handler); + + cm->tx_slot = 0; + cm->tx_cnt = 0; + cm->tx_ack_slot = 0; + spin_lock_init(&cm->tx_lock); + + INIT_LIST_HEAD(&cm->peers); + cm->npeers = 0; + INIT_LIST_HEAD(&cm->tx_reqs); + + down_write(&rdev_sem); + list_add_tail(&cm->list, &cm_dev_list); + up_write(&rdev_sem); + + return 0; +} + +/* + * riocm_remove_mport - remove local mport device from channel management core + * @dev: device object associated with mport + * @class_intf: class interface + * + * Removes a local mport device from the list of registered devices that provide + * channel management services. Returns an error if the specified mport is not + * registered with the CM core. + */ +static void riocm_remove_mport(struct device *dev, + struct class_interface *class_intf) +{ + struct rio_mport *mport = to_rio_mport(dev); + struct cm_dev *cm; + struct cm_peer *peer, *temp; + struct rio_channel *ch, *_c; + unsigned int i; + bool found = false; + LIST_HEAD(list); + + riocm_debug(MPORT, "%s", mport->name); + + /* Find a matching cm_dev object */ + down_write(&rdev_sem); + list_for_each_entry(cm, &cm_dev_list, list) { + if (cm->mport == mport) { + list_del(&cm->list); + found = true; + break; + } + } + up_write(&rdev_sem); + if (!found) + return; + + flush_workqueue(cm->rx_wq); + destroy_workqueue(cm->rx_wq); + + /* Release channels bound to this mport */ + spin_lock_bh(&idr_lock); + idr_for_each_entry(&ch_idr, ch, i) { + if (ch->cmdev == cm) { + riocm_debug(RDEV, "%s drop ch_%d", + mport->name, ch->id); + idr_remove(&ch_idr, ch->id); + list_add(&ch->ch_node, &list); + } + } + spin_unlock_bh(&idr_lock); + + if (!list_empty(&list)) { + list_for_each_entry_safe(ch, _c, &list, ch_node) { + list_del(&ch->ch_node); + riocm_ch_close(ch); + } + } + + rio_release_inb_mbox(mport, cmbox); + rio_release_outb_mbox(mport, cmbox); + + /* Remove and free peer entries */ + if (!list_empty(&cm->peers)) + riocm_debug(RDEV, "ATTN: peer list not empty"); + list_for_each_entry_safe(peer, temp, &cm->peers, node) { + riocm_debug(RDEV, "removing peer %s", rio_name(peer->rdev)); + list_del(&peer->node); + kfree(peer); + } + + riocm_rx_free(cm); + kfree(cm); + riocm_debug(MPORT, "%s done", mport->name); +} + +static int rio_cm_shutdown(struct notifier_block *nb, unsigned long code, + void *unused) +{ + struct rio_channel *ch; + unsigned int i; + + riocm_debug(EXIT, "."); + + spin_lock_bh(&idr_lock); + idr_for_each_entry(&ch_idr, ch, i) { + riocm_debug(EXIT, "close ch %d", ch->id); + if (ch->state == RIO_CM_CONNECTED) + riocm_send_close(ch); + } + spin_unlock_bh(&idr_lock); + + return NOTIFY_DONE; +} + +/* + * riocm_interface handles addition/removal of remote RapidIO devices + */ +static struct subsys_interface riocm_interface = { + .name = "rio_cm", + .subsys = &rio_bus_type, + .add_dev = riocm_add_dev, + .remove_dev = riocm_remove_dev, +}; + +/* + * rio_mport_interface handles addition/removal local mport devices + */ +static struct class_interface rio_mport_interface __refdata = { + .class = &rio_mport_class, + .add_dev = riocm_add_mport, + .remove_dev = riocm_remove_mport, +}; + +static struct notifier_block rio_cm_notifier = { + .notifier_call = rio_cm_shutdown, +}; + +static int __init riocm_init(void) +{ + int ret; + + /* Create device class needed by udev */ + dev_class = class_create(THIS_MODULE, DRV_NAME); + if (IS_ERR(dev_class)) { + riocm_error("Cannot create " DRV_NAME " class"); + return PTR_ERR(dev_class); + } + + ret = alloc_chrdev_region(&dev_number, 0, 1, DRV_NAME); + if (ret) { + class_destroy(dev_class); + return ret; + } + + dev_major = MAJOR(dev_number); + dev_minor_base = MINOR(dev_number); + riocm_debug(INIT, "Registered class with %d major", dev_major); + + /* + * Register as rapidio_port class interface to get notifications about + * mport additions and removals. + */ + ret = class_interface_register(&rio_mport_interface); + if (ret) { + riocm_error("class_interface_register error: %d", ret); + goto err_reg; + } + + /* + * Register as RapidIO bus interface to get notifications about + * addition/removal of remote RapidIO devices. + */ + ret = subsys_interface_register(&riocm_interface); + if (ret) { + riocm_error("subsys_interface_register error: %d", ret); + goto err_cl; + } + + ret = register_reboot_notifier(&rio_cm_notifier); + if (ret) { + riocm_error("failed to register reboot notifier (err=%d)", ret); + goto err_sif; + } + + ret = riocm_cdev_add(dev_number); + if (ret) { + unregister_reboot_notifier(&rio_cm_notifier); + ret = -ENODEV; + goto err_sif; + } + + return 0; +err_sif: + subsys_interface_unregister(&riocm_interface); +err_cl: + class_interface_unregister(&rio_mport_interface); +err_reg: + unregister_chrdev_region(dev_number, 1); + class_destroy(dev_class); + return ret; +} + +static void __exit riocm_exit(void) +{ + riocm_debug(EXIT, "enter"); + unregister_reboot_notifier(&rio_cm_notifier); + subsys_interface_unregister(&riocm_interface); + class_interface_unregister(&rio_mport_interface); + idr_destroy(&ch_idr); + + device_unregister(riocm_cdev.dev); + cdev_del(&(riocm_cdev.cdev)); + + class_destroy(dev_class); + unregister_chrdev_region(dev_number, 1); +} + +late_initcall(riocm_init); +module_exit(riocm_exit); diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 6d4e92ccdc91..c44747c0796a 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -357,6 +357,7 @@ header-y += reiserfs_fs.h header-y += reiserfs_xattr.h header-y += resource.h header-y += rfkill.h +header-y += rio_cm_cdev.h header-y += rio_mport_cdev.h header-y += romfs_fs.h header-y += rose.h diff --git a/include/uapi/linux/rio_cm_cdev.h b/include/uapi/linux/rio_cm_cdev.h new file mode 100644 index 000000000000..6edb900d318d --- /dev/null +++ b/include/uapi/linux/rio_cm_cdev.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2015, Integrated Device Technology Inc. + * Copyright (c) 2015, Prodrive Technologies + * Copyright (c) 2015, RapidIO Trade Association + * All rights reserved. + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License(GPL) Version 2, or the BSD-3 Clause license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RIO_CM_CDEV_H_ +#define _RIO_CM_CDEV_H_ + +#include + +struct rio_cm_channel { + __u16 id; + __u16 remote_channel; + __u16 remote_destid; + __u8 mport_id; +}; + +struct rio_cm_msg { + __u16 ch_num; + __u16 size; + __u32 rxto; /* receive timeout in mSec. 0 = blocking */ + __u64 msg; +}; + +struct rio_cm_accept { + __u16 ch_num; + __u16 pad0; + __u32 wait_to; /* accept timeout in mSec. 0 = blocking */ +}; + +/* RapidIO Channelized Messaging Driver IOCTLs */ +#define RIO_CM_IOC_MAGIC 'c' + +#define RIO_CM_EP_GET_LIST_SIZE _IOWR(RIO_CM_IOC_MAGIC, 1, __u32) +#define RIO_CM_EP_GET_LIST _IOWR(RIO_CM_IOC_MAGIC, 2, __u32) +#define RIO_CM_CHAN_CREATE _IOWR(RIO_CM_IOC_MAGIC, 3, __u16) +#define RIO_CM_CHAN_CLOSE _IOW(RIO_CM_IOC_MAGIC, 4, __u16) +#define RIO_CM_CHAN_BIND _IOW(RIO_CM_IOC_MAGIC, 5, struct rio_cm_channel) +#define RIO_CM_CHAN_LISTEN _IOW(RIO_CM_IOC_MAGIC, 6, __u16) +#define RIO_CM_CHAN_ACCEPT _IOWR(RIO_CM_IOC_MAGIC, 7, struct rio_cm_accept) +#define RIO_CM_CHAN_CONNECT _IOW(RIO_CM_IOC_MAGIC, 8, struct rio_cm_channel) +#define RIO_CM_CHAN_SEND _IOW(RIO_CM_IOC_MAGIC, 9, struct rio_cm_msg) +#define RIO_CM_CHAN_RECEIVE _IOWR(RIO_CM_IOC_MAGIC, 10, struct rio_cm_msg) +#define RIO_CM_MPORT_GET_LIST _IOWR(RIO_CM_IOC_MAGIC, 11, __u32) + +#endif /* _RIO_CM_CDEV_H_ */ -- cgit v1.2.3-55-g7522 From a057a52e94e15d89be8af557584e0144a496b6c6 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 2 Aug 2016 14:06:54 -0700 Subject: rapidio: change inbound window size type to u64 Current definition of map_inb() mport operations callback uses u32 type to specify required inbound window (IBW) size. This is limiting factor because existing hardware - tsi721 and fsl_rio, both support IBW size up to 16GB. Changing type of size parameter to u64 to allow IBW size configurations larger than 4GB. [alexandre.bounine@idt.com: remove compiler warning about size of constant] Link: http://lkml.kernel.org/r/20160802184856.2566-1-alexandre.bounine@idt.com Link: http://lkml.kernel.org/r/1469125134-16523-11-git-send-email-alexandre.bounine@idt.com Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Cc: Andre van Herk Cc: Barry Wood Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/fsl_rio.c | 4 ++-- drivers/rapidio/devices/tsi721.c | 14 +++++++++----- include/linux/rio.h | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index f5bf38b94595..386790cfa16e 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -289,7 +289,7 @@ static void fsl_rio_inbound_mem_init(struct rio_priv *priv) } int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, - u64 rstart, u32 size, u32 flags) + u64 rstart, u64 size, u32 flags) { struct rio_priv *priv = mport->priv; u32 base_size; @@ -298,7 +298,7 @@ int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, u32 riwar; int i; - if ((size & (size - 1)) != 0) + if ((size & (size - 1)) != 0 || size > 0x400000000ULL) return -EINVAL; base_size_log = ilog2(size); diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 8e07cd56abdc..53daf634a1ac 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -1090,7 +1090,7 @@ static void tsi721_init_pc2sr_mapping(struct tsi721_device *priv) * from rstart to lstart. */ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, - u64 rstart, u32 size, u32 flags) + u64 rstart, u64 size, u32 flags) { struct tsi721_device *priv = mport->priv; int i, avail = -1; @@ -1103,6 +1103,10 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, struct tsi721_ib_win_mapping *map = NULL; int ret = -EBUSY; + /* Max IBW size supported by HW is 16GB */ + if (size > 0x400000000UL) + return -EINVAL; + if (direct) { /* Calculate minimal acceptable window size and base address */ @@ -1110,15 +1114,15 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, ibw_start = lstart & ~(ibw_size - 1); tsi_debug(IBW, &priv->pdev->dev, - "Direct (RIO_0x%llx -> PCIe_%pad), size=0x%x, ibw_start = 0x%llx", + "Direct (RIO_0x%llx -> PCIe_%pad), size=0x%llx, ibw_start = 0x%llx", rstart, &lstart, size, ibw_start); while ((lstart + size) > (ibw_start + ibw_size)) { ibw_size *= 2; ibw_start = lstart & ~(ibw_size - 1); - if (ibw_size > 0x80000000) { /* Limit max size to 2GB */ + /* Check for crossing IBW max size 16GB */ + if (ibw_size > 0x400000000UL) return -EBUSY; - } } loc_start = ibw_start; @@ -1129,7 +1133,7 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, } else { tsi_debug(IBW, &priv->pdev->dev, - "Translated (RIO_0x%llx -> PCIe_%pad), size=0x%x", + "Translated (RIO_0x%llx -> PCIe_%pad), size=0x%llx", rstart, &lstart, size); if (!is_power_of_2(size) || size < 0x1000 || diff --git a/include/linux/rio.h b/include/linux/rio.h index aa2323893e8d..f7ec35b48800 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -425,7 +425,7 @@ struct rio_ops { int (*add_inb_buffer)(struct rio_mport *mport, int mbox, void *buf); void *(*get_inb_message)(struct rio_mport *mport, int mbox); int (*map_inb)(struct rio_mport *mport, dma_addr_t lstart, - u64 rstart, u32 size, u32 flags); + u64 rstart, u64 size, u32 flags); void (*unmap_inb)(struct rio_mport *mport, dma_addr_t lstart); int (*query_mport)(struct rio_mport *mport, struct rio_mport_attr *attr); -- cgit v1.2.3-55-g7522 From 1ae842de1dd8051cbb65b396b6f029d07f992641 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 2 Aug 2016 14:06:57 -0700 Subject: rapidio: modify for rev.3 specification changes Implement changes made in RapidIO specification rev.3 to LP-Serial Physical Layer register definitions: - use per-port register offset calculations based on LP-Serial Extended Features Block (EFB) Register Map type (I or II) with different per-port offset step (0x20 vs 0x40 respectfully). - remove deprecated Parallel Physical layer definitions and related code. [alexandre.bounine@idt.com: fix DocBook warning for gen3 update] Link: http://lkml.kernel.org/r/1469191173-19338-1-git-send-email-alexandre.bounine@idt.com Link: http://lkml.kernel.org/r/1469125134-16523-12-git-send-email-alexandre.bounine@idt.com Signed-off-by: Alexandre Bounine Tested-by: Barry Wood Cc: Matt Porter Cc: Andre van Herk Cc: Barry Wood Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/rio_mport_cdev.c | 2 +- drivers/rapidio/devices/tsi721.c | 8 +- drivers/rapidio/rio-scan.c | 74 ++++---------- drivers/rapidio/rio.c | 150 +++++++++++++-------------- drivers/rapidio/rio.h | 2 +- drivers/rapidio/switches/tsi57x.c | 26 +++-- include/linux/rio.h | 11 +- include/linux/rio_regs.h | 167 ++++++++++++++++++++++++------- 8 files changed, 249 insertions(+), 191 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index de0c692587dd..436dfe871d32 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -1813,7 +1813,7 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv, if (rdev->pef & RIO_PEF_EXT_FEATURES) { rdev->efptr = rval & 0xffff; rdev->phys_efptr = rio_mport_get_physefb(mport, 0, destid, - hopcount); + hopcount, &rdev->phys_rmap); rdev->em_efptr = rio_mport_get_feature(mport, 0, destid, hopcount, RIO_EFB_ERR_MGMNT); diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 53daf634a1ac..32f0f014a067 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -2555,11 +2555,11 @@ static int tsi721_query_mport(struct rio_mport *mport, struct tsi721_device *priv = mport->priv; u32 rval; - rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_ERR_STS_CSR(0))); + rval = ioread32(priv->regs + 0x100 + RIO_PORT_N_ERR_STS_CSR(0, 0)); if (rval & RIO_PORT_N_ERR_STS_PORT_OK) { - rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_CTL2_CSR(0))); + rval = ioread32(priv->regs + 0x100 + RIO_PORT_N_CTL2_CSR(0, 0)); attr->link_speed = (rval & RIO_PORT_N_CTL2_SEL_BAUD) >> 28; - rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_CTL_CSR(0))); + rval = ioread32(priv->regs + 0x100 + RIO_PORT_N_CTL_CSR(0, 0)); attr->link_width = (rval & RIO_PORT_N_CTL_IPW) >> 27; } else attr->link_speed = RIO_LINK_DOWN; @@ -2673,9 +2673,9 @@ static int tsi721_setup_mport(struct tsi721_device *priv) mport->ops = &tsi721_rio_ops; mport->index = 0; mport->sys_size = 0; /* small system */ - mport->phy_type = RIO_PHY_SERIAL; mport->priv = (void *)priv; mport->phys_efptr = 0x100; + mport->phys_rmap = 1; mport->dev.parent = &pdev->dev; mport->dev.release = tsi721_mport_release; diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index a63a380809d1..23429bdaca84 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -49,15 +49,6 @@ struct rio_id_table { static int next_destid = 0; static int next_comptag = 1; -static int rio_mport_phys_table[] = { - RIO_EFB_PAR_EP_ID, - RIO_EFB_PAR_EP_REC_ID, - RIO_EFB_SER_EP_ID, - RIO_EFB_SER_EP_REC_ID, - -1, -}; - - /** * rio_destid_alloc - Allocate next available destID for given network * @net: RIO network @@ -380,10 +371,15 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, if (rdev->pef & RIO_PEF_EXT_FEATURES) { rdev->efptr = result & 0xffff; rdev->phys_efptr = rio_mport_get_physefb(port, 0, destid, - hopcount); + hopcount, &rdev->phys_rmap); + pr_debug("RIO: %s Register Map %d device\n", + __func__, rdev->phys_rmap); rdev->em_efptr = rio_mport_get_feature(port, 0, destid, hopcount, RIO_EFB_ERR_MGMNT); + if (!rdev->em_efptr) + rdev->em_efptr = rio_mport_get_feature(port, 0, destid, + hopcount, RIO_EFB_ERR_MGMNT_HS); } rio_mport_read_config_32(port, destid, hopcount, RIO_SRC_OPS_CAR, @@ -445,7 +441,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rio_route_clr_table(rdev, RIO_GLOBAL_TABLE, 0); } else { if (do_enum) - /*Enable Input Output Port (transmitter reviever)*/ + /*Enable Input Output Port (transmitter receiver)*/ rio_enable_rx_tx_port(port, 0, destid, hopcount, 0); dev_set_name(&rdev->dev, "%02x:e:%04x", rdev->net->id, @@ -481,10 +477,8 @@ cleanup: /** * rio_sport_is_active- Tests if a switch port has an active connection. - * @port: Master port to send transaction - * @destid: Associated destination ID for switch - * @hopcount: Hopcount to reach switch - * @sport: Switch port number + * @rdev: RapidIO device object + * @sp: Switch port number * * Reads the port error status CSR for a particular switch port to * determine if the port has an active link. Returns @@ -492,31 +486,12 @@ cleanup: * inactive. */ static int -rio_sport_is_active(struct rio_mport *port, u16 destid, u8 hopcount, int sport) +rio_sport_is_active(struct rio_dev *rdev, int sp) { u32 result = 0; - u32 ext_ftr_ptr; - ext_ftr_ptr = rio_mport_get_efb(port, 0, destid, hopcount, 0); - - while (ext_ftr_ptr) { - rio_mport_read_config_32(port, destid, hopcount, - ext_ftr_ptr, &result); - result = RIO_GET_BLOCK_ID(result); - if ((result == RIO_EFB_SER_EP_FREE_ID) || - (result == RIO_EFB_SER_EP_FREE_ID_V13P) || - (result == RIO_EFB_SER_EP_FREC_ID)) - break; - - ext_ftr_ptr = rio_mport_get_efb(port, 0, destid, hopcount, - ext_ftr_ptr); - } - - if (ext_ftr_ptr) - rio_mport_read_config_32(port, destid, hopcount, - ext_ftr_ptr + - RIO_PORT_N_ERR_STS_CSR(sport), - &result); + rio_read_config_32(rdev, RIO_DEV_PORT_N_ERR_STS_CSR(rdev, sp), + &result); return result & RIO_PORT_N_ERR_STS_PORT_OK; } @@ -655,9 +630,7 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, cur_destid = next_destid; - if (rio_sport_is_active - (port, RIO_ANY_DESTID(port->sys_size), hopcount, - port_num)) { + if (rio_sport_is_active(rdev, port_num)) { pr_debug( "RIO: scanning device on port %d\n", port_num); @@ -785,8 +758,7 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, if (RIO_GET_PORT_NUM(rdev->swpinfo) == port_num) continue; - if (rio_sport_is_active - (port, destid, hopcount, port_num)) { + if (rio_sport_is_active(rdev, port_num)) { pr_debug( "RIO: scanning device on port %d\n", port_num); @@ -831,21 +803,11 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, static int rio_mport_is_active(struct rio_mport *port) { u32 result = 0; - u32 ext_ftr_ptr; - int *entry = rio_mport_phys_table; - - do { - if ((ext_ftr_ptr = - rio_mport_get_feature(port, 1, 0, 0, *entry))) - break; - } while (*++entry >= 0); - - if (ext_ftr_ptr) - rio_local_read_config_32(port, - ext_ftr_ptr + - RIO_PORT_N_ERR_STS_CSR(port->index), - &result); + rio_local_read_config_32(port, + port->phys_efptr + + RIO_PORT_N_ERR_STS_CSR(port->index, port->phys_rmap), + &result); return result & RIO_PORT_N_ERR_STS_PORT_OK; } diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 1cd32603259f..37042858c2db 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -786,10 +786,11 @@ EXPORT_SYMBOL_GPL(rio_unmap_outb_region); * @local: Indicate a local master port or remote device access * @destid: Destination ID of the device * @hopcount: Number of switch hops to the device + * @rmap: pointer to location to store register map type info */ u32 rio_mport_get_physefb(struct rio_mport *port, int local, - u16 destid, u8 hopcount) + u16 destid, u8 hopcount, u32 *rmap) { u32 ext_ftr_ptr; u32 ftr_header; @@ -807,14 +808,21 @@ rio_mport_get_physefb(struct rio_mport *port, int local, ftr_header = RIO_GET_BLOCK_ID(ftr_header); switch (ftr_header) { - case RIO_EFB_SER_EP_ID_V13P: - case RIO_EFB_SER_EP_REC_ID_V13P: - case RIO_EFB_SER_EP_FREE_ID_V13P: case RIO_EFB_SER_EP_ID: case RIO_EFB_SER_EP_REC_ID: case RIO_EFB_SER_EP_FREE_ID: - case RIO_EFB_SER_EP_FREC_ID: + case RIO_EFB_SER_EP_M1_ID: + case RIO_EFB_SER_EP_SW_M1_ID: + case RIO_EFB_SER_EPF_M1_ID: + case RIO_EFB_SER_EPF_SW_M1_ID: + *rmap = 1; + return ext_ftr_ptr; + case RIO_EFB_SER_EP_M2_ID: + case RIO_EFB_SER_EP_SW_M2_ID: + case RIO_EFB_SER_EPF_M2_ID: + case RIO_EFB_SER_EPF_SW_M2_ID: + *rmap = 2; return ext_ftr_ptr; default: @@ -873,16 +881,16 @@ int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock) u32 regval; rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(pnum), - ®val); + RIO_DEV_PORT_N_CTL_CSR(rdev, pnum), + ®val); if (lock) regval |= RIO_PORT_N_CTL_LOCKOUT; else regval &= ~RIO_PORT_N_CTL_LOCKOUT; rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(pnum), - regval); + RIO_DEV_PORT_N_CTL_CSR(rdev, pnum), + regval); return 0; } EXPORT_SYMBOL_GPL(rio_set_port_lockout); @@ -906,6 +914,7 @@ int rio_enable_rx_tx_port(struct rio_mport *port, #ifdef CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS u32 regval; u32 ext_ftr_ptr; + u32 rmap; /* * enable rx input tx output port @@ -913,34 +922,29 @@ int rio_enable_rx_tx_port(struct rio_mport *port, pr_debug("rio_enable_rx_tx_port(local = %d, destid = %d, hopcount = " "%d, port_num = %d)\n", local, destid, hopcount, port_num); - ext_ftr_ptr = rio_mport_get_physefb(port, local, destid, hopcount); + ext_ftr_ptr = rio_mport_get_physefb(port, local, destid, + hopcount, &rmap); if (local) { - rio_local_read_config_32(port, ext_ftr_ptr + - RIO_PORT_N_CTL_CSR(0), + rio_local_read_config_32(port, + ext_ftr_ptr + RIO_PORT_N_CTL_CSR(0, rmap), ®val); } else { if (rio_mport_read_config_32(port, destid, hopcount, - ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num), ®val) < 0) + ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num, rmap), + ®val) < 0) return -EIO; } - if (regval & RIO_PORT_N_CTL_P_TYP_SER) { - /* serial */ - regval = regval | RIO_PORT_N_CTL_EN_RX_SER - | RIO_PORT_N_CTL_EN_TX_SER; - } else { - /* parallel */ - regval = regval | RIO_PORT_N_CTL_EN_RX_PAR - | RIO_PORT_N_CTL_EN_TX_PAR; - } + regval = regval | RIO_PORT_N_CTL_EN_RX | RIO_PORT_N_CTL_EN_TX; if (local) { - rio_local_write_config_32(port, ext_ftr_ptr + - RIO_PORT_N_CTL_CSR(0), regval); + rio_local_write_config_32(port, + ext_ftr_ptr + RIO_PORT_N_CTL_CSR(0, rmap), regval); } else { if (rio_mport_write_config_32(port, destid, hopcount, - ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num), regval) < 0) + ext_ftr_ptr + RIO_PORT_N_CTL_CSR(port_num, rmap), + regval) < 0) return -EIO; } #endif @@ -1042,14 +1046,14 @@ rio_get_input_status(struct rio_dev *rdev, int pnum, u32 *lnkresp) /* Read from link maintenance response register * to clear valid bit */ rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(pnum), + RIO_DEV_PORT_N_MNT_RSP_CSR(rdev, pnum), ®val); udelay(50); } /* Issue Input-status command */ rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_MNT_REQ_CSR(pnum), + RIO_DEV_PORT_N_MNT_REQ_CSR(rdev, pnum), RIO_MNT_REQ_CMD_IS); /* Exit if the response is not expected */ @@ -1060,7 +1064,7 @@ rio_get_input_status(struct rio_dev *rdev, int pnum, u32 *lnkresp) while (checkcount--) { udelay(50); rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(pnum), + RIO_DEV_PORT_N_MNT_RSP_CSR(rdev, pnum), ®val); if (regval & RIO_PORT_N_MNT_RSP_RVAL) { *lnkresp = regval; @@ -1076,6 +1080,13 @@ rio_get_input_status(struct rio_dev *rdev, int pnum, u32 *lnkresp) * @rdev: Pointer to RIO device control structure * @pnum: Switch port number to clear errors * @err_status: port error status (if 0 reads register from device) + * + * TODO: Currently this routine is not compatible with recovery process + * specified for idt_gen3 RapidIO switch devices. It has to be reviewed + * to implement universal recovery process that is compatible full range + * off available devices. + * IDT gen3 switch driver now implements HW-specific error handler that + * issues soft port reset to the port to reset ERR_STOP bits and ackIDs. */ static int rio_clr_err_stopped(struct rio_dev *rdev, u32 pnum, u32 err_status) { @@ -1085,10 +1096,10 @@ static int rio_clr_err_stopped(struct rio_dev *rdev, u32 pnum, u32 err_status) if (err_status == 0) rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, pnum), &err_status); - if (err_status & RIO_PORT_N_ERR_STS_PW_OUT_ES) { + if (err_status & RIO_PORT_N_ERR_STS_OUT_ES) { pr_debug("RIO_EM: servicing Output Error-Stopped state\n"); /* * Send a Link-Request/Input-Status control symbol @@ -1103,7 +1114,7 @@ static int rio_clr_err_stopped(struct rio_dev *rdev, u32 pnum, u32 err_status) far_ackid = (regval & RIO_PORT_N_MNT_RSP_ASTAT) >> 5; far_linkstat = regval & RIO_PORT_N_MNT_RSP_LSTAT; rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ACK_STS_CSR(pnum), + RIO_DEV_PORT_N_ACK_STS_CSR(rdev, pnum), ®val); pr_debug("RIO_EM: SP%d_ACK_STS_CSR=0x%08x\n", pnum, regval); near_ackid = (regval & RIO_PORT_N_ACK_INBOUND) >> 24; @@ -1121,43 +1132,43 @@ static int rio_clr_err_stopped(struct rio_dev *rdev, u32 pnum, u32 err_status) * far inbound. */ rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ACK_STS_CSR(pnum), + RIO_DEV_PORT_N_ACK_STS_CSR(rdev, pnum), (near_ackid << 24) | (far_ackid << 8) | far_ackid); /* Align far outstanding/outbound ackIDs with * near inbound. */ far_ackid++; - if (nextdev) - rio_write_config_32(nextdev, - nextdev->phys_efptr + - RIO_PORT_N_ACK_STS_CSR(RIO_GET_PORT_NUM(nextdev->swpinfo)), - (far_ackid << 24) | - (near_ackid << 8) | near_ackid); - else - pr_debug("RIO_EM: Invalid nextdev pointer (NULL)\n"); + if (!nextdev) { + pr_debug("RIO_EM: nextdev pointer == NULL\n"); + goto rd_err; + } + + rio_write_config_32(nextdev, + RIO_DEV_PORT_N_ACK_STS_CSR(nextdev, + RIO_GET_PORT_NUM(nextdev->swpinfo)), + (far_ackid << 24) | + (near_ackid << 8) | near_ackid); } rd_err: - rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), - &err_status); + rio_read_config_32(rdev, RIO_DEV_PORT_N_ERR_STS_CSR(rdev, pnum), + &err_status); pr_debug("RIO_EM: SP%d_ERR_STS_CSR=0x%08x\n", pnum, err_status); } - if ((err_status & RIO_PORT_N_ERR_STS_PW_INP_ES) && nextdev) { + if ((err_status & RIO_PORT_N_ERR_STS_INP_ES) && nextdev) { pr_debug("RIO_EM: servicing Input Error-Stopped state\n"); rio_get_input_status(nextdev, RIO_GET_PORT_NUM(nextdev->swpinfo), NULL); udelay(50); - rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(pnum), - &err_status); + rio_read_config_32(rdev, RIO_DEV_PORT_N_ERR_STS_CSR(rdev, pnum), + &err_status); pr_debug("RIO_EM: SP%d_ERR_STS_CSR=0x%08x\n", pnum, err_status); } - return (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | - RIO_PORT_N_ERR_STS_PW_INP_ES)) ? 1 : 0; + return (err_status & (RIO_PORT_N_ERR_STS_OUT_ES | + RIO_PORT_N_ERR_STS_INP_ES)) ? 1 : 0; } /** @@ -1257,9 +1268,8 @@ int rio_inb_pwrite_handler(struct rio_mport *mport, union rio_pw_msg *pw_msg) if (rdev->rswitch->ops && rdev->rswitch->ops->em_handle) rdev->rswitch->ops->em_handle(rdev, portnum); - rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - &err_status); + rio_read_config_32(rdev, RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum), + &err_status); pr_debug("RIO_PW: SP%d_ERR_STS_CSR=0x%08x\n", portnum, err_status); if (err_status & RIO_PORT_N_ERR_STS_PORT_OK) { @@ -1276,8 +1286,8 @@ int rio_inb_pwrite_handler(struct rio_mport *mport, union rio_pw_msg *pw_msg) * Depending on the link partner state, two attempts * may be needed for successful recovery. */ - if (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | - RIO_PORT_N_ERR_STS_PW_INP_ES)) { + if (err_status & (RIO_PORT_N_ERR_STS_OUT_ES | + RIO_PORT_N_ERR_STS_INP_ES)) { if (rio_clr_err_stopped(rdev, portnum, err_status)) rio_clr_err_stopped(rdev, portnum, 0); } @@ -1287,10 +1297,18 @@ int rio_inb_pwrite_handler(struct rio_mport *mport, union rio_pw_msg *pw_msg) rdev->rswitch->port_ok &= ~(1 << portnum); rio_set_port_lockout(rdev, portnum, 1); + if (rdev->phys_rmap == 1) { rio_write_config_32(rdev, - rdev->phys_efptr + - RIO_PORT_N_ACK_STS_CSR(portnum), + RIO_DEV_PORT_N_ACK_STS_CSR(rdev, portnum), RIO_PORT_N_ACK_CLEAR); + } else { + rio_write_config_32(rdev, + RIO_DEV_PORT_N_OB_ACK_CSR(rdev, portnum), + RIO_PORT_N_OB_ACK_CLEAR); + rio_write_config_32(rdev, + RIO_DEV_PORT_N_IB_ACK_CSR(rdev, portnum), + 0); + } /* Schedule Extraction Service */ pr_debug("RIO_PW: Device Extraction on [%s]-P%d\n", @@ -1319,9 +1337,8 @@ int rio_inb_pwrite_handler(struct rio_mport *mport, union rio_pw_msg *pw_msg) } /* Clear remaining error bits and Port-Write Pending bit */ - rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), - err_status); + rio_write_config_32(rdev, RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum), + err_status); return 0; } @@ -1372,20 +1389,7 @@ EXPORT_SYMBOL_GPL(rio_mport_get_efb); * Tell if a device supports a given RapidIO capability. * Returns the offset of the requested extended feature * block within the device's RIO configuration space or - * 0 in case the device does not support it. Possible - * values for @ftr: - * - * %RIO_EFB_PAR_EP_ID LP/LVDS EP Devices - * - * %RIO_EFB_PAR_EP_REC_ID LP/LVDS EP Recovery Devices - * - * %RIO_EFB_PAR_EP_FREE_ID LP/LVDS EP Free Devices - * - * %RIO_EFB_SER_EP_ID LP/Serial EP Devices - * - * %RIO_EFB_SER_EP_REC_ID LP/Serial EP Recovery Devices - * - * %RIO_EFB_SER_EP_FREE_ID LP/Serial EP Free Devices + * 0 in case the device does not support it. */ u32 rio_mport_get_feature(struct rio_mport * port, int local, u16 destid, diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index 625d09add001..9796b3fee70d 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -22,7 +22,7 @@ extern u32 rio_mport_get_feature(struct rio_mport *mport, int local, u16 destid, u8 hopcount, int ftr); extern u32 rio_mport_get_physefb(struct rio_mport *port, int local, - u16 destid, u8 hopcount); + u16 destid, u8 hopcount, u32 *rmap); extern u32 rio_mport_get_efb(struct rio_mport *port, int local, u16 destid, u8 hopcount, u32 from); extern int rio_mport_chk_dev_access(struct rio_mport *mport, u16 destid, diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c index 42c8b014fe15..2700d15f7584 100644 --- a/drivers/rapidio/switches/tsi57x.c +++ b/drivers/rapidio/switches/tsi57x.c @@ -175,12 +175,10 @@ tsi57x_em_init(struct rio_dev *rdev) /* Clear all pending interrupts */ rio_read_config_32(rdev, - rdev->phys_efptr + - RIO_PORT_N_ERR_STS_CSR(portnum), + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum), ®val); rio_write_config_32(rdev, - rdev->phys_efptr + - RIO_PORT_N_ERR_STS_CSR(portnum), + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum), regval & 0x07120214); rio_read_config_32(rdev, @@ -198,7 +196,7 @@ tsi57x_em_init(struct rio_dev *rdev) /* Skip next (odd) port if the current port is in x4 mode */ rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum), + RIO_DEV_PORT_N_CTL_CSR(rdev, portnum), ®val); if ((regval & RIO_PORT_N_CTL_PWIDTH) == RIO_PORT_N_CTL_PWIDTH_4) portnum++; @@ -221,23 +219,23 @@ tsi57x_em_handler(struct rio_dev *rdev, u8 portnum) u32 regval; rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum), + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, portnum), &err_status); if ((err_status & RIO_PORT_N_ERR_STS_PORT_OK) && - (err_status & (RIO_PORT_N_ERR_STS_PW_OUT_ES | - RIO_PORT_N_ERR_STS_PW_INP_ES))) { + (err_status & (RIO_PORT_N_ERR_STS_OUT_ES | + RIO_PORT_N_ERR_STS_INP_ES))) { /* Remove any queued packets by locking/unlocking port */ rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum), + RIO_DEV_PORT_N_CTL_CSR(rdev, portnum), ®val); if (!(regval & RIO_PORT_N_CTL_LOCKOUT)) { rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum), + RIO_DEV_PORT_N_CTL_CSR(rdev, portnum), regval | RIO_PORT_N_CTL_LOCKOUT); udelay(50); rio_write_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_CTL_CSR(portnum), + RIO_DEV_PORT_N_CTL_CSR(rdev, portnum), regval); } @@ -245,7 +243,7 @@ tsi57x_em_handler(struct rio_dev *rdev, u8 portnum) * valid bit */ rio_read_config_32(rdev, - rdev->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(portnum), + RIO_DEV_PORT_N_MNT_RSP_CSR(rdev, portnum), ®val); /* Send a Packet-Not-Accepted/Link-Request-Input-Status control @@ -259,8 +257,8 @@ tsi57x_em_handler(struct rio_dev *rdev, u8 portnum) while (checkcount--) { udelay(50); rio_read_config_32(rdev, - rdev->phys_efptr + - RIO_PORT_N_MNT_RSP_CSR(portnum), + RIO_DEV_PORT_N_MNT_RSP_CSR(rdev, + portnum), ®val); if (regval & RIO_PORT_N_MNT_RSP_RVAL) goto exit_es; diff --git a/include/linux/rio.h b/include/linux/rio.h index f7ec35b48800..37b95c4af99d 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -163,6 +163,7 @@ enum rio_device_state { * @dst_ops: Destination operation capabilities * @comp_tag: RIO component tag * @phys_efptr: RIO device extended features pointer + * @phys_rmap: LP-Serial Register Map Type (1 or 2) * @em_efptr: RIO Error Management features pointer * @dma_mask: Mask of bits of RIO address this device implements * @driver: Driver claiming this device @@ -193,6 +194,7 @@ struct rio_dev { u32 dst_ops; u32 comp_tag; u32 phys_efptr; + u32 phys_rmap; u32 em_efptr; u64 dma_mask; struct rio_driver *driver; /* RIO driver claiming this device */ @@ -237,11 +239,6 @@ struct rio_dbell { void *dev_id; }; -enum rio_phy_type { - RIO_PHY_PARALLEL, - RIO_PHY_SERIAL, -}; - /** * struct rio_mport - RIO master port info * @dbells: List of doorbell events @@ -259,8 +256,8 @@ enum rio_phy_type { * @id: Port ID, unique among all ports * @index: Port index, unique among all port interfaces of the same type * @sys_size: RapidIO common transport system size - * @phy_type: RapidIO phy type * @phys_efptr: RIO port extended features pointer + * @phys_rmap: LP-Serial EFB Register Mapping type (1 or 2). * @name: Port name string * @dev: device structure associated with an mport * @priv: Master port private data @@ -289,8 +286,8 @@ struct rio_mport { * 0 - Small size. 256 devices. * 1 - Large size, 65536 devices. */ - enum rio_phy_type phy_type; /* RapidIO phy type */ u32 phys_efptr; + u32 phys_rmap; unsigned char name[RIO_MAX_MPORT_NAME]; struct device dev; void *priv; /* Master port private data */ diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index 1063ae382bc2..40c04efe7409 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -42,9 +42,11 @@ #define RIO_PEF_INB_MBOX2 0x00200000 /* [II, <= 1.2] Mailbox 2 */ #define RIO_PEF_INB_MBOX3 0x00100000 /* [II, <= 1.2] Mailbox 3 */ #define RIO_PEF_INB_DOORBELL 0x00080000 /* [II, <= 1.2] Doorbells */ +#define RIO_PEF_DEV32 0x00001000 /* [III] PE supports Common TRansport Dev32 */ #define RIO_PEF_EXT_RT 0x00000200 /* [III, 1.3] Extended route table support */ #define RIO_PEF_STD_RT 0x00000100 /* [III, 1.3] Standard route table support */ -#define RIO_PEF_CTLS 0x00000010 /* [III] CTLS */ +#define RIO_PEF_CTLS 0x00000010 /* [III] Common Transport Large System (< rev.3) */ +#define RIO_PEF_DEV16 0x00000010 /* [III] PE Supports Common Transport Dev16 (rev.3) */ #define RIO_PEF_EXT_FEATURES 0x00000008 /* [I] EFT_PTR valid */ #define RIO_PEF_ADDR_66 0x00000004 /* [I] 66 bits */ #define RIO_PEF_ADDR_50 0x00000002 /* [I] 50 bits */ @@ -194,70 +196,101 @@ #define RIO_GET_BLOCK_ID(x) (x & RIO_EFB_ID_MASK) /* Extended Feature Block IDs */ -#define RIO_EFB_PAR_EP_ID 0x0001 /* [IV] LP/LVDS EP Devices */ -#define RIO_EFB_PAR_EP_REC_ID 0x0002 /* [IV] LP/LVDS EP Recovery Devices */ -#define RIO_EFB_PAR_EP_FREE_ID 0x0003 /* [IV] LP/LVDS EP Free Devices */ -#define RIO_EFB_SER_EP_ID_V13P 0x0001 /* [VI] LP/Serial EP Devices, RapidIO Spec ver 1.3 and above */ -#define RIO_EFB_SER_EP_REC_ID_V13P 0x0002 /* [VI] LP/Serial EP Recovery Devices, RapidIO Spec ver 1.3 and above */ -#define RIO_EFB_SER_EP_FREE_ID_V13P 0x0003 /* [VI] LP/Serial EP Free Devices, RapidIO Spec ver 1.3 and above */ -#define RIO_EFB_SER_EP_ID 0x0004 /* [VI] LP/Serial EP Devices */ -#define RIO_EFB_SER_EP_REC_ID 0x0005 /* [VI] LP/Serial EP Recovery Devices */ -#define RIO_EFB_SER_EP_FREE_ID 0x0006 /* [VI] LP/Serial EP Free Devices */ -#define RIO_EFB_SER_EP_FREC_ID 0x0009 /* [VI] LP/Serial EP Free Recovery Devices */ +#define RIO_EFB_SER_EP_M1_ID 0x0001 /* [VI] LP-Serial EP Devices, Map I */ +#define RIO_EFB_SER_EP_SW_M1_ID 0x0002 /* [VI] LP-Serial EP w SW Recovery Devices, Map I */ +#define RIO_EFB_SER_EPF_M1_ID 0x0003 /* [VI] LP-Serial EP Free Devices, Map I */ +#define RIO_EFB_SER_EP_ID 0x0004 /* [VI] LP-Serial EP Devices, RIO 1.2 */ +#define RIO_EFB_SER_EP_REC_ID 0x0005 /* [VI] LP-Serial EP w SW Recovery Devices, RIO 1.2 */ +#define RIO_EFB_SER_EP_FREE_ID 0x0006 /* [VI] LP-Serial EP Free Devices, RIO 1.2 */ #define RIO_EFB_ERR_MGMNT 0x0007 /* [VIII] Error Management Extensions */ +#define RIO_EFB_SER_EPF_SW_M1_ID 0x0009 /* [VI] LP-Serial EP Free w SW Recovery Devices, Map I */ +#define RIO_EFB_SW_ROUTING_TBL 0x000E /* [III] Switch Routing Table Block */ +#define RIO_EFB_SER_EP_M2_ID 0x0011 /* [VI] LP-Serial EP Devices, Map II */ +#define RIO_EFB_SER_EP_SW_M2_ID 0x0012 /* [VI] LP-Serial EP w SW Recovery Devices, Map II */ +#define RIO_EFB_SER_EPF_M2_ID 0x0013 /* [VI] LP-Serial EP Free Devices, Map II */ +#define RIO_EFB_ERR_MGMNT_HS 0x0017 /* [VIII] Error Management Extensions, Hot-Swap only */ +#define RIO_EFB_SER_EPF_SW_M2_ID 0x0019 /* [VI] LP-Serial EP Free w SW Recovery Devices, Map II */ /* - * Physical 8/16 LP-LVDS - * ID=0x0001, Generic End Point Devices - * ID=0x0002, Generic End Point Devices, software assisted recovery option - * ID=0x0003, Generic End Point Free Devices - * - * Physical LP-Serial - * ID=0x0004, Generic End Point Devices - * ID=0x0005, Generic End Point Devices, software assisted recovery option - * ID=0x0006, Generic End Point Free Devices + * Physical LP-Serial Registers Definitions + * Parameters in register macros: + * n - port number, m - Register Map Type (1 or 2) */ #define RIO_PORT_MNT_HEADER 0x0000 #define RIO_PORT_REQ_CTL_CSR 0x0020 -#define RIO_PORT_RSP_CTL_CSR 0x0024 /* 0x0001/0x0002 */ -#define RIO_PORT_LINKTO_CTL_CSR 0x0020 /* Serial */ -#define RIO_PORT_RSPTO_CTL_CSR 0x0024 /* Serial */ +#define RIO_PORT_RSP_CTL_CSR 0x0024 +#define RIO_PORT_LINKTO_CTL_CSR 0x0020 +#define RIO_PORT_RSPTO_CTL_CSR 0x0024 #define RIO_PORT_GEN_CTL_CSR 0x003c #define RIO_PORT_GEN_HOST 0x80000000 #define RIO_PORT_GEN_MASTER 0x40000000 #define RIO_PORT_GEN_DISCOVERED 0x20000000 -#define RIO_PORT_N_MNT_REQ_CSR(x) (0x0040 + x*0x20) /* 0x0002 */ +#define RIO_PORT_N_MNT_REQ_CSR(n, m) (0x40 + (n) * (0x20 * (m))) #define RIO_MNT_REQ_CMD_RD 0x03 /* Reset-device command */ #define RIO_MNT_REQ_CMD_IS 0x04 /* Input-status command */ -#define RIO_PORT_N_MNT_RSP_CSR(x) (0x0044 + x*0x20) /* 0x0002 */ +#define RIO_PORT_N_MNT_RSP_CSR(n, m) (0x44 + (n) * (0x20 * (m))) #define RIO_PORT_N_MNT_RSP_RVAL 0x80000000 /* Response Valid */ #define RIO_PORT_N_MNT_RSP_ASTAT 0x000007e0 /* ackID Status */ #define RIO_PORT_N_MNT_RSP_LSTAT 0x0000001f /* Link Status */ -#define RIO_PORT_N_ACK_STS_CSR(x) (0x0048 + x*0x20) /* 0x0002 */ +#define RIO_PORT_N_ACK_STS_CSR(n) (0x48 + (n) * 0x20) /* Only in RM-I */ #define RIO_PORT_N_ACK_CLEAR 0x80000000 #define RIO_PORT_N_ACK_INBOUND 0x3f000000 #define RIO_PORT_N_ACK_OUTSTAND 0x00003f00 #define RIO_PORT_N_ACK_OUTBOUND 0x0000003f -#define RIO_PORT_N_CTL2_CSR(x) (0x0054 + x*0x20) +#define RIO_PORT_N_CTL2_CSR(n, m) (0x54 + (n) * (0x20 * (m))) #define RIO_PORT_N_CTL2_SEL_BAUD 0xf0000000 -#define RIO_PORT_N_ERR_STS_CSR(x) (0x0058 + x*0x20) -#define RIO_PORT_N_ERR_STS_PW_OUT_ES 0x00010000 /* Output Error-stopped */ -#define RIO_PORT_N_ERR_STS_PW_INP_ES 0x00000100 /* Input Error-stopped */ +#define RIO_PORT_N_ERR_STS_CSR(n, m) (0x58 + (n) * (0x20 * (m))) +#define RIO_PORT_N_ERR_STS_OUT_ES 0x00010000 /* Output Error-stopped */ +#define RIO_PORT_N_ERR_STS_INP_ES 0x00000100 /* Input Error-stopped */ #define RIO_PORT_N_ERR_STS_PW_PEND 0x00000010 /* Port-Write Pending */ +#define RIO_PORT_N_ERR_STS_PORT_UA 0x00000008 /* Port Unavailable */ #define RIO_PORT_N_ERR_STS_PORT_ERR 0x00000004 #define RIO_PORT_N_ERR_STS_PORT_OK 0x00000002 #define RIO_PORT_N_ERR_STS_PORT_UNINIT 0x00000001 -#define RIO_PORT_N_CTL_CSR(x) (0x005c + x*0x20) +#define RIO_PORT_N_CTL_CSR(n, m) (0x5c + (n) * (0x20 * (m))) #define RIO_PORT_N_CTL_PWIDTH 0xc0000000 #define RIO_PORT_N_CTL_PWIDTH_1 0x00000000 #define RIO_PORT_N_CTL_PWIDTH_4 0x40000000 #define RIO_PORT_N_CTL_IPW 0x38000000 /* Initialized Port Width */ #define RIO_PORT_N_CTL_P_TYP_SER 0x00000001 #define RIO_PORT_N_CTL_LOCKOUT 0x00000002 -#define RIO_PORT_N_CTL_EN_RX_SER 0x00200000 -#define RIO_PORT_N_CTL_EN_TX_SER 0x00400000 -#define RIO_PORT_N_CTL_EN_RX_PAR 0x08000000 -#define RIO_PORT_N_CTL_EN_TX_PAR 0x40000000 +#define RIO_PORT_N_CTL_EN_RX 0x00200000 +#define RIO_PORT_N_CTL_EN_TX 0x00400000 +#define RIO_PORT_N_OB_ACK_CSR(n) (0x60 + (n) * 0x40) /* Only in RM-II */ +#define RIO_PORT_N_OB_ACK_CLEAR 0x80000000 +#define RIO_PORT_N_OB_ACK_OUTSTD 0x00fff000 +#define RIO_PORT_N_OB_ACK_OUTBND 0x00000fff +#define RIO_PORT_N_IB_ACK_CSR(n) (0x64 + (n) * 0x40) /* Only in RM-II */ +#define RIO_PORT_N_IB_ACK_INBND 0x00000fff + +/* + * Device-based helper macros for serial port register access. + * d - pointer to rapidio device object, n - port number + */ + +#define RIO_DEV_PORT_N_MNT_REQ_CSR(d, n) \ + (d->phys_efptr + RIO_PORT_N_MNT_REQ_CSR(n, d->phys_rmap)) + +#define RIO_DEV_PORT_N_MNT_RSP_CSR(d, n) \ + (d->phys_efptr + RIO_PORT_N_MNT_RSP_CSR(n, d->phys_rmap)) + +#define RIO_DEV_PORT_N_ACK_STS_CSR(d, n) \ + (d->phys_efptr + RIO_PORT_N_ACK_STS_CSR(n)) + +#define RIO_DEV_PORT_N_CTL2_CSR(d, n) \ + (d->phys_efptr + RIO_PORT_N_CTL2_CSR(n, d->phys_rmap)) + +#define RIO_DEV_PORT_N_ERR_STS_CSR(d, n) \ + (d->phys_efptr + RIO_PORT_N_ERR_STS_CSR(n, d->phys_rmap)) + +#define RIO_DEV_PORT_N_CTL_CSR(d, n) \ + (d->phys_efptr + RIO_PORT_N_CTL_CSR(n, d->phys_rmap)) + +#define RIO_DEV_PORT_N_OB_ACK_CSR(d, n) \ + (d->phys_efptr + RIO_PORT_N_OB_ACK_CSR(n)) + +#define RIO_DEV_PORT_N_IB_ACK_CSR(d, n) \ + (d->phys_efptr + RIO_PORT_N_IB_ACK_CSR(n)) /* * Error Management Extensions (RapidIO 1.3+, Part 8) @@ -268,6 +301,7 @@ /* General EM Registers (Common for all Ports) */ #define RIO_EM_EFB_HEADER 0x000 /* Error Management Extensions Block Header */ +#define RIO_EM_EMHS_CAR 0x004 /* EM Functionality CAR */ #define RIO_EM_LTL_ERR_DETECT 0x008 /* Logical/Transport Layer Error Detect CSR */ #define RIO_EM_LTL_ERR_EN 0x00c /* Logical/Transport Layer Error Enable CSR */ #define REM_LTL_ERR_ILLTRAN 0x08000000 /* Illegal Transaction decode */ @@ -278,15 +312,33 @@ #define RIO_EM_LTL_ADDR_CAP 0x014 /* Logical/Transport Layer Address Capture CSR */ #define RIO_EM_LTL_DEVID_CAP 0x018 /* Logical/Transport Layer Device ID Capture CSR */ #define RIO_EM_LTL_CTRL_CAP 0x01c /* Logical/Transport Layer Control Capture CSR */ +#define RIO_EM_LTL_DID32_CAP 0x020 /* Logical/Transport Layer Dev32 DestID Capture CSR */ +#define RIO_EM_LTL_SID32_CAP 0x024 /* Logical/Transport Layer Dev32 source ID Capture CSR */ #define RIO_EM_PW_TGT_DEVID 0x028 /* Port-write Target deviceID CSR */ +#define RIO_EM_PW_TGT_DEVID_D16M 0xff000000 /* Port-write Target DID16 MSB */ +#define RIO_EM_PW_TGT_DEVID_D8 0x00ff0000 /* Port-write Target DID16 LSB or DID8 */ +#define RIO_EM_PW_TGT_DEVID_DEV16 0x00008000 /* Port-write Target DID16 LSB or DID8 */ +#define RIO_EM_PW_TGT_DEVID_DEV32 0x00004000 /* Port-write Target DID16 LSB or DID8 */ #define RIO_EM_PKT_TTL 0x02c /* Packet Time-to-live CSR */ +#define RIO_EM_PKT_TTL_VAL 0xffff0000 /* Packet Time-to-live value */ +#define RIO_EM_PW_TGT32_DEVID 0x030 /* Port-write Dev32 Target deviceID CSR */ +#define RIO_EM_PW_TX_CTRL 0x034 /* Port-write Transmission Control CSR */ +#define RIO_EM_PW_TX_CTRL_PW_DIS 0x00000001 /* Port-write Transmission Disable bit */ /* Per-Port EM Registers */ #define RIO_EM_PN_ERR_DETECT(x) (0x040 + x*0x40) /* Port N Error Detect CSR */ #define REM_PED_IMPL_SPEC 0x80000000 +#define REM_PED_LINK_OK2U 0x40000000 /* Link OK to Uninit transition */ +#define REM_PED_LINK_UPDA 0x20000000 /* Link Uninit Packet Discard Active */ +#define REM_PED_LINK_U2OK 0x10000000 /* Link Uninit to OK transition */ #define REM_PED_LINK_TO 0x00000001 + #define RIO_EM_PN_ERRRATE_EN(x) (0x044 + x*0x40) /* Port N Error Rate Enable CSR */ +#define RIO_EM_PN_ERRRATE_EN_OK2U 0x40000000 /* Enable notification for OK2U */ +#define RIO_EM_PN_ERRRATE_EN_UPDA 0x20000000 /* Enable notification for UPDA */ +#define RIO_EM_PN_ERRRATE_EN_U2OK 0x10000000 /* Enable notification for U2OK */ + #define RIO_EM_PN_ATTRIB_CAP(x) (0x048 + x*0x40) /* Port N Attributes Capture CSR */ #define RIO_EM_PN_PKT_CAP_0(x) (0x04c + x*0x40) /* Port N Packet/Control Symbol Capture 0 CSR */ #define RIO_EM_PN_PKT_CAP_1(x) (0x050 + x*0x40) /* Port N Packet Capture 1 CSR */ @@ -294,5 +346,50 @@ #define RIO_EM_PN_PKT_CAP_3(x) (0x058 + x*0x40) /* Port N Packet Capture 3 CSR */ #define RIO_EM_PN_ERRRATE(x) (0x068 + x*0x40) /* Port N Error Rate CSR */ #define RIO_EM_PN_ERRRATE_TR(x) (0x06c + x*0x40) /* Port N Error Rate Threshold CSR */ +#define RIO_EM_PN_LINK_UDT(x) (0x070 + x*0x40) /* Port N Link Uninit Discard Timer CSR */ +#define RIO_EM_PN_LINK_UDT_TO 0xffffff00 /* Link Uninit Timeout value */ + +/* + * Switch Routing Table Register Block ID=0x000E (RapidIO 3.0+, part 3) + * Register offsets are defined from beginning of the block. + */ + +/* Broadcast Routing Table Control CSR */ +#define RIO_BC_RT_CTL_CSR 0x020 +#define RIO_RT_CTL_THREE_LVL 0x80000000 +#define RIO_RT_CTL_DEV32_RT_CTRL 0x40000000 +#define RIO_RT_CTL_MC_MASK_SZ 0x03000000 /* 3.0+ Part 11: Multicast */ + +/* Broadcast Level 0 Info CSR */ +#define RIO_BC_RT_LVL0_INFO_CSR 0x030 +#define RIO_RT_L0I_NUM_GR 0xff000000 +#define RIO_RT_L0I_GR_PTR 0x00fffc00 + +/* Broadcast Level 1 Info CSR */ +#define RIO_BC_RT_LVL1_INFO_CSR 0x034 +#define RIO_RT_L1I_NUM_GR 0xff000000 +#define RIO_RT_L1I_GR_PTR 0x00fffc00 + +/* Broadcast Level 2 Info CSR */ +#define RIO_BC_RT_LVL2_INFO_CSR 0x038 +#define RIO_RT_L2I_NUM_GR 0xff000000 +#define RIO_RT_L2I_GR_PTR 0x00fffc00 + +/* Per-Port Routing Table registers. + * Register fields defined in the broadcast section above are + * applicable to the corresponding registers below. + */ +#define RIO_SPx_RT_CTL_CSR(x) (0x040 + (0x20 * x)) +#define RIO_SPx_RT_LVL0_INFO_CSR(x) (0x50 + (0x20 * x)) +#define RIO_SPx_RT_LVL1_INFO_CSR(x) (0x54 + (0x20 * x)) +#define RIO_SPx_RT_LVL2_INFO_CSR(x) (0x58 + (0x20 * x)) + +/* Register Formats for Routing Table Group entry. + * Register offsets are calculated using GR_PTR field in the corresponding + * table Level_N and group/entry numbers (see RapidIO 3.0+ Part 3). + */ +#define RIO_RT_Ln_ENTRY_IMPL_DEF 0xf0000000 +#define RIO_RT_Ln_ENTRY_RTE_VAL 0x000003ff +#define RIO_RT_ENTRY_DROP_PKT 0x300 #endif /* LINUX_RIO_REGS_H */ -- cgit v1.2.3-55-g7522 From 0b9364b5cf11c6e504f4b77e24b15a0dc8a82df0 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 2 Aug 2016 14:07:03 -0700 Subject: rapidio/switches: add driver for IDT gen3 switches Add RapidIO switch driver for IDT Gen3 switch devices: RXS1632 and RXS2448. [alexandre.bounine@idt.com: fixup for original driver patch] Link: http://lkml.kernel.org/r/1469137596-18241-1-git-send-email-alexandre.bounine@idt.com Link: http://lkml.kernel.org/r/1469125134-16523-14-git-send-email-alexandre.bounine@idt.com Signed-off-by: Alexandre Bounine Tested-by: Barry Wood Cc: Matt Porter Cc: Andre van Herk Cc: Barry Wood Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/switches/Kconfig | 6 + drivers/rapidio/switches/Makefile | 1 + drivers/rapidio/switches/idt_gen3.c | 382 ++++++++++++++++++++++++++++++++++++ include/linux/rio_ids.h | 2 + 4 files changed, 391 insertions(+) create mode 100644 drivers/rapidio/switches/idt_gen3.c (limited to 'include') diff --git a/drivers/rapidio/switches/Kconfig b/drivers/rapidio/switches/Kconfig index 345841562f95..92767fd3b541 100644 --- a/drivers/rapidio/switches/Kconfig +++ b/drivers/rapidio/switches/Kconfig @@ -22,3 +22,9 @@ config RAPIDIO_CPS_GEN2 default n ---help--- Includes support for ITD CPS Gen.2 serial RapidIO switches. + +config RAPIDIO_RXS_GEN3 + tristate "IDT RXS Gen.3 SRIO switch support" + default n + ---help--- + Includes support for ITD RXS Gen.3 serial RapidIO switches. diff --git a/drivers/rapidio/switches/Makefile b/drivers/rapidio/switches/Makefile index 051cc6b38188..6bdd54c4e733 100644 --- a/drivers/rapidio/switches/Makefile +++ b/drivers/rapidio/switches/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_RAPIDIO_TSI57X) += tsi57x.o obj-$(CONFIG_RAPIDIO_CPS_XX) += idtcps.o obj-$(CONFIG_RAPIDIO_TSI568) += tsi568.o obj-$(CONFIG_RAPIDIO_CPS_GEN2) += idt_gen2.o +obj-$(CONFIG_RAPIDIO_RXS_GEN3) += idt_gen3.o diff --git a/drivers/rapidio/switches/idt_gen3.c b/drivers/rapidio/switches/idt_gen3.c new file mode 100644 index 000000000000..c5923a547bed --- /dev/null +++ b/drivers/rapidio/switches/idt_gen3.c @@ -0,0 +1,382 @@ +/* + * IDT RXS Gen.3 Serial RapidIO switch family support + * + * Copyright 2016 Integrated Device Technology, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include "../rio.h" + +#define RIO_EM_PW_STAT 0x40020 +#define RIO_PW_CTL 0x40204 +#define RIO_PW_CTL_PW_TMR 0xffffff00 +#define RIO_PW_ROUTE 0x40208 + +#define RIO_EM_DEV_INT_EN 0x40030 + +#define RIO_PLM_SPx_IMP_SPEC_CTL(x) (0x10100 + (x)*0x100) +#define RIO_PLM_SPx_IMP_SPEC_CTL_SOFT_RST 0x02000000 + +#define RIO_PLM_SPx_PW_EN(x) (0x10118 + (x)*0x100) +#define RIO_PLM_SPx_PW_EN_OK2U 0x40000000 +#define RIO_PLM_SPx_PW_EN_LINIT 0x10000000 + +#define RIO_BC_L2_Gn_ENTRYx_CSR(n, x) (0x31000 + (n)*0x400 + (x)*0x4) +#define RIO_SPx_L2_Gn_ENTRYy_CSR(x, n, y) \ + (0x51000 + (x)*0x2000 + (n)*0x400 + (y)*0x4) + +static int +idtg3_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 route_port) +{ + u32 rval; + u32 entry = route_port; + int err = 0; + + pr_debug("RIO: %s t=0x%x did_%x to p_%x\n", + __func__, table, route_destid, entry); + + if (route_destid > 0xFF) + return -EINVAL; + + if (route_port == RIO_INVALID_ROUTE) + entry = RIO_RT_ENTRY_DROP_PKT; + + if (table == RIO_GLOBAL_TABLE) { + /* Use broadcast register to update all per-port tables */ + err = rio_mport_write_config_32(mport, destid, hopcount, + RIO_BC_L2_Gn_ENTRYx_CSR(0, route_destid), + entry); + return err; + } + + /* + * Verify that specified port/table number is valid + */ + err = rio_mport_read_config_32(mport, destid, hopcount, + RIO_SWP_INFO_CAR, &rval); + if (err) + return err; + + if (table >= RIO_GET_TOTAL_PORTS(rval)) + return -EINVAL; + + err = rio_mport_write_config_32(mport, destid, hopcount, + RIO_SPx_L2_Gn_ENTRYy_CSR(table, 0, route_destid), + entry); + return err; +} + +static int +idtg3_route_get_entry(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table, u16 route_destid, u8 *route_port) +{ + u32 rval; + int err; + + if (route_destid > 0xFF) + return -EINVAL; + + err = rio_mport_read_config_32(mport, destid, hopcount, + RIO_SWP_INFO_CAR, &rval); + if (err) + return err; + + /* + * This switch device does not have the dedicated global routing table. + * It is substituted by reading routing table of the ingress port of + * maintenance read requests. + */ + if (table == RIO_GLOBAL_TABLE) + table = RIO_GET_PORT_NUM(rval); + else if (table >= RIO_GET_TOTAL_PORTS(rval)) + return -EINVAL; + + err = rio_mport_read_config_32(mport, destid, hopcount, + RIO_SPx_L2_Gn_ENTRYy_CSR(table, 0, route_destid), + &rval); + if (err) + return err; + + if (rval == RIO_RT_ENTRY_DROP_PKT) + *route_port = RIO_INVALID_ROUTE; + else + *route_port = (u8)rval; + + return 0; +} + +static int +idtg3_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount, + u16 table) +{ + u32 i; + u32 rval; + int err; + + if (table == RIO_GLOBAL_TABLE) { + for (i = 0; i <= 0xff; i++) { + err = rio_mport_write_config_32(mport, destid, hopcount, + RIO_BC_L2_Gn_ENTRYx_CSR(0, i), + RIO_RT_ENTRY_DROP_PKT); + if (err) + break; + } + + return err; + } + + err = rio_mport_read_config_32(mport, destid, hopcount, + RIO_SWP_INFO_CAR, &rval); + if (err) + return err; + + if (table >= RIO_GET_TOTAL_PORTS(rval)) + return -EINVAL; + + for (i = 0; i <= 0xff; i++) { + err = rio_mport_write_config_32(mport, destid, hopcount, + RIO_SPx_L2_Gn_ENTRYy_CSR(table, 0, i), + RIO_RT_ENTRY_DROP_PKT); + if (err) + break; + } + + return err; +} + +/* + * This routine performs device-specific initialization only. + * All standard EM configuration should be performed at upper level. + */ +static int +idtg3_em_init(struct rio_dev *rdev) +{ + int i, tmp; + u32 rval; + + pr_debug("RIO: %s [%d:%d]\n", __func__, rdev->destid, rdev->hopcount); + + /* Disable assertion of interrupt signal */ + rio_write_config_32(rdev, RIO_EM_DEV_INT_EN, 0); + + /* Disable port-write event notifications during initialization */ + rio_write_config_32(rdev, rdev->em_efptr + RIO_EM_PW_TX_CTRL, + RIO_EM_PW_TX_CTRL_PW_DIS); + + /* Configure Port-Write notifications for hot-swap events */ + tmp = RIO_GET_TOTAL_PORTS(rdev->swpinfo); + for (i = 0; i < tmp; i++) { + + rio_read_config_32(rdev, + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, i), + &rval); + if (rval & RIO_PORT_N_ERR_STS_PORT_UA) + continue; + + /* Clear events signaled before enabling notification */ + rio_write_config_32(rdev, + rdev->em_efptr + RIO_EM_PN_ERR_DETECT(i), 0); + + /* Enable event notifications */ + rio_write_config_32(rdev, + rdev->em_efptr + RIO_EM_PN_ERRRATE_EN(i), + RIO_EM_PN_ERRRATE_EN_OK2U | RIO_EM_PN_ERRRATE_EN_U2OK); + /* Enable port-write generation on events */ + rio_write_config_32(rdev, RIO_PLM_SPx_PW_EN(i), + RIO_PLM_SPx_PW_EN_OK2U | RIO_PLM_SPx_PW_EN_LINIT); + + } + + /* Set Port-Write destination port */ + tmp = RIO_GET_PORT_NUM(rdev->swpinfo); + rio_write_config_32(rdev, RIO_PW_ROUTE, 1 << tmp); + + + /* Enable sending port-write event notifications */ + rio_write_config_32(rdev, rdev->em_efptr + RIO_EM_PW_TX_CTRL, 0); + + /* set TVAL = ~50us */ + rio_write_config_32(rdev, + rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8); + return 0; +} + + +/* + * idtg3_em_handler - device-specific error handler + * + * If the link is down (PORT_UNINIT) does nothing - this is considered + * as link partner removal from the port. + * + * If the link is up (PORT_OK) - situation is handled as *new* device insertion. + * In this case ERR_STOP bits are cleared by issuing soft reset command to the + * reporting port. Inbound and outbound ackIDs are cleared by the reset as well. + * This way the port is synchronized with freshly inserted device (assuming it + * was reset/powered-up on insertion). + * + * TODO: This is not sufficient in a situation when a link between two devices + * was down and up again (e.g. cable disconnect). For that situation full ackID + * realignment process has to be implemented. + */ +static int +idtg3_em_handler(struct rio_dev *rdev, u8 pnum) +{ + u32 err_status; + u32 rval; + + rio_read_config_32(rdev, + RIO_DEV_PORT_N_ERR_STS_CSR(rdev, pnum), + &err_status); + + /* Do nothing for device/link removal */ + if (err_status & RIO_PORT_N_ERR_STS_PORT_UNINIT) + return 0; + + /* When link is OK we have a device insertion. + * Request port soft reset to clear errors if they present. + * Inbound and outbound ackIDs will be 0 after reset. + */ + if (err_status & (RIO_PORT_N_ERR_STS_OUT_ES | + RIO_PORT_N_ERR_STS_INP_ES)) { + rio_read_config_32(rdev, RIO_PLM_SPx_IMP_SPEC_CTL(pnum), &rval); + rio_write_config_32(rdev, RIO_PLM_SPx_IMP_SPEC_CTL(pnum), + rval | RIO_PLM_SPx_IMP_SPEC_CTL_SOFT_RST); + udelay(10); + rio_write_config_32(rdev, RIO_PLM_SPx_IMP_SPEC_CTL(pnum), rval); + msleep(500); + } + + return 0; +} + +static struct rio_switch_ops idtg3_switch_ops = { + .owner = THIS_MODULE, + .add_entry = idtg3_route_add_entry, + .get_entry = idtg3_route_get_entry, + .clr_table = idtg3_route_clr_table, + .em_init = idtg3_em_init, + .em_handle = idtg3_em_handler, +}; + +static int idtg3_probe(struct rio_dev *rdev, const struct rio_device_id *id) +{ + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); + + spin_lock(&rdev->rswitch->lock); + + if (rdev->rswitch->ops) { + spin_unlock(&rdev->rswitch->lock); + return -EINVAL; + } + + rdev->rswitch->ops = &idtg3_switch_ops; + + if (rdev->do_enum) { + /* Disable hierarchical routing support: Existing fabric + * enumeration/discovery process (see rio-scan.c) uses 8-bit + * flat destination ID routing only. + */ + rio_write_config_32(rdev, 0x5000 + RIO_BC_RT_CTL_CSR, 0); + } + + spin_unlock(&rdev->rswitch->lock); + + return 0; +} + +static void idtg3_remove(struct rio_dev *rdev) +{ + pr_debug("RIO: %s for %s\n", __func__, rio_name(rdev)); + spin_lock(&rdev->rswitch->lock); + if (rdev->rswitch->ops == &idtg3_switch_ops) + rdev->rswitch->ops = NULL; + spin_unlock(&rdev->rswitch->lock); +} + +/* + * Gen3 switches repeat sending PW messages until a corresponding event flag + * is cleared. Use shutdown notification to disable generation of port-write + * messages if their destination node is shut down. + */ +static void idtg3_shutdown(struct rio_dev *rdev) +{ + int i; + u32 rval; + u16 destid; + + /* Currently the enumerator node acts also as PW handler */ + if (!rdev->do_enum) + return; + + pr_debug("RIO: %s(%s)\n", __func__, rio_name(rdev)); + + rio_read_config_32(rdev, RIO_PW_ROUTE, &rval); + i = RIO_GET_PORT_NUM(rdev->swpinfo); + + /* Check port-write destination port */ + if (!((1 << i) & rval)) + return; + + /* Disable sending port-write event notifications if PW destID + * matches to one of the enumerator node + */ + rio_read_config_32(rdev, rdev->em_efptr + RIO_EM_PW_TGT_DEVID, &rval); + + if (rval & RIO_EM_PW_TGT_DEVID_DEV16) + destid = rval >> 16; + else + destid = ((rval & RIO_EM_PW_TGT_DEVID_D8) >> 16); + + if (rdev->net->hport->host_deviceid == destid) { + rio_write_config_32(rdev, + rdev->em_efptr + RIO_EM_PW_TX_CTRL, 0); + pr_debug("RIO: %s(%s) PW transmission disabled\n", + __func__, rio_name(rdev)); + } +} + +static struct rio_device_id idtg3_id_table[] = { + {RIO_DEVICE(RIO_DID_IDTRXS1632, RIO_VID_IDT)}, + {RIO_DEVICE(RIO_DID_IDTRXS2448, RIO_VID_IDT)}, + { 0, } /* terminate list */ +}; + +static struct rio_driver idtg3_driver = { + .name = "idt_gen3", + .id_table = idtg3_id_table, + .probe = idtg3_probe, + .remove = idtg3_remove, + .shutdown = idtg3_shutdown, +}; + +static int __init idtg3_init(void) +{ + return rio_register_driver(&idtg3_driver); +} + +static void __exit idtg3_exit(void) +{ + pr_debug("RIO: %s\n", __func__); + rio_unregister_driver(&idtg3_driver); + pr_debug("RIO: %s done\n", __func__); +} + +device_initcall(idtg3_init); +module_exit(idtg3_exit); + +MODULE_DESCRIPTION("IDT RXS Gen.3 Serial RapidIO switch family driver"); +MODULE_AUTHOR("Integrated Device Technology, Inc."); +MODULE_LICENSE("GPL"); diff --git a/include/linux/rio_ids.h b/include/linux/rio_ids.h index 2543bc163d54..334c576c151c 100644 --- a/include/linux/rio_ids.h +++ b/include/linux/rio_ids.h @@ -38,5 +38,7 @@ #define RIO_DID_IDTVPS1616 0x0377 #define RIO_DID_IDTSPS1616 0x0378 #define RIO_DID_TSI721 0x80ab +#define RIO_DID_IDTRXS1632 0x80e5 +#define RIO_DID_IDTRXS2448 0x80e6 #endif /* LINUX_RIO_IDS_H */ -- cgit v1.2.3-55-g7522 From 3bd080e4d8f2351ee3e143f0ec9307cc95ae6639 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 2 Aug 2016 14:07:32 -0700 Subject: ipc: delete "nr_ipc_ns" Write-only variable. Link: http://lkml.kernel.org/r/20160708214356.GA6785@p183.telecom.by Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 2 -- ipc/msgutil.c | 2 -- ipc/namespace.c | 2 -- 3 files changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 1eee6bcfcf76..d10e54f03c09 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -63,8 +63,6 @@ struct ipc_namespace { }; extern struct ipc_namespace init_ipc_ns; -extern atomic_t nr_ipc_ns; - extern spinlock_t mq_lock; #ifdef CONFIG_SYSVIPC diff --git a/ipc/msgutil.c b/ipc/msgutil.c index ed81aafd2392..a521999de4f1 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -37,8 +37,6 @@ struct ipc_namespace init_ipc_ns = { #endif }; -atomic_t nr_ipc_ns = ATOMIC_INIT(1); - struct msg_msgseg { struct msg_msgseg *next; /* the next part of the message follows immediately */ diff --git a/ipc/namespace.c b/ipc/namespace.c index 04cb07eb81f1..d87e6baa1323 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -43,7 +43,6 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, kfree(ns); return ERR_PTR(err); } - atomic_inc(&nr_ipc_ns); sem_init_ns(ns); msg_init_ns(ns); @@ -96,7 +95,6 @@ static void free_ipc_ns(struct ipc_namespace *ns) sem_exit_ns(ns); msg_exit_ns(ns); shm_exit_ns(ns); - atomic_dec(&nr_ipc_ns); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); -- cgit v1.2.3-55-g7522