summaryrefslogtreecommitdiffstats
path: root/mm/sparse.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/sparse.c')
-rw-r--r--mm/sparse.c355
1 files changed, 224 insertions, 131 deletions
diff --git a/mm/sparse.c b/mm/sparse.c
index fd13166949b5..72f010d9bff5 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -83,8 +83,15 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid)
unsigned long root = SECTION_NR_TO_ROOT(section_nr);
struct mem_section *section;
+ /*
+ * An existing section is possible in the sub-section hotplug
+ * case. First hot-add instantiates, follow-on hot-add reuses
+ * the existing section.
+ *
+ * The mem_hotplug_lock resolves the apparent race below.
+ */
if (mem_section[root])
- return -EEXIST;
+ return 0;
section = sparse_index_alloc(nid);
if (!section)
@@ -102,7 +109,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
#endif
#ifdef CONFIG_SPARSEMEM_EXTREME
-int __section_nr(struct mem_section* ms)
+unsigned long __section_nr(struct mem_section *ms)
{
unsigned long root_nr;
struct mem_section *root = NULL;
@@ -121,9 +128,9 @@ int __section_nr(struct mem_section* ms)
return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
#else
-int __section_nr(struct mem_section* ms)
+unsigned long __section_nr(struct mem_section *ms)
{
- return (int)(ms - mem_section[0]);
+ return (unsigned long)(ms - mem_section[0]);
}
#endif
@@ -178,10 +185,10 @@ void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
* Keeping track of this gives us an easy way to break out of
* those loops early.
*/
-int __highest_present_section_nr;
+unsigned long __highest_present_section_nr;
static void section_mark_present(struct mem_section *ms)
{
- int section_nr = __section_nr(ms);
+ unsigned long section_nr = __section_nr(ms);
if (section_nr > __highest_present_section_nr)
__highest_present_section_nr = section_nr;
@@ -189,7 +196,7 @@ static void section_mark_present(struct mem_section *ms)
ms->section_mem_map |= SECTION_MARKED_PRESENT;
}
-static inline int next_present_section_nr(int section_nr)
+static inline unsigned long next_present_section_nr(unsigned long section_nr)
{
do {
section_nr++;
@@ -210,6 +217,41 @@ static inline unsigned long first_present_section_nr(void)
return next_present_section_nr(-1);
}
+void subsection_mask_set(unsigned long *map, unsigned long pfn,
+ unsigned long nr_pages)
+{
+ int idx = subsection_map_index(pfn);
+ int end = subsection_map_index(pfn + nr_pages - 1);
+
+ bitmap_set(map, idx, end - idx + 1);
+}
+
+void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
+{
+ int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
+ unsigned long nr, start_sec = pfn_to_section_nr(pfn);
+
+ if (!nr_pages)
+ return;
+
+ for (nr = start_sec; nr <= end_sec; nr++) {
+ struct mem_section *ms;
+ unsigned long pfns;
+
+ pfns = min(nr_pages, PAGES_PER_SECTION
+ - (pfn & ~PAGE_SECTION_MASK));
+ ms = __nr_to_section(nr);
+ subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
+
+ pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
+ pfns, subsection_map_index(pfn),
+ subsection_map_index(pfn + pfns - 1));
+
+ pfn += pfns;
+ nr_pages -= pfns;
+ }
+}
+
/* Record a memory area against a node. */
void __init memory_present(int nid, unsigned long start, unsigned long end)
{
@@ -288,33 +330,31 @@ struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pn
static void __meminit sparse_init_one_section(struct mem_section *ms,
unsigned long pnum, struct page *mem_map,
- unsigned long *pageblock_bitmap)
+ struct mem_section_usage *usage, unsigned long flags)
{
ms->section_mem_map &= ~SECTION_MAP_MASK;
- ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
- SECTION_HAS_MEM_MAP;
- ms->pageblock_flags = pageblock_bitmap;
+ ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum)
+ | SECTION_HAS_MEM_MAP | flags;
+ ms->usage = usage;
}
-unsigned long usemap_size(void)
+static unsigned long usemap_size(void)
{
return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
}
-#ifdef CONFIG_MEMORY_HOTPLUG
-static unsigned long *__kmalloc_section_usemap(void)
+size_t mem_section_usage_size(void)
{
- return kmalloc(usemap_size(), GFP_KERNEL);
+ return sizeof(struct mem_section_usage) + usemap_size();
}
-#endif /* CONFIG_MEMORY_HOTPLUG */
#ifdef CONFIG_MEMORY_HOTREMOVE
-static unsigned long * __init
+static struct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
unsigned long size)
{
+ struct mem_section_usage *usage;
unsigned long goal, limit;
- unsigned long *p;
int nid;
/*
* A page may contain usemaps for other sections preventing the
@@ -330,15 +370,16 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
limit = goal + (1UL << PA_SECTION_SHIFT);
nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
again:
- p = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid);
- if (!p && limit) {
+ usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid);
+ if (!usage && limit) {
limit = 0;
goto again;
}
- return p;
+ return usage;
}
-static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+static void __init check_usemap_section_nr(int nid,
+ struct mem_section_usage *usage)
{
unsigned long usemap_snr, pgdat_snr;
static unsigned long old_usemap_snr;
@@ -352,7 +393,7 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
old_pgdat_snr = NR_MEM_SECTIONS;
}
- usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
+ usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
if (usemap_snr == pgdat_snr)
return;
@@ -380,14 +421,15 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
usemap_snr, pgdat_snr, nid);
}
#else
-static unsigned long * __init
+static struct mem_section_usage * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
unsigned long size)
{
return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id);
}
-static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
+static void __init check_usemap_section_nr(int nid,
+ struct mem_section_usage *usage)
{
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
@@ -404,8 +446,8 @@ static unsigned long __init section_map_size(void)
return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
}
-struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
- struct vmem_altmap *altmap)
+struct page __init *__populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
unsigned long size = section_map_size();
struct page *map = sparse_buffer_alloc(size);
@@ -474,32 +516,35 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
unsigned long pnum_end,
unsigned long map_count)
{
- unsigned long pnum, usemap_longs, *usemap;
+ struct mem_section_usage *usage;
+ unsigned long pnum;
struct page *map;
- usemap_longs = BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS);
- usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
- usemap_size() *
- map_count);
- if (!usemap) {
+ usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
+ mem_section_usage_size() * map_count);
+ if (!usage) {
pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
goto failed;
}
sparse_buffer_init(map_count * section_map_size(), nid);
for_each_present_section_nr(pnum_begin, pnum) {
+ unsigned long pfn = section_nr_to_pfn(pnum);
+
if (pnum >= pnum_end)
break;
- map = sparse_mem_map_populate(pnum, nid, NULL);
+ map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
+ nid, NULL);
if (!map) {
pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
__func__, nid);
pnum_begin = pnum;
goto failed;
}
- check_usemap_section_nr(nid, usemap);
- sparse_init_one_section(__nr_to_section(pnum), pnum, map, usemap);
- usemap += usemap_longs;
+ check_usemap_section_nr(nid, usage);
+ sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage,
+ SECTION_IS_EARLY);
+ usage = (void *) usage + mem_section_usage_size();
}
sparse_buffer_fini();
return;
@@ -590,21 +635,20 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
- struct vmem_altmap *altmap)
+static struct page *populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
- /* This will make the necessary allocations eventually. */
- return sparse_mem_map_populate(pnum, nid, altmap);
+ return __populate_section_memmap(pfn, nr_pages, nid, altmap);
}
-static void __kfree_section_memmap(struct page *memmap,
+
+static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
struct vmem_altmap *altmap)
{
- unsigned long start = (unsigned long)memmap;
- unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
+ unsigned long start = (unsigned long) pfn_to_page(pfn);
+ unsigned long end = start + nr_pages * sizeof(struct page);
vmemmap_free(start, end, altmap);
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
static void free_map_bootmem(struct page *memmap)
{
unsigned long start = (unsigned long)memmap;
@@ -612,9 +656,9 @@ static void free_map_bootmem(struct page *memmap)
vmemmap_free(start, end, NULL);
}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
#else
-static struct page *__kmalloc_section_memmap(void)
+struct page *populate_section_memmap(unsigned long pfn,
+ unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
struct page *page, *ret;
unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
@@ -635,15 +679,11 @@ got_map_ptr:
return ret;
}
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
+static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
struct vmem_altmap *altmap)
{
- return __kmalloc_section_memmap();
-}
+ struct page *memmap = pfn_to_page(pfn);
-static void __kfree_section_memmap(struct page *memmap,
- struct vmem_altmap *altmap)
-{
if (is_vmalloc_addr(memmap))
vfree(memmap);
else
@@ -651,7 +691,6 @@ static void __kfree_section_memmap(struct page *memmap,
get_order(sizeof(struct page) * PAGES_PER_SECTION));
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
static void free_map_bootmem(struct page *memmap)
{
unsigned long maps_section_nr, removing_section_nr, i;
@@ -681,13 +720,122 @@ static void free_map_bootmem(struct page *memmap)
put_page_bootmem(page);
}
}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap)
+{
+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
+ DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
+ struct mem_section *ms = __pfn_to_section(pfn);
+ bool section_is_early = early_section(ms);
+ struct page *memmap = NULL;
+ unsigned long *subsection_map = ms->usage
+ ? &ms->usage->subsection_map[0] : NULL;
+
+ subsection_mask_set(map, pfn, nr_pages);
+ if (subsection_map)
+ bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
+
+ if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
+ "section already deactivated (%#lx + %ld)\n",
+ pfn, nr_pages))
+ return;
+
+ /*
+ * There are 3 cases to handle across two configurations
+ * (SPARSEMEM_VMEMMAP={y,n}):
+ *
+ * 1/ deactivation of a partial hot-added section (only possible
+ * in the SPARSEMEM_VMEMMAP=y case).
+ * a/ section was present at memory init
+ * b/ section was hot-added post memory init
+ * 2/ deactivation of a complete hot-added section
+ * 3/ deactivation of a complete section from memory init
+ *
+ * For 1/, when subsection_map does not empty we will not be
+ * freeing the usage map, but still need to free the vmemmap
+ * range.
+ *
+ * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
+ */
+ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
+ if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
+ unsigned long section_nr = pfn_to_section_nr(pfn);
+
+ if (!section_is_early) {
+ kfree(ms->usage);
+ ms->usage = NULL;
+ }
+ memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+ ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr);
+ }
+
+ if (section_is_early && memmap)
+ free_map_bootmem(memmap);
+ else
+ depopulate_section_memmap(pfn, nr_pages, altmap);
+}
+
+static struct page * __meminit section_activate(int nid, unsigned long pfn,
+ unsigned long nr_pages, struct vmem_altmap *altmap)
+{
+ DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
+ struct mem_section *ms = __pfn_to_section(pfn);
+ struct mem_section_usage *usage = NULL;
+ unsigned long *subsection_map;
+ struct page *memmap;
+ int rc = 0;
+
+ subsection_mask_set(map, pfn, nr_pages);
+
+ if (!ms->usage) {
+ usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
+ if (!usage)
+ return ERR_PTR(-ENOMEM);
+ ms->usage = usage;
+ }
+ subsection_map = &ms->usage->subsection_map[0];
+
+ if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
+ rc = -EINVAL;
+ else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
+ rc = -EEXIST;
+ else
+ bitmap_or(subsection_map, map, subsection_map,
+ SUBSECTIONS_PER_SECTION);
+
+ if (rc) {
+ if (usage)
+ ms->usage = NULL;
+ kfree(usage);
+ return ERR_PTR(rc);
+ }
+
+ /*
+ * The early init code does not consider partially populated
+ * initial sections, it simply assumes that memory will never be
+ * referenced. If we hot-add memory into such a section then we
+ * do not need to populate the memmap and can simply reuse what
+ * is already there.
+ */
+ if (nr_pages < PAGES_PER_SECTION && early_section(ms))
+ return pfn_to_page(pfn);
+
+ memmap = populate_section_memmap(pfn, nr_pages, nid, altmap);
+ if (!memmap) {
+ section_deactivate(pfn, nr_pages, altmap);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return memmap;
+}
+
/**
- * sparse_add_one_section - add a memory section
+ * sparse_add_section - add a memory section, or populate an existing one
* @nid: The node to add section on
* @start_pfn: start pfn of the memory range
+ * @nr_pages: number of pfns to add in the section
* @altmap: device page map
*
* This is only intended for hotplug.
@@ -697,56 +845,40 @@ static void free_map_bootmem(struct page *memmap)
* * -EEXIST - Section has been present.
* * -ENOMEM - Out of memory.
*/
-int __meminit sparse_add_one_section(int nid, unsigned long start_pfn,
- struct vmem_altmap *altmap)
+int __meminit sparse_add_section(int nid, unsigned long start_pfn,
+ unsigned long nr_pages, struct vmem_altmap *altmap)
{
unsigned long section_nr = pfn_to_section_nr(start_pfn);
struct mem_section *ms;
struct page *memmap;
- unsigned long *usemap;
int ret;
- /*
- * no locking for this, because it does its own
- * plus, it does a kmalloc
- */
ret = sparse_index_init(section_nr, nid);
- if (ret < 0 && ret != -EEXIST)
+ if (ret < 0)
return ret;
- ret = 0;
- memmap = kmalloc_section_memmap(section_nr, nid, altmap);
- if (!memmap)
- return -ENOMEM;
- usemap = __kmalloc_section_usemap();
- if (!usemap) {
- __kfree_section_memmap(memmap, altmap);
- return -ENOMEM;
- }
- ms = __pfn_to_section(start_pfn);
- if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
- ret = -EEXIST;
- goto out;
- }
+ memmap = section_activate(nid, start_pfn, nr_pages, altmap);
+ if (IS_ERR(memmap))
+ return PTR_ERR(memmap);
/*
* Poison uninitialized struct pages in order to catch invalid flags
* combinations.
*/
- page_init_poison(memmap, sizeof(struct page) * PAGES_PER_SECTION);
+ page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
+ ms = __pfn_to_section(start_pfn);
+ set_section_nid(section_nr, nid);
section_mark_present(ms);
- sparse_init_one_section(ms, section_nr, memmap, usemap);
-out:
- if (ret < 0) {
- kfree(usemap);
- __kfree_section_memmap(memmap, altmap);
- }
- return ret;
+ /* Align memmap to section boundary in the subsection case */
+ if (section_nr_to_pfn(section_nr) != start_pfn)
+ memmap = pfn_to_kaddr(section_nr_to_pfn(section_nr));
+ sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0);
+
+ return 0;
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
#ifdef CONFIG_MEMORY_FAILURE
static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
@@ -777,51 +909,12 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
}
#endif
-static void free_section_usemap(struct page *memmap, unsigned long *usemap,
+void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
+ unsigned long nr_pages, unsigned long map_offset,
struct vmem_altmap *altmap)
{
- struct page *usemap_page;
-
- if (!usemap)
- return;
-
- usemap_page = virt_to_page(usemap);
- /*
- * Check to see if allocation came from hot-plug-add
- */
- if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
- kfree(usemap);
- if (memmap)
- __kfree_section_memmap(memmap, altmap);
- return;
- }
-
- /*
- * The usemap came from bootmem. This is packed with other usemaps
- * on the section which has pgdat at boot time. Just keep it as is now.
- */
-
- if (memmap)
- free_map_bootmem(memmap);
-}
-
-void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
- unsigned long map_offset, struct vmem_altmap *altmap)
-{
- struct page *memmap = NULL;
- unsigned long *usemap = NULL;
-
- if (ms->section_mem_map) {
- usemap = ms->pageblock_flags;
- memmap = sparse_decode_mem_map(ms->section_mem_map,
- __section_nr(ms));
- ms->section_mem_map = 0;
- ms->pageblock_flags = NULL;
- }
-
- clear_hwpoisoned_pages(memmap + map_offset,
- PAGES_PER_SECTION - map_offset);
- free_section_usemap(memmap, usemap, altmap);
+ clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
+ nr_pages - map_offset);
+ section_deactivate(pfn, nr_pages, altmap);
}
-#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_MEMORY_HOTPLUG */