From 33398cf2f360c5ce24c8a22436d52a06ad4e5eb5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 8 Sep 2015 15:01:02 -0700 Subject: memcg: export struct mem_cgroup mem_cgroup structure is defined in mm/memcontrol.c currently which means that the code outside of this file has to use external API even for trivial access stuff. This patch exports struct mem_cgroup with its dependencies and makes some of the exported functions inlines. This even helps to reduce the code size a bit (make defconfig + CONFIG_MEMCG=y) text data bss dec hex filename 12355346 1823792 1089536 15268674 e8fb42 vmlinux.before 12354970 1823792 1089536 15268298 e8f9ca vmlinux.after This is not much (370B) but better than nothing. We also save a function call in some hot paths like callers of mem_cgroup_count_vm_event which is used for accounting. The patch doesn't introduce any functional changes. [vdavydov@parallels.com: inline memcg_kmem_is_active] [vdavydov@parallels.com: do not expose type outside of CONFIG_MEMCG] [akpm@linux-foundation.org: memcontrol.h needs eventfd.h for eventfd_ctx] [akpm@linux-foundation.org: export mem_cgroup_from_task() to modules] Signed-off-by: Michal Hocko Reviewed-by: Vladimir Davydov Suggested-by: Johannes Weiner Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 315 +------------------------------------------------------- 1 file changed, 2 insertions(+), 313 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9871f13fc35b..6935f77589e7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -111,56 +111,10 @@ static const char * const mem_cgroup_lru_names[] = { "unevictable", }; -/* - * Per memcg event counter is incremented at every pagein/pageout. With THP, - * it will be incremated by the number of pages. This counter is used for - * for trigger some periodic events. This is straightforward and better - * than using jiffies etc. to handle periodic memcg event. 
- */ -enum mem_cgroup_events_target { - MEM_CGROUP_TARGET_THRESH, - MEM_CGROUP_TARGET_SOFTLIMIT, - MEM_CGROUP_TARGET_NUMAINFO, - MEM_CGROUP_NTARGETS, -}; #define THRESHOLDS_EVENTS_TARGET 128 #define SOFTLIMIT_EVENTS_TARGET 1024 #define NUMAINFO_EVENTS_TARGET 1024 -struct mem_cgroup_stat_cpu { - long count[MEM_CGROUP_STAT_NSTATS]; - unsigned long events[MEMCG_NR_EVENTS]; - unsigned long nr_page_events; - unsigned long targets[MEM_CGROUP_NTARGETS]; -}; - -struct reclaim_iter { - struct mem_cgroup *position; - /* scan generation, increased every round-trip */ - unsigned int generation; -}; - -/* - * per-zone information in memory controller. - */ -struct mem_cgroup_per_zone { - struct lruvec lruvec; - unsigned long lru_size[NR_LRU_LISTS]; - - struct reclaim_iter iter[DEF_PRIORITY + 1]; - - struct rb_node tree_node; /* RB tree node */ - unsigned long usage_in_excess;/* Set to the value by which */ - /* the soft limit is exceeded*/ - bool on_tree; - struct mem_cgroup *memcg; /* Back pointer, we cannot */ - /* use container_of */ -}; - -struct mem_cgroup_per_node { - struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; -}; - /* * Cgroups above their limits are maintained in a RB-Tree, independent of * their hierarchy representation @@ -181,32 +135,6 @@ struct mem_cgroup_tree { static struct mem_cgroup_tree soft_limit_tree __read_mostly; -struct mem_cgroup_threshold { - struct eventfd_ctx *eventfd; - unsigned long threshold; -}; - -/* For threshold */ -struct mem_cgroup_threshold_ary { - /* An array index points to threshold just below or equal to usage. */ - int current_threshold; - /* Size of entries[] */ - unsigned int size; - /* Array of thresholds */ - struct mem_cgroup_threshold entries[0]; -}; - -struct mem_cgroup_thresholds { - /* Primary thresholds array */ - struct mem_cgroup_threshold_ary *primary; - /* - * Spare threshold array. - * This is needed to make mem_cgroup_unregister_event() "never fail". - * It must be able to store at least primary->size - 1 entries. 
- */ - struct mem_cgroup_threshold_ary *spare; -}; - /* for OOM */ struct mem_cgroup_eventfd_list { struct list_head list; @@ -256,113 +184,6 @@ struct mem_cgroup_event { static void mem_cgroup_threshold(struct mem_cgroup *memcg); static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); -/* - * The memory controller data structure. The memory controller controls both - * page cache and RSS per cgroup. We would eventually like to provide - * statistics based on the statistics developed by Rik Van Riel for clock-pro, - * to help the administrator determine what knobs to tune. - */ -struct mem_cgroup { - struct cgroup_subsys_state css; - - /* Accounted resources */ - struct page_counter memory; - struct page_counter memsw; - struct page_counter kmem; - - /* Normal memory consumption range */ - unsigned long low; - unsigned long high; - - unsigned long soft_limit; - - /* vmpressure notifications */ - struct vmpressure vmpressure; - - /* css_online() has been completed */ - int initialized; - - /* - * Should the accounting and control be hierarchical, per subtree? - */ - bool use_hierarchy; - - /* protected by memcg_oom_lock */ - bool oom_lock; - int under_oom; - - int swappiness; - /* OOM-Killer disable */ - int oom_kill_disable; - - /* protect arrays of thresholds */ - struct mutex thresholds_lock; - - /* thresholds for memory usage. RCU-protected */ - struct mem_cgroup_thresholds thresholds; - - /* thresholds for mem+swap usage. RCU-protected */ - struct mem_cgroup_thresholds memsw_thresholds; - - /* For oom notifier event fd */ - struct list_head oom_notify; - - /* - * Should we move charges of a task when a task is moved into this - * mem_cgroup ? And what type of charges should we move ? - */ - unsigned long move_charge_at_immigrate; - /* - * set > 0 if pages under this cgroup are moving to other cgroup. 
- */ - atomic_t moving_account; - /* taken only while moving_account > 0 */ - spinlock_t move_lock; - struct task_struct *move_lock_task; - unsigned long move_lock_flags; - /* - * percpu counter. - */ - struct mem_cgroup_stat_cpu __percpu *stat; - spinlock_t pcp_counter_lock; - -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) - struct cg_proto tcp_mem; -#endif -#if defined(CONFIG_MEMCG_KMEM) - /* Index in the kmem_cache->memcg_params.memcg_caches array */ - int kmemcg_id; - bool kmem_acct_activated; - bool kmem_acct_active; -#endif - - int last_scanned_node; -#if MAX_NUMNODES > 1 - nodemask_t scan_nodes; - atomic_t numainfo_events; - atomic_t numainfo_updating; -#endif - -#ifdef CONFIG_CGROUP_WRITEBACK - struct list_head cgwb_list; - struct wb_domain cgwb_domain; -#endif - - /* List of events which userspace want to receive */ - struct list_head event_list; - spinlock_t event_list_lock; - - struct mem_cgroup_per_node *nodeinfo[0]; - /* WARNING: nodeinfo must be the last member here */ -}; - -#ifdef CONFIG_MEMCG_KMEM -bool memcg_kmem_is_active(struct mem_cgroup *memcg) -{ - return memcg->kmem_acct_active; -} -#endif - /* Stuffs for move charges at task migration. */ /* * Types of charges to be moved. @@ -423,11 +244,6 @@ enum res_type { */ static DEFINE_MUTEX(memcg_create_mutex); -struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s) -{ - return s ? container_of(s, struct mem_cgroup, css) : NULL; -} - /* Some nice accessors for the vmpressure. 
*/ struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg) { @@ -593,11 +409,6 @@ mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone) return &memcg->nodeinfo[nid]->zoneinfo[zid]; } -struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg) -{ - return &memcg->css; -} - /** * mem_cgroup_css_from_page - css of the memcg associated with a page * @page: page of interest @@ -876,14 +687,6 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, __this_cpu_add(memcg->stat->nr_page_events, nr_pages); } -unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) -{ - struct mem_cgroup_per_zone *mz; - - mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); - return mz->lru_size[lru]; -} - static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, int nid, unsigned int lru_mask) @@ -986,6 +789,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) return mem_cgroup_from_css(task_css(p, memory_cgrp_id)); } +EXPORT_SYMBOL(mem_cgroup_from_task); static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) { @@ -1031,7 +835,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, struct mem_cgroup_reclaim_cookie *reclaim) { - struct reclaim_iter *uninitialized_var(iter); + struct mem_cgroup_reclaim_iter *uninitialized_var(iter); struct cgroup_subsys_state *css = NULL; struct mem_cgroup *memcg = NULL; struct mem_cgroup *pos = NULL; @@ -1173,30 +977,6 @@ void mem_cgroup_iter_break(struct mem_cgroup *root, iter != NULL; \ iter = mem_cgroup_iter(NULL, iter, NULL)) -void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) -{ - struct mem_cgroup *memcg; - - rcu_read_lock(); - memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); - if (unlikely(!memcg)) - goto out; - - switch (idx) { - case PGFAULT: - this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]); - break; - case PGMAJFAULT: - 
this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]); - break; - default: - BUG(); - } -out: - rcu_read_unlock(); -} -EXPORT_SYMBOL(__mem_cgroup_count_vm_event); - /** * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg * @zone: zone of the wanted lruvec @@ -1295,15 +1075,6 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, VM_BUG_ON((long)(*lru_size) < 0); } -bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, struct mem_cgroup *root) -{ - if (root == memcg) - return true; - if (!root->use_hierarchy) - return false; - return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup); -} - bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg) { struct mem_cgroup *task_memcg; @@ -1330,39 +1101,6 @@ bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg) return ret; } -int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) -{ - unsigned long inactive_ratio; - unsigned long inactive; - unsigned long active; - unsigned long gb; - - inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON); - active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON); - - gb = (inactive + active) >> (30 - PAGE_SHIFT); - if (gb) - inactive_ratio = int_sqrt(10 * gb); - else - inactive_ratio = 1; - - return inactive * inactive_ratio < active; -} - -bool mem_cgroup_lruvec_online(struct lruvec *lruvec) -{ - struct mem_cgroup_per_zone *mz; - struct mem_cgroup *memcg; - - if (mem_cgroup_disabled()) - return true; - - mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); - memcg = mz->memcg; - - return !!(memcg->css.flags & CSS_ONLINE); -} - #define mem_cgroup_from_counter(counter, member) \ container_of(counter, struct mem_cgroup, member) @@ -1394,15 +1132,6 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg) return margin; } -int mem_cgroup_swappiness(struct mem_cgroup *memcg) -{ - /* root ? 
*/ - if (mem_cgroup_disabled() || !memcg->css.parent) - return vm_swappiness; - - return memcg->swappiness; -} - /* * A routine for checking "mem" is under move_account() or not. * @@ -2067,23 +1796,6 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) } EXPORT_SYMBOL(mem_cgroup_end_page_stat); -/** - * mem_cgroup_update_page_stat - update page state statistics - * @memcg: memcg to account against - * @idx: page state item to account - * @val: number of pages (positive or negative) - * - * See mem_cgroup_begin_page_stat() for locking requirements. - */ -void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx, int val) -{ - VM_BUG_ON(!rcu_read_lock_held()); - - if (memcg) - this_cpu_add(memcg->stat->count[idx], val); -} - /* * size of first charge trial. "32" comes from vmscan.c's magic value. * TODO: maybe necessary to use big numbers in big irons. @@ -2509,16 +2221,6 @@ void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages) css_put_many(&memcg->css, nr_pages); } -/* - * helper for acessing a memcg's index. It will be used as an index in the - * child cache array in kmem_cache, and also to derive its name. This function - * will return -1 when this is not a kmem-limited memcg. - */ -int memcg_cache_id(struct mem_cgroup *memcg) -{ - return memcg ? memcg->kmemcg_id : -1; -} - static int memcg_alloc_cache_id(void) { int id, size; @@ -5525,19 +5227,6 @@ struct cgroup_subsys memory_cgrp_subsys = { .early_init = 0, }; -/** - * mem_cgroup_events - count memory events against a cgroup - * @memcg: the memory cgroup - * @idx: the event index - * @nr: the number of events to account for - */ -void mem_cgroup_events(struct mem_cgroup *memcg, - enum mem_cgroup_events_index idx, - unsigned int nr) -{ - this_cpu_add(memcg->stat->events[idx], nr); -} - /** * mem_cgroup_low - check if memory consumption is below the normal range * @root: the highest ancestor to consider -- cgit v1.2.3-55-g7522