From 33659ebbae262228eef4e0fe990f393d1f0ed941 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 7 Aug 2010 18:17:56 +0200 Subject: block: remove wrappers for request type/flags Remove all the trivial wrappers for the cmd_type and cmd_flags fields in struct requests. This allows much easier grepping for different request types instead of unwinding through macros. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/trace/events/block.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index d870a918559c..d8ce278515c3 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -25,8 +25,10 @@ DECLARE_EVENT_CLASS(block_rq_with_error, TP_fast_assign( __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); + __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + 0 : blk_rq_pos(rq); + __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + 0 : blk_rq_sectors(rq); __entry->errors = rq->errors; blk_fill_rwbs_rq(__entry->rwbs, rq); @@ -109,9 +111,12 @@ DECLARE_EVENT_CLASS(block_rq, TP_fast_assign( __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->bytes = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0; + __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + 0 : blk_rq_pos(rq); + __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? + 0 : blk_rq_sectors(rq); + __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 
+ blk_rq_bytes(rq) : 0; blk_fill_rwbs_rq(__entry->rwbs, rq); blk_dump_cmd(__get_str(cmd), rq); -- cgit v1.2.3-55-g7522 From 455b2864686d3591b3b2f39eb46290c95f76471f Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 7 Jul 2010 13:24:06 +1000 Subject: writeback: Initial tracing support Trace queue/sched/exec parts of the writeback loop. This provides insight into when and why flusher threads are scheduled to run. e.g a sync invocation leaves traces like: sync-[...]: writeback_queue: bdi 8:0: sb_dev 8:1 nr_pages=7712 sync_mode=0 kupdate=0 range_cyclic=0 background=0 flush-8:0-[...]: writeback_exec: bdi 8:0: sb_dev 8:1 nr_pages=7712 sync_mode=0 kupdate=0 range_cyclic=0 background=0 This also lays the foundation for adding more writeback tracing to provide deeper insight into the whole writeback path. The original tracing code is from Jens Axboe, though this version is a rewrite as a result of the code being traced changing significantly. Signed-off-by: Dave Chinner Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 38 +++++++++++++---- include/trace/events/writeback.h | 91 ++++++++++++++++++++++++++++++++++++++++ mm/backing-dev.c | 3 ++ 3 files changed, 124 insertions(+), 8 deletions(-) create mode 100644 include/trace/events/writeback.h (limited to 'include/trace') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index c8471b3ddccf..73acab4dc2b7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -26,15 +26,9 @@ #include #include #include +#include #include "internal.h" -#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) - -/* - * We don't actually have pdflush, but this one is exported though /proc... 
- */ -int nr_pdflush_threads; - /* * Passed into wb_writeback(), essentially a subset of writeback_control */ @@ -50,6 +44,21 @@ struct wb_writeback_work { struct completion *done; /* set if the caller waits */ }; +/* + * Include the creation of the trace points after defining the + * wb_writeback_work structure so that the definition remains local to this + * file. + */ +#define CREATE_TRACE_POINTS +#include + +#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) + +/* + * We don't actually have pdflush, but this one is exported though /proc... + */ +int nr_pdflush_threads; + /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure. @@ -65,6 +74,8 @@ int writeback_in_progress(struct backing_dev_info *bdi) static void bdi_queue_work(struct backing_dev_info *bdi, struct wb_writeback_work *work) { + trace_writeback_queue(bdi, work); + spin_lock(&bdi->wb_lock); list_add_tail(&work->list, &bdi->work_list); spin_unlock(&bdi->wb_lock); @@ -74,6 +85,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi, * it gets created and wakes up, we'll run this work. 
*/ if (unlikely(!bdi->wb.task)) { + trace_writeback_nothread(bdi, work); wake_up_process(default_backing_dev_info.wb.task); } else { struct bdi_writeback *wb = &bdi->wb; @@ -95,8 +107,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, */ work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { - if (bdi->wb.task) + if (bdi->wb.task) { + trace_writeback_nowork(bdi); wake_up_process(bdi->wb.task); + } return; } @@ -751,6 +765,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) if (force_wait) work->sync_mode = WB_SYNC_ALL; + trace_writeback_exec(bdi, work); + wrote += wb_writeback(wb, work); /* @@ -805,9 +821,13 @@ int bdi_writeback_thread(void *data) smp_mb__after_clear_bit(); wake_up_bit(&bdi->state, BDI_pending); + trace_writeback_thread_start(bdi); + while (!kthread_should_stop()) { pages_written = wb_do_writeback(wb, 0); + trace_writeback_pages_written(pages_written); + if (pages_written) last_active = jiffies; else if (wait_jiffies != -1UL) { @@ -845,6 +865,8 @@ int bdi_writeback_thread(void *data) */ if (!list_empty(&bdi->work_list)) wb_do_writeback(wb, 1); + + trace_writeback_thread_stop(bdi); return 0; } diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h new file mode 100644 index 000000000000..562fcae10d9d --- /dev/null +++ b/include/trace/events/writeback.h @@ -0,0 +1,91 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM writeback + +#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_WRITEBACK_H + +#include +#include + +struct wb_writeback_work; + +DECLARE_EVENT_CLASS(writeback_work_class, + TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), + TP_ARGS(bdi, work), + TP_STRUCT__entry( + __array(char, name, 32) + __field(long, nr_pages) + __field(dev_t, sb_dev) + __field(int, sync_mode) + __field(int, for_kupdate) + __field(int, range_cyclic) + __field(int, for_background) + ), + TP_fast_assign( + strncpy(__entry->name, dev_name(bdi->dev), 32); 
+ __entry->nr_pages = work->nr_pages; + __entry->sb_dev = work->sb ? work->sb->s_dev : 0; + __entry->sync_mode = work->sync_mode; + __entry->for_kupdate = work->for_kupdate; + __entry->range_cyclic = work->range_cyclic; + __entry->for_background = work->for_background; + ), + TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d " + "kupdate=%d range_cyclic=%d background=%d", + __entry->name, + MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev), + __entry->nr_pages, + __entry->sync_mode, + __entry->for_kupdate, + __entry->range_cyclic, + __entry->for_background + ) +); +#define DEFINE_WRITEBACK_WORK_EVENT(name) \ +DEFINE_EVENT(writeback_work_class, name, \ + TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \ + TP_ARGS(bdi, work)) +DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread); +DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); +DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); + +TRACE_EVENT(writeback_pages_written, + TP_PROTO(long pages_written), + TP_ARGS(pages_written), + TP_STRUCT__entry( + __field(long, pages) + ), + TP_fast_assign( + __entry->pages = pages_written; + ), + TP_printk("%ld", __entry->pages) +); + +DECLARE_EVENT_CLASS(writeback_class, + TP_PROTO(struct backing_dev_info *bdi), + TP_ARGS(bdi), + TP_STRUCT__entry( + __array(char, name, 32) + ), + TP_fast_assign( + strncpy(__entry->name, dev_name(bdi->dev), 32); + ), + TP_printk("bdi %s", + __entry->name + ) +); +#define DEFINE_WRITEBACK_EVENT(name) \ +DEFINE_EVENT(writeback_class, name, \ + TP_PROTO(struct backing_dev_info *bdi), \ + TP_ARGS(bdi)) + +DEFINE_WRITEBACK_EVENT(writeback_nowork); +DEFINE_WRITEBACK_EVENT(writeback_bdi_register); +DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); +DEFINE_WRITEBACK_EVENT(writeback_thread_start); +DEFINE_WRITEBACK_EVENT(writeback_thread_stop); + +#endif /* _TRACE_WRITEBACK_H */ + +/* This part must be outside protection */ +#include diff --git a/mm/backing-dev.c b/mm/backing-dev.c index bceac647e4d1..ac78a3336181 100644 --- a/mm/backing-dev.c 
+++ b/mm/backing-dev.c @@ -10,6 +10,7 @@ #include #include #include +#include static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); @@ -518,6 +519,7 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, bdi_debug_register(bdi, dev_name(dev)); set_bit(BDI_registered, &bdi->state); + trace_writeback_bdi_register(bdi); exit: return ret; } @@ -578,6 +580,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi) void bdi_unregister(struct backing_dev_info *bdi) { if (bdi->dev) { + trace_writeback_bdi_unregister(bdi); bdi_prune_sb(bdi); if (!bdi_cap_flush_forker(bdi)) -- cgit v1.2.3-55-g7522 From 028c2dd184c097809986684f2f0627eea5529fea Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 7 Jul 2010 13:24:07 +1000 Subject: writeback: Add tracing to balance_dirty_pages Tracing high level background writeback events is good, but it doesn't give the entire picture. Add visibility into write throttling to catch IO dispatched by foreground throttling of processes dirtying lots of pages. 
Signed-off-by: Dave Chinner Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 5 ++++ include/trace/events/writeback.h | 64 ++++++++++++++++++++++++++++++++++++++++ mm/page-writeback.c | 4 +++ 3 files changed, 73 insertions(+) (limited to 'include/trace') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 73acab4dc2b7..bf10cbf379dd 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -656,10 +656,14 @@ static long wb_writeback(struct bdi_writeback *wb, wbc.more_io = 0; wbc.nr_to_write = MAX_WRITEBACK_PAGES; wbc.pages_skipped = 0; + + trace_wbc_writeback_start(&wbc, wb->bdi); if (work->sb) __writeback_inodes_sb(work->sb, wb, &wbc); else writeback_inodes_wb(wb, &wbc); + trace_wbc_writeback_written(&wbc, wb->bdi); + work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; @@ -687,6 +691,7 @@ static long wb_writeback(struct bdi_writeback *wb, if (!list_empty(&wb->b_more_io)) { inode = list_entry(wb->b_more_io.prev, struct inode, i_list); + trace_wbc_writeback_wait(&wbc, wb->bdi); inode_wait_for_writeback(inode); } spin_unlock(&inode_lock); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 562fcae10d9d..0be26acae064 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -85,6 +85,70 @@ DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); DEFINE_WRITEBACK_EVENT(writeback_thread_start); DEFINE_WRITEBACK_EVENT(writeback_thread_stop); +DECLARE_EVENT_CLASS(wbc_class, + TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), + TP_ARGS(wbc, bdi), + TP_STRUCT__entry( + __array(char, name, 32) + __field(long, nr_to_write) + __field(long, pages_skipped) + __field(int, sync_mode) + __field(int, nonblocking) + __field(int, encountered_congestion) + __field(int, for_kupdate) + __field(int, for_background) + __field(int, for_reclaim) + __field(int, range_cyclic) + __field(int, more_io) + __field(unsigned long, older_than_this) + __field(long, 
range_start) + __field(long, range_end) + ), + + TP_fast_assign( + strncpy(__entry->name, dev_name(bdi->dev), 32); + __entry->nr_to_write = wbc->nr_to_write; + __entry->pages_skipped = wbc->pages_skipped; + __entry->sync_mode = wbc->sync_mode; + __entry->for_kupdate = wbc->for_kupdate; + __entry->for_background = wbc->for_background; + __entry->for_reclaim = wbc->for_reclaim; + __entry->range_cyclic = wbc->range_cyclic; + __entry->more_io = wbc->more_io; + __entry->older_than_this = wbc->older_than_this ? + *wbc->older_than_this : 0; + __entry->range_start = (long)wbc->range_start; + __entry->range_end = (long)wbc->range_end; + ), + + TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d " + "bgrd=%d reclm=%d cyclic=%d more=%d older=0x%lx " + "start=0x%lx end=0x%lx", + __entry->name, + __entry->nr_to_write, + __entry->pages_skipped, + __entry->sync_mode, + __entry->for_kupdate, + __entry->for_background, + __entry->for_reclaim, + __entry->range_cyclic, + __entry->more_io, + __entry->older_than_this, + __entry->range_start, + __entry->range_end) +) + +#define DEFINE_WBC_EVENT(name) \ +DEFINE_EVENT(wbc_class, name, \ + TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \ + TP_ARGS(wbc, bdi)) +DEFINE_WBC_EVENT(wbc_writeback_start); +DEFINE_WBC_EVENT(wbc_writeback_written); +DEFINE_WBC_EVENT(wbc_writeback_wait); +DEFINE_WBC_EVENT(wbc_balance_dirty_start); +DEFINE_WBC_EVENT(wbc_balance_dirty_written); +DEFINE_WBC_EVENT(wbc_balance_dirty_wait); + #endif /* _TRACE_WRITEBACK_H */ /* This part must be outside protection */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 37498ef61548..d556cd829af6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -34,6 +34,7 @@ #include #include #include +#include /* * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited @@ -535,11 +536,13 @@ static void balance_dirty_pages(struct address_space *mapping, * threshold otherwise wait until the disk writes catch * up. 
*/ + trace_wbc_balance_dirty_start(&wbc, bdi); if (bdi_nr_reclaimable > bdi_thresh) { writeback_inodes_wb(&bdi->wb, &wbc); pages_written += write_chunk - wbc.nr_to_write; get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); + trace_wbc_balance_dirty_written(&wbc, bdi); } /* @@ -565,6 +568,7 @@ static void balance_dirty_pages(struct address_space *mapping, if (pages_written >= write_chunk) break; /* We've done our duty */ + trace_wbc_balance_dirty_wait(&wbc, bdi); __set_current_state(TASK_INTERRUPTIBLE); io_schedule_timeout(pause); -- cgit v1.2.3-55-g7522 From 9e094383b60066996fbc3b53891324e5d2ec858d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 7 Jul 2010 13:24:08 +1000 Subject: writeback: Add tracing to write_cache_pages Add a trace event to the ->writepage loop in write_cache_pages to give visibility into how the ->writepage call is changing variables within the writeback control structure. Of most interest is how wbc->nr_to_write changes from call to call, especially with filesystems that write multiple pages in ->writepage. 
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/trace/events/writeback.h | 1 + mm/page-writeback.c | 1 + 2 files changed, 2 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 0be26acae064..bde92e07fa4f 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -148,6 +148,7 @@ DEFINE_WBC_EVENT(wbc_writeback_wait); DEFINE_WBC_EVENT(wbc_balance_dirty_start); DEFINE_WBC_EVENT(wbc_balance_dirty_written); DEFINE_WBC_EVENT(wbc_balance_dirty_wait); +DEFINE_WBC_EVENT(wbc_writepage); #endif /* _TRACE_WRITEBACK_H */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d556cd829af6..3d2111a22236 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -933,6 +933,7 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; + trace_wbc_writepage(wbc, mapping->backing_dev_info); ret = (*writepage)(page, wbc, data); if (unlikely(ret)) { if (ret == AOP_WRITEPAGE_ACTIVATE) { -- cgit v1.2.3-55-g7522 From 96dccab1d63cb35d3f5e75d2ef275fdbff4d5f3b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 19 Jul 2010 16:49:17 -0700 Subject: writeback.h: needs linux/device.h include/trace/events/writeback.h uses dev_name(), so it needs to include linux/device.h. 
include/trace/events/writeback.h:12: error: implicit declaration of function 'dev_name' Signed-off-by: Randy Dunlap Signed-off-by: Jens Axboe --- include/trace/events/writeback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index bde92e07fa4f..84ab72df953c 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -5,6 +5,7 @@ #define _TRACE_WRITEBACK_H #include +#include #include struct wb_writeback_work; -- cgit v1.2.3-55-g7522 From 603320239fb436f175c8b6bfa43d5023c47a6dc2 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 25 Jul 2010 14:29:24 +0300 Subject: writeback: add new tracepoints Add 2 new trace points to the periodic write-back wake up case, just like we do in the 'bdi_queue_work()' function. Namely, introduce: 1. trace_writeback_wake_thread(bdi) 2. trace_writeback_wake_forker_thread(bdi) The first event is triggered every time we wake up a bdi thread to start periodic background write-out. The second event is triggered only when the bdi thread does not exist and should be created by the forker thread. This patch was suggested by Dave Chinner and Christoph Hellwig. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Jens Axboe --- include/trace/events/writeback.h | 2 ++ mm/backing-dev.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 84ab72df953c..f345f66ae9d1 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -81,6 +81,8 @@ DEFINE_EVENT(writeback_class, name, \ TP_ARGS(bdi)) DEFINE_WRITEBACK_EVENT(writeback_nowork); +DEFINE_WRITEBACK_EVENT(writeback_wake_thread); +DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread); DEFINE_WRITEBACK_EVENT(writeback_bdi_register); DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); DEFINE_WRITEBACK_EVENT(writeback_thread_start); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 99890831d557..9008c4e207f6 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -310,6 +310,7 @@ static void wakeup_timer_fn(unsigned long data) spin_lock_bh(&bdi->wb_lock); if (bdi->wb.task) { + trace_writeback_wake_thread(bdi); wake_up_process(bdi->wb.task); } else { /* @@ -317,6 +318,7 @@ static void wakeup_timer_fn(unsigned long data) * In this case we have to wake-up the forker thread which * should create and run the bdi thread. */ + trace_writeback_wake_forker_thread(bdi); wake_up_process(default_backing_dev_info.wb.task); } spin_unlock_bh(&bdi->wb_lock); -- cgit v1.2.3-55-g7522