summaryrefslogtreecommitdiffstats
path: root/tools/perf/util/machine.c
diff options
context:
space:
mode:
authorJin Yao2017-08-07 15:05:15 +0200
committerArnaldo Carvalho de Melo2017-08-30 15:03:27 +0200
commitc4ee06251d4212a0d55e2371f2db464f6a1e0901 (patch)
tree03b82daa6519bfd7d999ccc288f8edb09b27e4e2 /tools/perf/util/machine.c
parentMerge tag 'perf-core-for-mingo-4.14-20170829' of git://git.kernel.org/pub/scm... (diff)
downloadkernel-qcow2-linux-c4ee06251d4212a0d55e2371f2db464f6a1e0901.tar.gz
kernel-qcow2-linux-c4ee06251d4212a0d55e2371f2db464f6a1e0901.tar.xz
kernel-qcow2-linux-c4ee06251d4212a0d55e2371f2db464f6a1e0901.zip
perf report: Calculate the average cycles of iterations
The branch history code has a loop detection function. With this, we can get the number of iterations by calculating the removed loops. While it would be nice for knowing the average cycles of iterations. This patch adds up the cycles in branch entries of removed loops and save the result to the next branch entry (e.g. branch entry A). Finally it will display the iteration number and average cycles at the "from" of branch entry A. For example: perf record -g -j any,save_type ./div perf report --branch-history --no-children --stdio --22.63%--main div.c:42 (RET CROSS_2M) compute_flag div.c:28 (cycles:2 iter:173115 avg_cycles:2) | --10.73%--compute_flag div.c:27 (RET CROSS_2M) rand rand.c:28 (cycles:1) rand rand.c:28 (RET CROSS_2M) __random random.c:298 (cycles:1) __random random.c:297 (COND_BWD CROSS_2M) __random random.c:295 (cycles:1) __random random.c:295 (COND_BWD CROSS_2M) __random random.c:295 (cycles:1) __random random.c:295 (RET CROSS_2M) Signed-off-by: Yao Jin <yao.jin@linux.intel.com> Reviewed-by: Andi Kleen <ak@linux.intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1502111115-18305-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/machine.c')
-rw-r--r--tools/perf/util/machine.c88
1 files changed, 53 insertions, 35 deletions
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5c8eacaca4f4..9eaa95302c86 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1675,6 +1675,11 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
return mi;
}
+struct iterations {
+ int nr_loop_iter;
+ u64 cycles;
+};
+
static int add_callchain_ip(struct thread *thread,
struct callchain_cursor *cursor,
struct symbol **parent,
@@ -1683,11 +1688,12 @@ static int add_callchain_ip(struct thread *thread,
u64 ip,
bool branch,
struct branch_flags *flags,
- int nr_loop_iter,
- int samples,
+ struct iterations *iter,
u64 branch_from)
{
struct addr_location al;
+ int nr_loop_iter = 0;
+ u64 iter_cycles = 0;
al.filtered = 0;
al.sym = NULL;
@@ -1737,9 +1743,15 @@ static int add_callchain_ip(struct thread *thread,
if (symbol_conf.hide_unresolved && al.sym == NULL)
return 0;
+
+ if (iter) {
+ nr_loop_iter = iter->nr_loop_iter;
+ iter_cycles = iter->cycles;
+ }
+
return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
- branch, flags, nr_loop_iter, samples,
- branch_from);
+ branch, flags, nr_loop_iter,
+ iter_cycles, branch_from);
}
struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1760,6 +1772,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
return bi;
}
+static void save_iterations(struct iterations *iter,
+ struct branch_entry *be, int nr)
+{
+ int i;
+
+ iter->nr_loop_iter = nr;
+ iter->cycles = 0;
+
+ for (i = 0; i < nr; i++)
+ iter->cycles += be[i].flags.cycles;
+}
+
#define CHASHSZ 127
#define CHASHBITS 7
#define NO_ENTRY 0xff
@@ -1767,7 +1791,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
#define PERF_MAX_BRANCH_DEPTH 127
/* Remove loops. */
-static int remove_loops(struct branch_entry *l, int nr)
+static int remove_loops(struct branch_entry *l, int nr,
+ struct iterations *iter)
{
int i, j, off;
unsigned char chash[CHASHSZ];
@@ -1792,8 +1817,18 @@ static int remove_loops(struct branch_entry *l, int nr)
break;
}
if (is_loop) {
- memmove(l + i, l + i + off,
- (nr - (i + off)) * sizeof(*l));
+ j = nr - (i + off);
+ if (j > 0) {
+ save_iterations(iter + i + off,
+ l + i, off);
+
+ memmove(iter + i, iter + i + off,
+ j * sizeof(*iter));
+
+ memmove(l + i, l + i + off,
+ j * sizeof(*l));
+ }
+
nr -= off;
}
}
@@ -1883,7 +1918,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
- branch, flags, 0, 0,
+ branch, flags, NULL,
branch_from);
if (err)
return (err < 0) ? err : 0;
@@ -1909,7 +1944,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int i, j, err, nr_entries;
int skip_idx = -1;
int first_call = 0;
- int nr_loop_iter;
if (chain)
chain_nr = chain->nr;
@@ -1942,6 +1976,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
if (branch && callchain_param.branch_callstack) {
int nr = min(max_stack, (int)branch->nr);
struct branch_entry be[nr];
+ struct iterations iter[nr];
if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
pr_warning("corrupted branch chain. skipping...\n");
@@ -1972,38 +2007,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
be[i] = branch->entries[branch->nr - i - 1];
}
- nr_loop_iter = nr;
- nr = remove_loops(be, nr);
-
- /*
- * Get the number of iterations.
- * It's only approximation, but good enough in practice.
- */
- if (nr_loop_iter > nr)
- nr_loop_iter = nr_loop_iter - nr + 1;
- else
- nr_loop_iter = 0;
+ memset(iter, 0, sizeof(struct iterations) * nr);
+ nr = remove_loops(be, nr, iter);
for (i = 0; i < nr; i++) {
- if (i == nr - 1)
- err = add_callchain_ip(thread, cursor, parent,
- root_al,
- NULL, be[i].to,
- true, &be[i].flags,
- nr_loop_iter, 1,
- be[i].from);
- else
- err = add_callchain_ip(thread, cursor, parent,
- root_al,
- NULL, be[i].to,
- true, &be[i].flags,
- 0, 0, be[i].from);
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al,
+ NULL, be[i].to,
+ true, &be[i].flags,
+ NULL, be[i].from);
if (!err)
err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from,
true, &be[i].flags,
- 0, 0, 0);
+ &iter[i], 0);
if (err == -EINVAL)
break;
if (err)
@@ -2037,7 +2055,7 @@ check_calls:
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
- false, NULL, 0, 0, 0);
+ false, NULL, NULL, 0);
if (err)
return (err < 0) ? err : 0;