From f4fe11b7bf7ff6a1ccf15d7a9484f0ff7d1e92ae Mon Sep 17 00:00:00 2001
From: Alexey Budankov
Date: Tue, 22 Jan 2019 20:52:03 +0300
Subject: perf record: Implement --affinity=node|cpu option

Implement the --affinity=node|cpu option for the record mode, defaulting
to system affinity mask bouncing.

Signed-off-by: Alexey Budankov
Cc: Alexander Shishkin
Cc: Andi Kleen
Cc: Jiri Olsa
Cc: Namhyung Kim
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/083f5422-ece9-10dd-8305-bf59c860f10f@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/Documentation/perf-record.txt |  5 +++++
 tools/perf/builtin-record.c              | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 02b4aa2621e7..8f0c2be34848 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -454,6 +454,11 @@ Use control blocks in asynchronous (Posix AIO) trace writing mode (default:
 Asynchronous mode is supported only when linking Perf tool with libc library
 providing implementation for Posix AIO API.
 
+--affinity=mode::
+Set affinity mask of trace reading thread according to the policy defined by 'mode' value:
+  node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
+  cpu  - thread affinity mask is set to cpu of the processed mmap buffer
+
 --all-kernel::
 Configure all used events to run in kernel space.
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3fdfbaebd95e..6c3719ac901d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1656,6 +1656,21 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
 	return -1;
 }
 
+static int record__parse_affinity(const struct option *opt, const char *str, int unset)
+{
+	struct record_opts *opts = (struct record_opts *)opt->value;
+
+	if (unset || !str)
+		return 0;
+
+	if (!strcasecmp(str, "node"))
+		opts->affinity = PERF_AFFINITY_NODE;
+	else if (!strcasecmp(str, "cpu"))
+		opts->affinity = PERF_AFFINITY_CPU;
+
+	return 0;
+}
+
 static int record__parse_mmap_pages(const struct option *opt,
 				    const char *str,
 				    int unset __maybe_unused)
@@ -1964,6 +1979,9 @@ static struct option __record_options[] = {
 		     &nr_cblocks_default, "n", "Use control blocks in asynchronous trace writing mode (default: 1, max: 4)",
 		     record__aio_parse),
 #endif
+	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
+		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
+		     record__parse_affinity),
 	OPT_END()
 };
-- 
cgit v1.2.3-55-g7522


From 859dcf64389c93a647f230a7cfd206d30bc9d286 Mon Sep 17 00:00:00 2001
From: Mathieu Poirier
Date: Fri, 8 Feb 2019 15:35:43 -0700
Subject: perf cs-etm: Add proper header file for symbols

After 'commit e22c1c751140 ("perf thread: Don't include symbol.h,
symbol_conf.h is enough")', compilation of the perf tools is broken when
using the functionality provided by the openCSD library:

  [...]
  ...                  timerfd: [ on  ]
  ...             sched_getcpu: [ on  ]
  ...                      sdt: [ OFF ]
  ...                    setns: [ on  ]
  ...               libopencsd: [ on  ]
  [...]

    CC       util/arm-spe.o
    CC       util/arm-spe-pkt-decoder.o
    CC       util/s390-cpumsf.o
    CC       util/cs-etm.o
    CC       util/parse-branch-options.o
  util/cs-etm.c: In function ‘cs_etm__mem_access’:
  util/cs-etm.c:297:24: error: storage size of ‘al’ isn’t known
     struct addr_location al;

And rightly so, since file cs-etm.c doesn't include symbol.h, something
that is rectified in this patch.
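For context, the "storage size of ‘al’ isn’t known" diagnostic is the compiler
refusing to define a variable of an incomplete type: without symbol.h,
cs-etm.c only ever sees a forward declaration of struct addr_location. A
minimal standalone sketch of the same failure mode, with a hypothetical
placeholder definition standing in for the real struct from util/symbol.h:

  /* Illustration only; this is not the perf definition of the type. */
  struct addr_location;              /* forward declaration: type is incomplete */

  /* struct addr_location al; */     /* would fail: storage size of 'al' isn't known */

  struct addr_location {             /* a complete (placeholder) definition ...      */
          unsigned long addr;
          int cpu;
  };

  int main(void)
  {
          struct addr_location al = { 0, -1 };   /* ... makes this declaration legal */
          return al.cpu == -1 ? 0 : 1;
  }

Including util/symbol.h, as the patch below does, gives the compiler the
complete definition at the point where 'al' is declared.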
Signed-off-by: Mathieu Poirier
Cc: Jiri Olsa
Cc: Peter Zijlstra
Cc: Suzuki K Poulouse
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/20190208223543.31836-1-mathieu.poirier@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/cs-etm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 8b3f882d6e2f..0b11d653cfbe 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -25,6 +25,7 @@
 #include "machine.h"
 #include "map.h"
 #include "perf.h"
+#include "symbol.h"
 #include "thread.h"
 #include "thread_map.h"
 #include "thread-stack.h"
-- 
cgit v1.2.3-55-g7522


From 2187d87eacd46f6214ce3dc9cfd7a558375a4153 Mon Sep 17 00:00:00 2001
From: Thomas Richter
Date: Mon, 11 Feb 2019 11:06:27 +0100
Subject: perf report: Add s390 diagnostic sampling descriptor size

On IBM z13 machine types 2964 and 2965 the descriptor sizes for sampling
and diagnostic sampling entries might be missing in the trailer entry and
are set to zero. This leads to a perf report failure when processing
diagnostic sampling entries.

This patch adds the missing descriptor sizes when the trailer entry
contains zero for these fields.

Output before:

  [root@s38lp82 perf]# ./perf report --stdio | fgrep Samples
  0xabbf0 [0x8]: failed to process type: 68
  Error: failed to process sample
  [root@s38lp82 perf]#

Output after:

  [root@s38lp82 perf]# ./perf report --stdio | fgrep Samples
  # Total Lost Samples: 0
  # Samples: 3K of event 'SF_CYCLES_BASIC_DIAG'
  # Samples: 162 of event 'CF_DIAG'
  [root@s38lp82 perf]#

Fixes: 2b1444f2e28b ("perf report: Add raw report support for s390 auxiliary trace")
Signed-off-by: Thomas Richter
Reviewed-by: Hendrik Brueckner
Cc: Heiko Carstens
Cc: Martin Schwidefsky
Link: http://lkml.kernel.org/r/20190211100627.85714-1-tmricht@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/perf/util/s390-cpumsf.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index d9525d220db1..c215704931dc 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -352,6 +352,11 @@ static bool s390_cpumsf_validate(int machine_type,
 		*dsdes = 85;
 		*bsdes = 32;
 		break;
+	case 2964:
+	case 2965:
+		*dsdes = 112;
+		*bsdes = 32;
+		break;
 	default:
 		/* Illegal trailer entry */
 		return false;
-- 
cgit v1.2.3-55-g7522


From dd81eafacc52961ed1b2bf3e998b92ccfd9108bc Mon Sep 17 00:00:00 2001
From: Paul Clarke
Date: Thu, 7 Feb 2019 12:53:11 -0500
Subject: perf vendor events power8: Cpi_breakdown & estimated_dcache_miss_cpi metrics

POWER8 metrics are not well publicized. Some are here:

  https://www.ibm.com/support/knowledgecenter/en/SSFK5S_2.2.0/com.ibm.cluster.pedev.v2r2.pedev100.doc/bl7ug_derivedmetricspower8.htm

This patch is for metric groups:
  - cpi_breakdown
  - estimated_dcache_miss_cpi

Signed-off-by: Paul Clarke
Cc: Ananth N Mavinakayanahalli
Cc: Carl Love
Cc: Madhavan Srinivasan
Cc: Michael Ellerman
Cc: Naveen N.
Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20190207175314.31813-2-pc@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power8/metrics.json | 461 +++++++++++++++++++++ 1 file changed, 461 insertions(+) create mode 100644 tools/perf/pmu-events/arch/powerpc/power8/metrics.json diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json new file mode 100644 index 000000000000..377b76226c08 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json @@ -0,0 +1,461 @@ +[ + { + "BriefDescription": "Cycles stalled due to CRU or BRU operations", + "MetricExpr": "PM_CMPLU_STALL_BRU_CRU / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "bru_cru_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled due to ISU Branch Operations", + "MetricExpr": "PM_CMPLU_STALL_BRU / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "bru_stall_cpi" + }, + { + "BriefDescription": "Cycles in which a Group Completed", + "MetricExpr": "PM_GRP_CMPL / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "completion_cpi" + }, + { + "BriefDescription": "Cycles stalled by CO queue full", + "MetricExpr": "PM_CMPLU_STALL_COQ_FULL / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "coq_full_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled due to CRU Operations", + "MetricExpr": "(PM_CMPLU_STALL_BRU_CRU - PM_CMPLU_STALL_BRU) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "cru_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by flushes", + "MetricExpr": "PM_CMPLU_STALL_FLUSH / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "flush_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by FXU Multi-Cycle Instructions", + "MetricExpr": "PM_CMPLU_STALL_FXLONG / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxu_multi_cyc_cpi" + }, + { + "BriefDescription": "Cycles stalled by FXU", + "MetricExpr": "PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxu_stall_cpi" + }, + { + "BriefDescription": "Other cycles stalled by FXU", + "MetricExpr": "(PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_FXLONG / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxu_stall_other_cpi" + }, + { + "BriefDescription": "Cycles GCT empty due to Branch Mispredicts", + "MetricExpr": "PM_GCT_NOSLOT_BR_MPRED / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_br_mpred_cpi" + }, + { + "BriefDescription": "Cycles GCT empty due to Branch Mispredicts and Icache Misses", + "MetricExpr": "PM_GCT_NOSLOT_BR_MPRED_ICMISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_br_mpred_ic_miss_cpi" + }, + { + "BriefDescription": "GCT empty cycles", + "MetricExpr": "PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_cpi" + }, + { + "BriefDescription": "Cycles GCT empty where dispatch was held", + "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_disp_held_cpi" + }, + { + "BriefDescription": "Cycles GCT empty where dispatch was held due to issue queue", + "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_ISSQ / PM_RUN_INST_CMPL", + "MetricGroup": 
"cpi_breakdown", + "MetricName": "gct_empty_disp_held_issq_cpi" + }, + { + "BriefDescription": "Cycles GCT empty where dispatch was held due to maps", + "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_MAP / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_disp_held_map_cpi" + }, + { + "BriefDescription": "Cycles GCT empty where dispatch was held due to syncs and other effects", + "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_OTHER / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_disp_held_other_cpi" + }, + { + "BriefDescription": "Cycles GCT empty where dispatch was held due to SRQ", + "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_SRQ / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_disp_held_srq_cpi" + }, + { + "BriefDescription": "Cycles stalled by GCT empty due to Icache misses", + "MetricExpr": "PM_GCT_NOSLOT_IC_MISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_ic_miss_cpi" + }, + { + "BriefDescription": "Cycles stalled by GCT empty due to Icache misses that resolve in the local L2 or L3", + "MetricExpr": "(PM_GCT_NOSLOT_IC_MISS - PM_GCT_NOSLOT_IC_L3MISS) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_ic_miss_l2l3_cpi" + }, + { + "BriefDescription": "Cycles stalled by GCT empty due to Icache misses that resolve off-chip", + "MetricExpr": "PM_GCT_NOSLOT_IC_L3MISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_ic_miss_l3miss_cpi" + }, + { + "BriefDescription": "Other GCT empty cycles", + "MetricExpr": "(PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_IC_MISS / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_BR_MPRED / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_BR_MPRED_ICMISS / PM_RUN_INST_CMPL) - ((PM_GCT_NOSLOT_DISP_HELD_MAP / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_SRQ / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_ISSQ / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_OTHER / PM_RUN_INST_CMPL))", + "MetricGroup": "cpi_breakdown", + "MetricName": "gct_empty_other_cpi" + }, + { + "BriefDescription": "Cycles stalled by heavyweight syncs", + "MetricExpr": "PM_CMPLU_STALL_HWSYNC / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "hwsync_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by LSU", + "MetricExpr": "PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses", + "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in distant interventions and memory", + "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_REMOTE) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_distant_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in remote or distant caches", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L21_L31 / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_l21l31_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3, where there was a conflict", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": 
"lsu_stall_dcache_miss_l2l3_conflict_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3 / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_l2l3_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3, where there was no conflict", + "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_l2l3_noconflict_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in other core's caches or memory", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L3MISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_l3miss_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in local memory or local L4", + "MetricExpr": "PM_CMPLU_STALL_DMISS_LMEM / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_lmem_cpi" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in remote interventions and memory", + "MetricExpr": "PM_CMPLU_STALL_DMISS_REMOTE / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_dcache_miss_remote_cpi" + }, + { + "BriefDescription": "Cycles stalled by ERAT Translation rejects", + "MetricExpr": "PM_CMPLU_STALL_ERAT_MISS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_erat_miss_cpi" + }, + { + "BriefDescription": "Cycles stalled by LSU load finishes", + "MetricExpr": "PM_CMPLU_STALL_LOAD_FINISH / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_ld_fin_cpi" + }, + { + "BriefDescription": "Cycles stalled by LHS rejects", + "MetricExpr": "PM_CMPLU_STALL_REJECT_LHS / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_lhs_cpi" + }, + { + "BriefDescription": "Cycles stalled by LMQ Full rejects", + "MetricExpr": "PM_CMPLU_STALL_REJ_LMQ_FULL / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_lmq_full_cpi" + }, + { + "BriefDescription": "Cycles stalled by Other LSU Operations", + "MetricExpr": "(PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_STORE / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_LOAD_FINISH / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_ST_FWD / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_other_cpi" + }, + { + "BriefDescription": "Cycles stalled by LSU Rejects", + "MetricExpr": "PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_reject_cpi" + }, + { + "BriefDescription": "Cycles stalled by Other LSU Rejects", + "MetricExpr": "(PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJECT_LHS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_ERAT_MISS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJ_LMQ_FULL / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_reject_other_cpi" + }, + { + "BriefDescription": "Cycles stalled by LSU store forwarding", + "MetricExpr": "PM_CMPLU_STALL_ST_FWD / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_st_fwd_cpi" + }, + { + "BriefDescription": "Cycles stalled by LSU Stores", + "MetricExpr": "PM_CMPLU_STALL_STORE / 
PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_store_cpi" + }, + { + "BriefDescription": "Cycles stalled by lightweight syncs", + "MetricExpr": "PM_CMPLU_STALL_LWSYNC / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lwsync_stall_cpi" + }, + { + "MetricExpr": "PM_CMPLU_STALL_MEM_ECC_DELAY / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "mem_ecc_delay_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by nops (nothing next to finish)", + "MetricExpr": "PM_CMPLU_STALL_NO_NTF / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "no_ntf_stall_cpi" + }, + { + "MetricExpr": "PM_NTCG_ALL_FIN / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntcg_all_fin_cpi" + }, + { + "MetricExpr": "PM_CMPLU_STALL_NTCG_FLUSH / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntcg_flush_cpi" + }, + { + "BriefDescription": "Other thread block stall cycles", + "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_LWSYNC - PM_CMPLU_STALL_HWSYNC - PM_CMPLU_STALL_MEM_ECC_DELAY - PM_CMPLU_STALL_FLUSH - PM_CMPLU_STALL_COQ_FULL) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "other_block_stall_cpi" + }, + { + "BriefDescription": "Cycles unaccounted for", + "MetricExpr": "(PM_RUN_CYC / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL) - (PM_NTCG_ALL_FIN / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_THRD / PM_RUN_INST_CMPL) - (PM_GRP_CMPL / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "other_cpi" + }, + { + "BriefDescription": "Stall cycles unaccounted for", + "MetricExpr": "(PM_CMPLU_STALL / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_BRU_CRU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_VSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_NTCG_FLUSH / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_NO_NTF / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "other_stall_cpi" + }, + { + "BriefDescription": "Run cycles per run instruction", + "MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "run_cpi" + }, + { + "BriefDescription": "Completion Stall Cycles", + "MetricExpr": "PM_CMPLU_STALL / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "stall_cpi" + }, + { + "BriefDescription": "Cycles a thread was blocked", + "MetricExpr": "PM_CMPLU_STALL_THRD / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "thread_block_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by VSU", + "MetricExpr": "PM_CMPLU_STALL_VSU / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_cpi" + }, + { + "BriefDescription": "Cycles stalled by other VSU Operations", + "MetricExpr": "(PM_CMPLU_STALL_VSU - PM_CMPLU_STALL_VECTOR - PM_CMPLU_STALL_SCALAR) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_other_cpi" + }, + { + "BriefDescription": "Cycles stalled by VSU Scalar Operations", + "MetricExpr": "PM_CMPLU_STALL_SCALAR / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_scalar_cpi" + }, + { + "BriefDescription": "Cycles stalled by VSU Scalar Long Operations", + "MetricExpr": "PM_CMPLU_STALL_SCALAR_LONG / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_scalar_long_cpi" + }, + { + "BriefDescription": "Cycles stalled by Other VSU Scalar Operations", + 
"MetricExpr": "(PM_CMPLU_STALL_SCALAR / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_SCALAR_LONG / PM_RUN_INST_CMPL)", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_scalar_other_cpi" + }, + { + "BriefDescription": "Cycles stalled by VSU Vector Operations", + "MetricExpr": "PM_CMPLU_STALL_VECTOR / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_vector_cpi" + }, + { + "BriefDescription": "Cycles stalled by VSU Vector Long Operations", + "MetricExpr": "PM_CMPLU_STALL_VECTOR_LONG / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_vector_long_cpi" + }, + { + "BriefDescription": "Cycles stalled by other VSU Vector Operations", + "MetricExpr": "(PM_CMPLU_STALL_VECTOR - PM_CMPLU_STALL_VECTOR_LONG) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vsu_stall_vector_other_cpi" + }, + { + "BriefDescription": "dL1 miss portion of CPI", + "MetricExpr": "( (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)/ (PM_RUN_CYC / PM_RUN_INST_CMPL)) * 100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dcache_miss_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_DL2L3_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl2l3_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 distant SHR miss rates with measured DL2L3 SHR latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_DL2L3_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl2l3_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of distant L4 miss rates with measured DL4 latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_DL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl4_cpi_percent" + }, + { + "BriefDescription": "estimate of distant memory miss rates with measured DMEM latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_DMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dmem_cpi_percent" + }, + { + "BriefDescription": "estimate of dl21 MOD miss rates with measured L21 MOD latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_L21_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l21_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl21 SHR miss rates with measured L21 SHR latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_L21_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l21_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2 miss rates with measured L2 latency as a %of dcache miss cpi", + 
"MetricExpr": "(((PM_DATA_FROM_L2 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL) ) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l2_cpi_percent" + }, + { + "BriefDescription": "estimate of dl31 MOD miss rates with measured L31 MOD latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_L31_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l31_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl31 SHR miss rates with measured L31 SHR latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_L31_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l31_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of dl3 miss rates with measured L3 latency as a % of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_L3 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) * 100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l3_cpi_percent" + }, + { + "BriefDescription": "estimate of Local L4 miss rates with measured LL4 latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_LL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "ll4_cpi_percent" + }, + { + "BriefDescription": "estimate of Local memory miss rates with measured LMEM latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_LMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "lmem_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 remote MOD miss rates with measured RL2L3 MOD latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_RL2L3_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl2l3_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 shared miss rates with measured RL2L3 SHR latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_RL2L3_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) * 100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl2l3_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of remote L4 miss rates with measured RL4 latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_RL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl4_cpi_percent" + }, + { + "BriefDescription": "estimate of remote memory miss rates with measured RMEM latency as a %of dcache miss cpi", + "MetricExpr": "(((PM_DATA_FROM_RMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) 
*100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rmem_cpi_percent" + } +] -- cgit v1.2.3-55-g7522 From ffe18505ba1d641a4935321d3c525e4e2efd64c3 Mon Sep 17 00:00:00 2001 From: Paul Clarke Date: Thu, 7 Feb 2019 12:53:12 -0500 Subject: perf vendor events power8: Dl1_reload, instruction_misses, l2_stats, lsu_rejects, memory & pteg_reloads metrics POWER8 metrics are not well publicized. Some are here: https://www.ibm.com/support/knowledgecenter/en/SSFK5S_2.2.0/com.ibm.cluster.pedev.v2r2.pedev100.doc/bl7ug_derivedmetricspower8.htm This patch is for metric groups: - dl1_reloads_percent_per_inst - dl1_reloads_percent_per_ref - instruction_misses_percent_per_inst - l2_stats - lsu_rejects - memory - pteg_reloads_percent_per_inst - pteg_reloads_percent_per_ref Signed-off-by: Paul Clarke Cc: Ananth N Mavinakayanahalli Cc: Carl Love Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20190207175314.31813-3-pc@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power8/metrics.json | 702 +++++++++++++++++++++ 1 file changed, 702 insertions(+) diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json index 377b76226c08..9a6ec8aadffd 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json @@ -356,6 +356,288 @@ "MetricGroup": "cpi_breakdown", "MetricName": "vsu_stall_vector_other_cpi" }, + { + "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant L4 per Inst", + "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant Memory per Inst", + "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "Percentage of L2 load hits per instruction where the L2 experienced a Load-Hit-Store conflict", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_lhs_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L2 per Inst", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": 
"dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L2 load hits per instruction where the L2 did not experience a conflict", + "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_no_conflict_rate_percent" + }, + { + "BriefDescription": "Percentage of L2 load hits per instruction where the L2 experienced some conflict other than Load-Hit-Store", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_other_conflict_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L2 per Inst", + "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3 M state, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3 S tate, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "Percentage of L3 load hits per instruction where the load collided with a pending prefetch", + "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_conflict_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L3 per Inst", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L3 load hits per instruction where the L3 did not experience a conflict", + "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_no_conflict_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from L3 per Inst", + "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Local L4 per Inst", + "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Local Memory per Inst", + "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rl2l3_shr_rate_percent" + 
}, + { + "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst", + "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rl4_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst", + "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rmem_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 demand load misses per run instruction", + "MetricExpr": "PM_LD_MISS_L1 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "l1_ld_miss_rate_percent" + }, + { + "BriefDescription": "% of DL1 misses that result in a cache reload", + "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID * 100 / PM_LD_MISS_L1", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_miss_reloads_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L4", + "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dl4_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant Memory", + "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dmem_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core", + "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l21_mod_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core", + "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l21_shr_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L2 with a Load-Hit-Store conflict", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_lhs_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L2 with no conflicts", + "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_no_conflict_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L2 with some conflict other than Load-Hit-Store", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_other_conflict_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L2", + "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_percent" + 
}, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core", + "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l31_mod_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core", + "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l31_shr_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L3 where the load collided with a pending prefetch", + "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_conflict_percent" + }, + { + "BriefDescription": "Percentage of L3 load hits per instruction where the line was brought into the L3 by a prefetch operation", + "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_mepf_rate_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L3 without conflicts", + "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_no_conflict_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from L3", + "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Local L4", + "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_ll4_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Local Memory", + "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_lmem_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Modified)", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L4", + "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rl4_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote Memory", + "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rmem_percent" + }, { "BriefDescription": "dL1 miss portion of CPI", "MetricExpr": "( (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)/ (PM_RUN_CYC / PM_RUN_INST_CMPL)) * 100", @@ -457,5 +739,425 @@ "MetricExpr": "(((PM_DATA_FROM_RMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", "MetricGroup": "estimated_dcache_miss_cpi", "MetricName": "rmem_cpi_percent" + }, + { + "BriefDescription": "% of ICache reloads 
from Distant L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L4 per Inst", + "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant Memory per Inst", + "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", + "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", + "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from L2 per Inst", + "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l2_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core per Inst", + "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3 other core per Inst", + "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from L3 per Inst", + "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l3_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local L4 per Inst", + "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local Memory per Inst", + "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads 
from Remote L4 per Inst", + "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote Memory per Inst", + "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rmem_rate_percent" + }, + { + "BriefDescription": "Instruction Cache Miss Rate (Per run Instruction)(%)", + "MetricExpr": "PM_L1_ICACHE_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "l1_inst_miss_rate_percent" + }, + { + "BriefDescription": "Average number of stores that gather in the store buffer before being sent to an L2 RC machine", + "MetricExpr": "PM_ST_CMPL / (PM_L2_ST / 2)", + "MetricGroup": "l2_stats", + "MetricName": "avg_stores_gathered" + }, + { + "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", + "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_st_miss_ratio_percent" + }, + { + "BriefDescription": "Percentage of L2 store misses per drained store. A drained store may contain multiple individual stores if they target the same line", + "MetricExpr": "PM_L2_ST_MISS / (PM_L2_ST / 2)", + "MetricGroup": "l2_stats", + "MetricName": "l2_store_miss_ratio_percent" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "erat_reject_rate_percent" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / (PM_LSU_FIN - PM_LSU_FX_FIN)", + "MetricGroup": "lsu_rejects", + "MetricName": "erat_reject_ratio_percent" + }, + { + "BriefDescription": "LHS reject ratio", + "MetricExpr": "PM_LSU_REJECT_LHS *100/ PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "lhs_reject_rate_percent" + }, + { + "BriefDescription": "LHS reject ratio", + "MetricExpr": "PM_LSU_REJECT_LHS *100/ (PM_LSU_FIN - PM_LSU_FX_FIN)", + "MetricGroup": "lsu_rejects", + "MetricName": "lhs_reject_ratio_percent" + }, + { + "BriefDescription": "LMQ full reject ratio", + "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "lmq_full_reject_rate_percent" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_LD_REF_L1", + "MetricGroup": "lsu_rejects", + "MetricName": "lmq_full_reject_ratio_percent" + }, + { + "BriefDescription": "LSU reject ratio", + "MetricExpr": "PM_LSU_REJECT *100/ PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "lsu_reject_rate_percent" + }, + { + "BriefDescription": "LSU reject ratio", + "MetricExpr": "PM_LSU_REJECT *100/ (PM_LSU_FIN - PM_LSU_FX_FIN)", + "MetricGroup": "lsu_rejects", + "MetricName": "lsu_reject_ratio_percent" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to distant L4", + "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_DL4", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_dmem" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to remote+distant L4", + "MetricExpr": "PM_DATA_FROM_LL4 / (PM_DATA_FROM_DL4 + PM_DATA_FROM_RL4)", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_mem" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to remote L4", + "MetricExpr": "PM_DATA_FROM_LL4 / 
PM_DATA_FROM_RL4", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_rl4" + }, + { + "BriefDescription": "Number of loads from local memory per loads from distant memory", + "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_dmem" + }, + { + "BriefDescription": "Number of loads from local memory per loads from remote and distant memory", + "MetricExpr": "PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM)", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_mem" + }, + { + "BriefDescription": "Number of loads from local memory per loads from remote memory", + "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_rmem" + }, + { + "BriefDescription": "Number of loads from remote memory per loads from distant memory", + "MetricExpr": "PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM", + "MetricGroup": "memory", + "MetricName": "ld_rmem_per_ld_dmem" + }, + { + "BriefDescription": "Memory locality", + "MetricExpr": "(PM_DATA_FROM_LL4 + PM_DATA_FROM_LMEM) * 100/ (PM_DATA_FROM_LMEM + PM_DATA_FROM_LL4 + PM_DATA_FROM_RMEM + PM_DATA_FROM_RL4 + PM_DATA_FROM_DMEM + PM_DATA_FROM_DL4)", + "MetricGroup": "memory", + "MetricName": "mem_locality_percent" + }, + { + "BriefDescription": "DERAT Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "derat_miss_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified) per inst", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared) per inst", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant Memory per inst", + "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L2 per inst", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l2_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of 
DERAT reloads from Private L3, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L3 per inst", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l3_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local Memory per inst", + "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified) per inst", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared) per inst", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rl4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote Memory per inst", + "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT misses that result in an ERAT reload", + "MetricExpr": "PM_DTLB_MISS * 100 / PM_LSU_DERAT_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "derat_miss_reload_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L4", + "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl4_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant Memory", + "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dmem_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core", + "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l21_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core", + "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l21_shr_percent" + }, + { + 
"BriefDescription": "% of DERAT reloads from L2", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l2_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core", + "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l31_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core", + "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l31_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L3", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l3_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local L4", + "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_ll4_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local Memory", + "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_lmem_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified)", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L4", + "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rl4_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote Memory", + "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rmem_percent" } ] -- cgit v1.2.3-55-g7522 From 69ba708f4df6250dfa0410297024eeedd7ab3362 Mon Sep 17 00:00:00 2001 From: Paul Clarke Date: Thu, 7 Feb 2019 12:53:13 -0500 Subject: perf vendor events power8: Branch_prediction, latency, bus_stats, instruction_mix & instruction_stats metrics POWER8 metrics are not well publicized. Some are here: https://www.ibm.com/support/knowledgecenter/en/SSFK5S_2.2.0/com.ibm.cluster.pedev.v2r2.pedev100.doc/bl7ug_derivedmetricspower8.htm This patch is for metric groups: - branch_prediction - latency - bus_stats - instruction_mix - instruction_stats_percent_per_ref Signed-off-by: Paul Clarke Cc: Ananth N Mavinakayanahalli Cc: Carl Love Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Naveen N. 
Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20190207175314.31813-4-pc@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power8/metrics.json | 492 +++++++++++++++++++++ 1 file changed, 492 insertions(+) diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json index 9a6ec8aadffd..d8b710e12377 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json @@ -1,4 +1,100 @@ [ + { + "BriefDescription": "% of finished branches that were treated as BC+8", + "MetricExpr": "PM_BR_BC_8_CONV / PM_BRU_FIN * 100", + "MetricGroup": "branch_prediction", + "MetricName": "bc_8_branch_ratio_percent" + }, + { + "BriefDescription": "% of finished branches that were pairable but not treated as BC+8", + "MetricExpr": "PM_BR_BC_8 / PM_BRU_FIN * 100", + "MetricGroup": "branch_prediction", + "MetricName": "bc_8_not_converted_branch_ratio_percent" + }, + { + "BriefDescription": "Percent of mispredicted branches out of all predicted (correctly and incorrectly) branches that completed", + "MetricExpr": "PM_BR_MPRED_CMPL / (PM_BR_PRED_BR0 + PM_BR_PRED_BR1) * 100", + "MetricGroup": "branch_prediction", + "MetricName": "br_misprediction_percent" + }, + { + "BriefDescription": "% of Branch miss predictions per instruction", + "MetricExpr": "PM_BR_MPRED_CMPL / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "branch_mispredict_rate_percent" + }, + { + "BriefDescription": "Count cache branch misprediction per instruction", + "MetricExpr": "PM_BR_MPRED_CCACHE / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ccache_mispredict_rate_percent" + }, + { + "BriefDescription": "Percent of count cache mispredictions out of all completed branches that required count cache prediction", + "MetricExpr": "PM_BR_MPRED_CCACHE / (PM_BR_PRED_CCACHE_BR0 + PM_BR_PRED_CCACHE_BR1) * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ccache_misprediction_percent" + }, + { + "BriefDescription": "CR MisPredictions per Instruction", + "MetricExpr": "PM_BR_MPRED_CR / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "cr_mispredict_rate_percent" + }, + { + "BriefDescription": "Link stack branch misprediction", + "MetricExpr": "(PM_BR_MPRED_TA - PM_BR_MPRED_CCACHE) / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "lstack_mispredict_rate_percent" + }, + { + "BriefDescription": "Percent of link stack mispredictions out of all completed branches that required link stack prediction", + "MetricExpr": "(PM_BR_MPRED_TA - PM_BR_MPRED_CCACHE) / (PM_BR_PRED_LSTACK_BR0 + PM_BR_PRED_LSTACK_BR1) * 100", + "MetricGroup": "branch_prediction", + "MetricName": "lstack_misprediction_percent" + }, + { + "BriefDescription": "TA MisPredictions per Instruction", + "MetricExpr": "PM_BR_MPRED_TA / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ta_mispredict_rate_percent" + }, + { + "BriefDescription": "Percent of target address mispredictions out of all completed branches that required address prediction", + "MetricExpr": "PM_BR_MPRED_TA / (PM_BR_PRED_CCACHE_BR0 + PM_BR_PRED_CCACHE_BR1 + PM_BR_PRED_LSTACK_BR0 + PM_BR_PRED_LSTACK_BR1) * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ta_misprediction_percent" + }, + { + "BriefDescription": "Percent of branches completed that were taken", + "MetricExpr": "PM_BR_TAKEN_CMPL
* 100 / PM_BR_CMPL", + "MetricGroup": "branch_prediction", + "MetricName": "taken_branches_percent" + }, + { + "BriefDescription": "Percent of chip+group+sys pumps that were incorrectly predicted", + "MetricExpr": "PM_PUMP_MPRED * 100 / (PM_PUMP_CPRED + PM_PUMP_MPRED)", + "MetricGroup": "bus_stats", + "MetricName": "any_pump_mpred_percent" + }, + { + "BriefDescription": "Percent of chip pumps that were correctly predicted as chip pumps the first time", + "MetricExpr": "PM_CHIP_PUMP_CPRED * 100 / PM_L2_CHIP_PUMP", + "MetricGroup": "bus_stats", + "MetricName": "chip_pump_cpred_percent" + }, + { + "BriefDescription": "Percent of group pumps that were correctly predicted as group pumps the first time", + "MetricExpr": "PM_GRP_PUMP_CPRED * 100 / PM_L2_GROUP_PUMP", + "MetricGroup": "bus_stats", + "MetricName": "group_pump_cpred_percent" + }, + { + "BriefDescription": "Percent of system pumps that were correctly predicted as group pumps the first time", + "MetricExpr": "PM_SYS_PUMP_CPRED * 100 / PM_L2_GROUP_PUMP", + "MetricGroup": "bus_stats", + "MetricName": "sys_pump_cpred_percent" + }, { "BriefDescription": "Cycles stalled due to CRU or BRU operations", "MetricExpr": "PM_CMPLU_STALL_BRU_CRU / PM_RUN_INST_CMPL", @@ -842,6 +938,180 @@ "MetricGroup": "instruction_misses_percent_per_inst", "MetricName": "l1_inst_miss_rate_percent" }, + { + "BriefDescription": "% Branches per instruction", + "MetricExpr": "PM_BRU_FIN / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_mix", + "MetricName": "branches_per_inst" + }, + { + "BriefDescription": "Total Fixed point operations", + "MetricExpr": "(PM_FXU0_FIN + PM_FXU1_FIN)/PM_RUN_INST_CMPL", + "MetricGroup": "instruction_mix", + "MetricName": "fixed_per_inst" + }, + { + "BriefDescription": "FXU0 balance", + "MetricExpr": "PM_FXU0_FIN / (PM_FXU0_FIN + PM_FXU1_FIN)", + "MetricGroup": "instruction_mix", + "MetricName": "fxu0_balance" + }, + { + "BriefDescription": "Fraction of cycles that FXU0 is in use", + "MetricExpr": "PM_FXU0_FIN / PM_RUN_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu0_fin" + }, + { + "BriefDescription": "FXU0 only Busy", + "MetricExpr": "PM_FXU0_BUSY_FXU1_IDLE / PM_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu0_only_busy" + }, + { + "BriefDescription": "Fraction of cycles that FXU1 is in use", + "MetricExpr": "PM_FXU1_FIN / PM_RUN_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu1_fin" + }, + { + "BriefDescription": "FXU1 only Busy", + "MetricExpr": "PM_FXU1_BUSY_FXU0_IDLE / PM_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu1_only_busy" + }, + { + "BriefDescription": "Both FXU Busy", + "MetricExpr": "PM_FXU_BUSY / PM_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu_both_busy" + }, + { + "BriefDescription": "Both FXU Idle", + "MetricExpr": "PM_FXU_IDLE / PM_CYC", + "MetricGroup": "instruction_mix", + "MetricName": "fxu_both_idle" + }, + { + "BriefDescription": "PCT instruction loads", + "MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_mix", + "MetricName": "loads_per_inst" + }, + { + "BriefDescription": "PCT instruction stores", + "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_mix", + "MetricName": "stores_per_inst" + }, + { + "BriefDescription": "Icache Fetchs per Icache Miss", + "MetricExpr": "(PM_L1_ICACHE_MISS - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "icache_miss_reload" + }, + { + "BriefDescription": "% of ICache reloads due to 
prefetch", + "MetricExpr": "PM_IC_PREF_WRITE * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "icache_pref_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L4", + "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant Memory", + "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dmem_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core", + "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l21_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core", + "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l21_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from L2", + "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l2_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core", + "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l31_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core", + "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l31_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from L3", + "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l3_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local L4", + "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_ll4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local Memory", + "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_lmem_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified)", + "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of ICache 
reloads from Remote L4", + "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote Memory", + "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rmem_percent" + }, { "BriefDescription": "Average number of stores that gather in the store buffer before being sent to an L2 RC machine", "MetricExpr": "PM_ST_CMPL / (PM_L2_ST / 2)", @@ -860,6 +1130,228 @@ "MetricGroup": "l2_stats", "MetricName": "l2_store_miss_ratio_percent" }, + { + "BriefDescription": "average L1 miss latency using marked events", + "MetricExpr": "PM_MRK_LD_MISS_L1_CYC / PM_MRK_LD_MISS_L1", + "MetricGroup": "latency", + "MetricName": "average_dl1miss_latency" + }, + { + "BriefDescription": "Average icache miss latency", + "MetricExpr": "(PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ)", + "MetricGroup": "latency", + "MetricName": "average_il1_miss_latency" + }, + { + "BriefDescription": "average service time for SYNC", + "MetricExpr": "PM_LSU_SRQ_SYNC_CYC / PM_LSU_SRQ_SYNC", + "MetricGroup": "latency", + "MetricName": "average_sync_cyc" + }, + { + "BriefDescription": "Cycles LMQ slot0 was active on an average", + "MetricExpr": "PM_LSU_LMQ_S0_VALID / PM_LSU_LMQ_S0_ALLOC", + "MetricGroup": "latency", + "MetricName": "avg_lmq_life_time" + }, + { + "BriefDescription": "Average number of cycles LRQ stays active for one load. Slot 0 is VALID ONLY FOR EVEN THREADS", + "MetricExpr": "PM_LSU_LRQ_S0_VALID / PM_LSU_LRQ_S0_ALLOC", + "MetricGroup": "latency", + "MetricName": "avg_lrq_life_time_even" + }, + { + "BriefDescription": "Average number of cycles LRQ stays active for one load. Slot 43 is valid ONLY FOR ODD THREADS", + "MetricExpr": "PM_LSU_LRQ_S43_VALID / PM_LSU_LRQ_S43_ALLOC", + "MetricGroup": "latency", + "MetricName": "avg_lrq_life_time_odd" + }, + { + "BriefDescription": "Average number of cycles SRQ stays active for one load. Slot 0 is VALID ONLY FOR EVEN THREADS", + "MetricExpr": "PM_LSU_SRQ_S0_VALID / PM_LSU_SRQ_S0_ALLOC", + "MetricGroup": "latency", + "MetricName": "avg_srq_life_time_even" + }, + { + "BriefDescription": "Average number of cycles SRQ stays active for one load. 
Slot 39 is valid ONLY FOR ODD THREADS", + "MetricExpr": "PM_LSU_SRQ_S39_VALID / PM_LSU_SRQ_S39_ALLOC", + "MetricGroup": "latency", + "MetricName": "avg_srq_life_time_odd" + }, + { + "BriefDescription": "Marked background kill latency, measured in L2", + "MetricExpr": "PM_MRK_FAB_RSP_BKILL_CYC / PM_MRK_FAB_RSP_BKILL", + "MetricGroup": "latency", + "MetricName": "bkill_latency" + }, + { + "BriefDescription": "Marked dclaim latency, measured in L2", + "MetricExpr": "PM_MRK_FAB_RSP_DCLAIM_CYC / PM_MRK_FAB_RSP_DCLAIM", + "MetricGroup": "latency", + "MetricName": "dclaim_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD", + "MetricGroup": "latency", + "MetricName": "dl2l3_mod_latency" + }, + { + "BriefDescription": "Marked L2L3 distant Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR", + "MetricGroup": "latency", + "MetricName": "dl2l3_shr_latency" + }, + { + "BriefDescription": "Distant L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4", + "MetricGroup": "latency", + "MetricName": "dl4_latency" + }, + { + "BriefDescription": "Marked Dmem Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM", + "MetricGroup": "latency", + "MetricName": "dmem_latency" + }, + { + "BriefDescription": "estimated exposed miss latency for dL1 misses, ie load miss when we were NTC", + "MetricExpr": "PM_MRK_LD_MISS_EXPOSED_CYC / PM_MRK_LD_MISS_EXPOSED", + "MetricGroup": "latency", + "MetricName": "exposed_dl1miss_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from L2.1 in the M state", + "MetricExpr": "PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD", + "MetricGroup": "latency", + "MetricName": "l21_mod_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from L2.1 in the S state", + "MetricExpr": "PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR", + "MetricGroup": "latency", + "MetricName": "l21_shr_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered a conflict at RC machine dispatch time due to load-hit-store", + "MetricExpr": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC/ PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST", + "MetricGroup": "latency", + "MetricName": "l2_disp_conflict_ldhitst_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered a conflict at RC machine dispatch time NOT due load-hit-store", + "MetricExpr": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC/ PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER", + "MetricGroup": "latency", + "MetricName": "l2_disp_conflict_other_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L2", + "MetricExpr": "PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2", + "MetricGroup": "latency", + "MetricName": "l2_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that were satisfied by lines prefetched into the L3. 
This information is forwarded from the L3", + "MetricExpr": "PM_MRK_DATA_FROM_L2_MEPF_CYC/ PM_MRK_DATA_FROM_L2", + "MetricGroup": "latency", + "MetricName": "l2_mepf_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered no conflicts", + "MetricExpr": "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC/ PM_MRK_DATA_FROM_L2", + "MetricGroup": "latency", + "MetricName": "l2_no_conflict_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L3 and beyond", + "MetricExpr": "PM_MRK_DATA_FROM_L2MISS_CYC/ PM_MRK_DATA_FROM_L2MISS", + "MetricGroup": "latency", + "MetricName": "l2miss_latency" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD", + "MetricGroup": "latency", + "MetricName": "l31_mod_latency" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR", + "MetricGroup": "latency", + "MetricName": "l31_shr_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L3", + "MetricExpr": "PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3", + "MetricGroup": "latency", + "MetricName": "l3_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that came from the L3 and suffered no conflicts", + "MetricExpr": "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC/ PM_MRK_DATA_FROM_L2", + "MetricGroup": "latency", + "MetricName": "l3_no_conflict_latency" + }, + { + "BriefDescription": "Average load latency for all marked demand loads that come from beyond the L3", + "MetricExpr": "PM_MRK_DATA_FROM_L3MISS_CYC/ PM_MRK_DATA_FROM_L3MISS", + "MetricGroup": "latency", + "MetricName": "l3miss_latency" + }, + { + "BriefDescription": "Average latency for marked reloads that hit in the L3 on the MEPF state. i.e. 
lines that were prefetched into the L3", + "MetricExpr": "PM_MRK_DATA_FROM_L3_MEPF_CYC/ PM_MRK_DATA_FROM_L3_MEPF", + "MetricGroup": "latency", + "MetricName": "l3pref_latency" + }, + { + "BriefDescription": "Local L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4", + "MetricGroup": "latency", + "MetricName": "ll4_latency" + }, + { + "BriefDescription": "Marked Lmem Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM", + "MetricGroup": "latency", + "MetricName": "lmem_latency" + }, + { + "BriefDescription": "Latency for marked reloads that hit in the L2 or L3 of any other core on a different chip", + "MetricExpr": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC/ PM_MRK_DATA_FROM_OFF_CHIP_CACHE", + "MetricGroup": "latency", + "MetricName": "off_chip_cache_latency" + }, + { + "BriefDescription": "Latency for marked reloads that hit in the L2 or L3 of any other core on the same chip", + "MetricExpr": "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC/ PM_MRK_DATA_FROM_ON_CHIP_CACHE", + "MetricGroup": "latency", + "MetricName": "on_chip_cache_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD", + "MetricGroup": "latency", + "MetricName": "rl2l3_mod_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR", + "MetricGroup": "latency", + "MetricName": "rl2l3_shr_latency" + }, + { + "BriefDescription": "Remote L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4", + "MetricGroup": "latency", + "MetricName": "rl4_latency" + }, + { + "BriefDescription": "Marked Rmem Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM", + "MetricGroup": "latency", + "MetricName": "rmem_latency" + }, { "BriefDescription": "ERAT miss reject ratio", "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / PM_RUN_INST_CMPL", -- cgit v1.2.3-55-g7522 From 72ab50203f3f588c2b64c68f11a28ef56a8ff1a1 Mon Sep 17 00:00:00 2001 From: Paul Clarke Date: Thu, 7 Feb 2019 12:53:14 -0500 Subject: perf vendor events power8: Translaton & general metrics POWER8 metrics are not well publicized. Some are here: https://www.ibm.com/support/knowledgecenter/en/SSFK5S_2.2.0/com.ibm.cluster.pedev.v2r2.pedev100.doc/bl7ug_derivedmetricspower8.htm This patch is for metric groups: - translation - general and other metrics not in a metric group. Signed-off-by: Paul Clarke Cc: Ananth N Mavinakayanahalli Cc: Carl Love Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Naveen N. 
Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20190207175314.31813-5-pc@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power8/metrics.json | 590 +++++++++++++++++++++ 1 file changed, 590 insertions(+) diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json index d8b710e12377..bffb2d4a6420 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json @@ -836,6 +836,216 @@ "MetricGroup": "estimated_dcache_miss_cpi", "MetricName": "rmem_cpi_percent" }, + { + "BriefDescription": "Branch Mispredict flushes per instruction", + "MetricExpr": "PM_FLUSH_BR_MPRED / PM_RUN_INST_CMPL * 100", + "MetricGroup": "general", + "MetricName": "br_mpred_flush_rate_percent" + }, + { + "BriefDescription": "Cycles per instruction", + "MetricExpr": "PM_CYC / PM_INST_CMPL", + "MetricGroup": "general", + "MetricName": "cpi" + }, + { + "BriefDescription": "Percentage Cycles a group completed", + "MetricExpr": "PM_GRP_CMPL / PM_CYC * 100", + "MetricGroup": "general", + "MetricName": "cyc_grp_completed_percent" + }, + { + "BriefDescription": "Percentage Cycles a group dispatched", + "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100", + "MetricGroup": "general", + "MetricName": "cyc_grp_dispatched_percent" + }, + { + "BriefDescription": "Cycles per group", + "MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL", + "MetricGroup": "general", + "MetricName": "cyc_per_group" + }, + { + "BriefDescription": "% of dispatch flushes per run instruction", + "MetricExpr": "(PM_FLUSH_DISP / PM_RUN_INST_CMPL) * 100", + "MetricGroup": "general", + "MetricName": "disp_flush_rate_percent" + }, + { + "BriefDescription": "% DTLB miss rate per inst", + "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL *100", + "MetricGroup": "general", + "MetricName": "dtlb_miss_rate_percent" + }, + { + "BriefDescription": "Flush rate (%)", + "MetricExpr": "PM_FLUSH * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "flush_rate_percent" + }, + { + "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had at least 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_11_14_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_11to14_slots_percent" + }, + { + "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had at least 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_15_17_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_15to17_slots_percent" + }, + { + "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had at least 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_18_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_18plus_slots_percent" + }, + { + "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had at least 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_1_2_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_1to2_slots_percent" + }, + { + "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had at least 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_3_6_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_3to6_slots_percent" + }, + { + "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread
had atleast 1 slot valid", + "MetricExpr": "PM_GCT_UTIL_7_10_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", + "MetricGroup": "general", + "MetricName": "gct_util_7to10_slots_percent" + }, + { + "BriefDescription": "Avg. group size", + "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL", + "MetricGroup": "general", + "MetricName": "group_size" + }, + { + "BriefDescription": "Instructions per group", + "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL", + "MetricGroup": "general", + "MetricName": "inst_per_group" + }, + { + "BriefDescription": "Instructions per cycles", + "MetricExpr": "PM_INST_CMPL / PM_CYC", + "MetricGroup": "general", + "MetricName": "ipc" + }, + { + "BriefDescription": "% ITLB miss rate per inst", + "MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL *100", + "MetricGroup": "general", + "MetricName": "itlb_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 load misses per L1 load ref", + "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100", + "MetricGroup": "general", + "MetricName": "l1_ld_miss_ratio_percent" + }, + { + "BriefDescription": "Percentage of L1 store misses per run instruction", + "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l1_st_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 store misses per L1 store ref", + "MetricExpr": "PM_ST_MISS_L1 / PM_ST_FIN * 100", + "MetricGroup": "general", + "MetricName": "l1_st_miss_ratio_percent" + }, + { + "BriefDescription": "L2 Instruction Miss Rate (per instruction)(%)", + "MetricExpr": "PM_INST_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_inst_miss_rate_percent" + }, + { + "BriefDescription": "L2 dmand Load Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_ld_miss_rate_percent" + }, + { + "BriefDescription": "L2 PTEG Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DPTEG_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_pteg_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L2 store misses per run instruction", + "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_st_miss_rate_percent" + }, + { + "BriefDescription": "L3 Instruction Miss Rate (per instruction)(%)", + "MetricExpr": "PM_INST_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l3_inst_miss_rate_percent" + }, + { + "BriefDescription": "L3 demand Load Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l3_ld_miss_rate_percent" + }, + { + "BriefDescription": "L3 PTEG Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DPTEG_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l3_pteg_miss_rate_percent" + }, + { + "BriefDescription": "Run cycles per cycle", + "MetricExpr": "PM_RUN_CYC / PM_CYC*100", + "MetricGroup": "general", + "MetricName": "run_cycles_percent" + }, + { + "BriefDescription": "Percentage of cycles spent in SMT2 Mode", + "MetricExpr": "(PM_RUN_CYC_SMT2_MODE/PM_RUN_CYC) * 100", + "MetricGroup": "general", + "MetricName": "smt2_cycles_percent" + }, + { + "BriefDescription": "Percentage of cycles spent in SMT4 Mode", + "MetricExpr": "(PM_RUN_CYC_SMT4_MODE/PM_RUN_CYC) * 100", + "MetricGroup": "general", + "MetricName": "smt4_cycles_percent" + }, + { + 
"BriefDescription": "Percentage of cycles spent in SMT8 Mode", + "MetricExpr": "(PM_RUN_CYC_SMT8_MODE/PM_RUN_CYC) * 100", + "MetricGroup": "general", + "MetricName": "smt8_cycles_percent" + }, + { + "BriefDescription": "IPC of all instructions completed by the core while this thread was stalled", + "MetricExpr": "PM_CMPLU_STALL_OTHER_CMPL/PM_RUN_CYC", + "MetricGroup": "general", + "MetricName": "smt_benefit" + }, + { + "BriefDescription": "Instruction dispatch-to-completion ratio", + "MetricExpr": "PM_INST_DISP / PM_INST_CMPL", + "MetricGroup": "general", + "MetricName": "speculation" + }, + { + "BriefDescription": "Percentage of cycles spent in Single Thread Mode", + "MetricExpr": "(PM_RUN_CYC_ST_MODE/PM_RUN_CYC) * 100", + "MetricGroup": "general", + "MetricName": "st_cycles_percent" + }, { "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified) per Inst", "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", @@ -1651,5 +1861,385 @@ "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS", "MetricGroup": "pteg_reloads_percent_per_ref", "MetricName": "pteg_from_rmem_percent" + }, + { + "BriefDescription": "% DERAT miss ratio for 16G page per inst", + "MetricExpr": "100 * PM_DERAT_MISS_16G / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_16g_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 16G page", + "MetricExpr": "PM_DERAT_MISS_16G / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_16g_miss_ratio" + }, + { + "BriefDescription": "% DERAT miss rate for 16M page per inst", + "MetricExpr": "PM_DERAT_MISS_16M * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_16m_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 16M page", + "MetricExpr": "PM_DERAT_MISS_16M / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_16m_miss_ratio" + }, + { + "BriefDescription": "% DERAT miss rate for 4K page per inst", + "MetricExpr": "PM_DERAT_MISS_4K * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_4k_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 4K page", + "MetricExpr": "PM_DERAT_MISS_4K / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_4k_miss_ratio" + }, + { + "BriefDescription": "% DERAT miss ratio for 64K page per inst", + "MetricExpr": "PM_DERAT_MISS_64K * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_64k_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 64K page", + "MetricExpr": "PM_DERAT_MISS_64K / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_64k_miss_ratio" + }, + { + "BriefDescription": "% DSLB_Miss_Rate per inst", + "MetricExpr": "PM_DSLB_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "dslb_miss_rate_percent" + }, + { + "BriefDescription": "% ISLB miss rate per inst", + "MetricExpr": "PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "islb_miss_rate_percent" + }, + { + "BriefDescription": "Fraction of hits on any Centaur (local, remote, or distant) on either L4 or DRAM per L1 load ref", + "MetricExpr": "PM_DATA_FROM_MEMORY / PM_LD_REF_L1", + "MetricName": "any_centaur_ld_hit_ratio" + }, + { + "BriefDescription": "Base Completion Cycles", + "MetricExpr": "PM_1PLUS_PPC_CMPL / PM_RUN_INST_CMPL", + "MetricName": "base_completion_cpi" + }, + { + "BriefDescription": "Marked background kill 
latency, measured in L2", + "MetricExpr": "PM_MRK_FAB_RSP_BKILL_CYC / PM_MRK_FAB_RSP_BKILL", + "MetricName": "bkill_ratio_percent" + }, + { + "BriefDescription": "cycles", + "MetricExpr": "PM_RUN_CYC", + "MetricName": "custom_secs" + }, + { + "BriefDescription": "Fraction of hits on a distant chip's Centaur (L4 or DRAM) per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_DMEM + PM_DATA_FROM_DL4) / PM_LD_REF_L1", + "MetricName": "distant_centaur_ld_hit_ratio" + }, + { + "BriefDescription": "% of DL1 reloads that came from the L3 and beyond", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_l2_miss_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", + "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL", + "MetricName": "dl1_reload_from_l31_rate_percent" + }, + { + "BriefDescription": "Percentage of DL1 reloads from L3 where the lines were brought into the L3 by a prefetch operation", + "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_l3_mepf_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from beyond the local L3", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_l3_miss_percent" + }, + { + "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on the L2 or L3 of a core on a distant chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD / PM_LD_REF_L1", + "MetricName": "dl2l3_mod_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the S state on the L2 or L3 of a core on a distant chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR / PM_LD_REF_L1", + "MetricName": "dl2l3_shr_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a distant Centaur's cache per L1 load ref", + "MetricExpr": "PM_DATA_FROM_DL4 / PM_LD_REF_L1", + "MetricName": "dl4_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a distant Centaur's DRAM per L1 load ref", + "MetricExpr": "PM_DATA_FROM_DMEM / PM_LD_REF_L1", + "MetricName": "dmem_ld_hit_ratio" + }, + { + "BriefDescription": "Rate of DERAT reloads from L2", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dpteg_from_l2_rate_percent" + }, + { + "BriefDescription": "Rate of DERAT reloads from L3", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dpteg_from_l3_rate_percent" + }, + { + "BriefDescription": "Overhead of expansion cycles", + "MetricExpr": "(PM_GRP_CMPL / PM_RUN_INST_CMPL) - (PM_1PLUS_PPC_CMPL / PM_RUN_INST_CMPL)", + "MetricName": "expansion_overhead_cpi" + }, + { + "BriefDescription": "Total Fixed point operations executded in the Load/Store Unit following a load/store operation", + "MetricExpr": "PM_LSU_FX_FIN/PM_RUN_INST_CMPL", + "MetricName": "fixed_in_lsu_per_inst" + }, + { + "BriefDescription": "GCT empty cycles", + "MetricExpr": "(PM_GCT_NOSLOT_CYC / PM_RUN_CYC) * 100", + "MetricName": "gct_empty_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from L2", + "MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_l2_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from L3", + "MetricExpr": "PM_IPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_l3_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from local memory", + "MetricExpr": "PM_IPTEG_FROM_LL4 * 100 / 
PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_ll4_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from local memory", + "MetricExpr": "PM_IPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_lmem_rate_percent" + }, + { + "BriefDescription": "Fraction of L1 hits per load ref", + "MetricExpr": "(PM_LD_REF_L1 - PM_LD_MISS_L1) / PM_LD_REF_L1", + "MetricName": "l1_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L1 load misses per L1 load ref", + "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1", + "MetricName": "l1_ld_miss_ratio" + }, + { + "BriefDescription": "Fraction of hits on another core's L2 on the same chip per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_L21_MOD + PM_DATA_FROM_L21_SHR) / PM_LD_REF_L1", + "MetricName": "l2_1_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on another core's L2 on the same chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L21_MOD / PM_LD_REF_L1", + "MetricName": "l2_1_mod_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the S state on another core's L2 on the same chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L21_SHR / PM_LD_REF_L1", + "MetricName": "l2_1_shr_ld_hit_ratio" + }, + { + "BriefDescription": "Average number of Castout machines used. 1 of 16 CO machines is sampled every L2 cycle", + "MetricExpr": "(PM_CO_USAGE / PM_RUN_CYC) * 16", + "MetricName": "l2_co_usage" + }, + { + "BriefDescription": "Fraction of L2 load hits per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L2 / PM_LD_REF_L1", + "MetricName": "l2_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L2 load misses per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L2MISS / PM_LD_REF_L1", + "MetricName": "l2_ld_miss_ratio" + }, + { + "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 experienced a Load-Hit-Store conflict", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST / PM_LD_REF_L1", + "MetricName": "l2_lhs_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 did not experience a conflict", + "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT / PM_LD_REF_L1", + "MetricName": "l2_no_conflict_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 experienced some conflict other than Load-Hit-Store", + "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER / PM_LD_REF_L1", + "MetricName": "l2_other_conflict_ld_hit_ratio" + }, + { + "BriefDescription": "Average number of Read/Claim machines used. 1 of 16 RC machines is sampled every L2 cycle", + "MetricExpr": "(PM_RC_USAGE / PM_RUN_CYC) * 16", + "MetricName": "l2_rc_usage" + }, + { + "BriefDescription": "Average number of Snoop machines used. 
1 of 8 SN machines is sampled every L2 cycle", + "MetricExpr": "(PM_SN_USAGE / PM_RUN_CYC) * 8", + "MetricName": "l2_sn_usage" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "(PM_MRK_DATA_FROM_L31_SHR_CYC + PM_MRK_DATA_FROM_L31_MOD_CYC) / (PM_MRK_DATA_FROM_L31_SHR + PM_MRK_DATA_FROM_L31_MOD)", + "MetricName": "l31_latency" + }, + { + "BriefDescription": "Fraction of hits on another core's L3 on the same chip per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) / PM_LD_REF_L1", + "MetricName": "l3_1_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on another core's L3 on the same chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L31_MOD / PM_LD_REF_L1", + "MetricName": "l3_1_mod_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the S state on another core's L3 on the same chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L31_SHR / PM_LD_REF_L1", + "MetricName": "l3_1_shr_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L3 load hits per load ref where the demand load collided with a pending prefetch", + "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT / PM_LD_REF_L1", + "MetricName": "l3_conflict_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L3 load hits per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L3 / PM_LD_REF_L1", + "MetricName": "l3_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L3 load misses per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L3MISS / PM_LD_REF_L1", + "MetricName": "l3_ld_miss_ratio" + }, + { + "BriefDescription": "Fraction of L3 load hits per load ref where the L3 did not experience a conflict", + "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT / PM_LD_REF_L1", + "MetricName": "l3_no_conflict_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L3 hits on lines that were not in the MEPF state per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_L3 - PM_DATA_FROM_L3_MEPF) / PM_LD_REF_L1", + "MetricName": "l3other_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of L3 hits on lines that were recently prefetched into the L3 (MEPF state) per L1 load ref", + "MetricExpr": "PM_DATA_FROM_L3_MEPF / PM_LD_REF_L1", + "MetricName": "l3pref_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a local Centaur's cache per L1 load ref", + "MetricExpr": "PM_DATA_FROM_LL4 / PM_LD_REF_L1", + "MetricName": "ll4_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a local Centaur's DRAM per L1 load ref", + "MetricExpr": "PM_DATA_FROM_LMEM / PM_LD_REF_L1", + "MetricName": "lmem_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a local Centaur (L4 or DRAM) per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_LMEM + PM_DATA_FROM_LL4) / PM_LD_REF_L1", + "MetricName": "local_centaur_ld_hit_ratio" + }, + { + "BriefDescription": "Cycles stalled by Other LSU Operations", + "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_REJECT - PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_STORE) / (PM_LD_REF_L1 - PM_LD_MISS_L1)", + "MetricName": "lsu_stall_avg_cyc_per_l1hit_stfw" + }, + { + "BriefDescription": "Fraction of hits on another core's L2 or L3 on a different chip (remote or distant) per L1 load ref", + "MetricExpr": "PM_DATA_FROM_OFF_CHIP_CACHE / PM_LD_REF_L1", + "MetricName": "off_chip_cache_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on another core's L2 or L3 on the same chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_ON_CHIP_CACHE / PM_LD_REF_L1", + "MetricName": 
"on_chip_cache_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a remote chip's Centaur (L4 or DRAM) per L1 load ref", + "MetricExpr": "(PM_DATA_FROM_RMEM + PM_DATA_FROM_RL4) / PM_LD_REF_L1", + "MetricName": "remote_centaur_ld_hit_ratio" + }, + { + "BriefDescription": "Percent of all FXU/VSU instructions that got rejected because of unavailable resources or facilities", + "MetricExpr": "PM_ISU_REJECT_RES_NA *100/ PM_RUN_INST_CMPL", + "MetricName": "resource_na_reject_rate_percent" + }, + { + "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on the L2 or L3 of a core on a remote chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD / PM_LD_REF_L1", + "MetricName": "rl2l3_mod_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits of a line in the S state on the L2 or L3 of a core on a remote chip per L1 load ref", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR / PM_LD_REF_L1", + "MetricName": "rl2l3_shr_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a remote Centaur's cache per L1 load ref", + "MetricExpr": "PM_DATA_FROM_RL4 / PM_LD_REF_L1", + "MetricName": "rl4_ld_hit_ratio" + }, + { + "BriefDescription": "Fraction of hits on a remote Centaur's DRAM per L1 load ref", + "MetricExpr": "PM_DATA_FROM_RMEM / PM_LD_REF_L1", + "MetricName": "rmem_ld_hit_ratio" + }, + { + "BriefDescription": "Percent of all FXU/VSU instructions that got rejected due to SAR Bypass", + "MetricExpr": "PM_ISU_REJECT_SAR_BYPASS *100/ PM_RUN_INST_CMPL", + "MetricName": "sar_bypass_reject_rate_percent" + }, + { + "BriefDescription": "Percent of all FXU/VSU instructions that got rejected because of unavailable sources", + "MetricExpr": "PM_ISU_REJECT_SRC_NA *100/ PM_RUN_INST_CMPL", + "MetricName": "source_na_reject_rate_percent" + }, + { + "BriefDescription": "Store forward rate", + "MetricExpr": "100 * (PM_LSU0_SRQ_STFWD + PM_LSU1_SRQ_STFWD) / PM_RUN_INST_CMPL", + "MetricName": "store_forward_rate_percent" + }, + { + "BriefDescription": "Store forward rate", + "MetricExpr": "100 * (PM_LSU0_SRQ_STFWD + PM_LSU1_SRQ_STFWD) / (PM_LD_REF_L1 - PM_LD_MISS_L1)", + "MetricName": "store_forward_ratio_percent" + }, + { + "BriefDescription": "Marked store latency, from core completion to L2 RC machine completion", + "MetricExpr": "(PM_MRK_ST_L2DISP_TO_CMPL_CYC + PM_MRK_ST_DRAIN_TO_L2DISP_CYC) / PM_MRK_ST_NEST", + "MetricName": "store_latency" + }, + { + "BriefDescription": "Cycles stalled by any sync", + "MetricExpr": "(PM_CMPLU_STALL_LWSYNC + PM_CMPLU_STALL_HWSYNC) / PM_RUN_INST_CMPL", + "MetricName": "sync_stall_cpi" + }, + { + "BriefDescription": "Percentage of lines that were prefetched into the L3 and evicted before they were consumed", + "MetricExpr": "(PM_L3_CO_MEPF / 2) / PM_L3_PREF_ALL * 100", + "MetricName": "wasted_l3_prefetch_percent" } ] -- cgit v1.2.3-55-g7522 From 7f3cf5ac7743f924753a2c75fd71317e418844d0 Mon Sep 17 00:00:00 2001 From: Paul Clarke Date: Sat, 9 Feb 2019 13:14:26 -0500 Subject: perf vendor events power9: Cpi_breakdown & estimated_dcache_miss_cpi metrics Descriptions of metrics for POWER9 processors can be found in the "POWER9 Performance Monitor Unit User’s Guide", which is currently available on the "IBM Portal for OpenPOWER" (https://www-355.ibm.com/systems/power/openpower/welcome.xhtml) at https://www-355.ibm.com/systems/power/openpower/posting.xhtml?postingId=4948CDE1963C9BCA852582F800718190 This patch is for metric groups: - cpi_breakdown - estimated_dcache_miss_cpi Signed-off-by: Paul Clarke Cc: Ananth N Mavinakayanahalli Cc: 
Carl Love Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Sukadev Bhattiprolu Cc: linuxppc-dev@ozlabs.org Link: http://lkml.kernel.org/r/20190209181429.23950-2-pc@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power9/metrics.json | 577 +++++++++++++++++++++ 1 file changed, 577 insertions(+) create mode 100644 tools/perf/pmu-events/arch/powerpc/power9/metrics.json diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json new file mode 100644 index 000000000000..cd46ebb8da6a --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -0,0 +1,577 @@ +[ + { + "BriefDescription": "Completion stall due to a Branch Unit", + "MetricExpr": "PM_CMPLU_STALL_BRU/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "bru_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish", + "MetricExpr": "PM_CMPLU_STALL_CRYPTO/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "crypto_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest", + "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dcache_miss_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish.", + "MetricExpr": "PM_CMPLU_STALL_DFLONG/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dflong_stall_cpi" + }, + { + "BriefDescription": "Stalls due to short latency decimal floating ops.", + "MetricExpr": "(PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dfu_other_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish.", + "MetricExpr": "PM_CMPLU_STALL_DFU/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dfu_stall_cpi" + }, + { + "BriefDescription": "Completion stall by Dcache miss which resolved off node memory/cache", + "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_REMOTE)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_distant_stall_cpi" + }, + { + "BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L21_L31/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_l21_l31_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_l2l3_conflict_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 without conflict", + "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_l2l3_noconflict_stall_cpi" + }, + { + "BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3", + "MetricExpr": 
"PM_CMPLU_STALL_DMISS_L2L3/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_l2l3_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to cache miss resolving missed the L3", + "MetricExpr": "PM_CMPLU_STALL_DMISS_L3MISS/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_l3miss_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to cache miss that resolves in local memory", + "MetricExpr": "PM_CMPLU_STALL_DMISS_LMEM/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_lmem_stall_cpi" + }, + { + "BriefDescription": "Completion stall by Dcache miss which resolved outside of local memory", + "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_non_local_stall_cpi" + }, + { + "BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)", + "MetricExpr": "PM_CMPLU_STALL_DMISS_REMOTE/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dmiss_remote_stall_cpi" + }, + { + "BriefDescription": "Stalls due to short latency double precision ops.", + "MetricExpr": "(PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DPLONG)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dp_other_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.", + "MetricExpr": "PM_CMPLU_STALL_DP/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dp_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. 
Includes binary floating point instructions in 32 and 64 bit binary floating point format.", + "MetricExpr": "PM_CMPLU_STALL_DPLONG/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "dplong_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction is an EIEIO waiting for response from L2", + "MetricExpr": "PM_CMPLU_STALL_EIEIO/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "eieio_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full", + "MetricExpr": "PM_CMPLU_STALL_EMQ_FULL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "emq_full_stall_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "emq_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load or store that suffered a translation miss", + "MetricExpr": "PM_CMPLU_STALL_ERAT_MISS/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "erat_miss_stall_cpi" + }, + { + "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete", + "MetricExpr": "PM_CMPLU_STALL_EXCEPTION/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "exception_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to execution units for other reasons.", + "MetricExpr": "(PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_PM - PM_CMPLU_STALL_CRYPTO - PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "exec_unit_other_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to execution units (FXU/VSU/CRU)", + "MetricExpr": "PM_CMPLU_STALL_EXEC_UNIT/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "exec_unit_stall_cpi" + }, + { + "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion", + "MetricExpr": "PM_CMPLU_STALL_FLUSH_ANY_THREAD/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "flush_any_thread_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)", + "MetricExpr": "PM_CMPLU_STALL_FXLONG/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxlong_stall_cpi" + }, + { + "BriefDescription": "Stalls due to short latency integer ops", + "MetricExpr": "(PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_FXLONG)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxu_other_stall_cpi" + }, + { + "BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. 
These instructions get routed to the ALU, ALU2, and DIV pipes", + "MetricExpr": "PM_CMPLU_STALL_FXU/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "fxu_stall_cpi" + }, + { + "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "issue_hold_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied", + "MetricExpr": "PM_CMPLU_STALL_LARX/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "larx_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data", + "MetricExpr": "PM_CMPLU_STALL_LHS/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lhs_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full", + "MetricExpr": "PM_CMPLU_STALL_LMQ_FULL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lmq_full_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish", + "MetricExpr": "PM_CMPLU_STALL_LOAD_FINISH/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "load_finish_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load that was held in LSAQ because the LRQ was full", + "MetricExpr": "PM_CMPLU_STALL_LRQ_FULL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lrq_full_stall_cpi" + }, + { + "BriefDescription": "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others", + "MetricExpr": "PM_CMPLU_STALL_LRQ_OTHER/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lrq_other_stall_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_LMQ_FULL + PM_CMPLU_STALL_ST_FWD + PM_CMPLU_STALL_LHS + PM_CMPLU_STALL_LSU_MFSPR + PM_CMPLU_STALL_LARX + PM_CMPLU_STALL_LRQ_OTHER)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lrq_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch", + "MetricExpr": "PM_CMPLU_STALL_LSAQ_ARB/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsaq_arb_stall_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_LRQ_FULL + PM_CMPLU_STALL_SRQ_FULL + PM_CMPLU_STALL_LSAQ_ARB)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsaq_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish", + "MetricExpr": "PM_CMPLU_STALL_LSU_FIN/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_fin_stall_cpi" + }, + { + "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. 
It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete", + "MetricExpr": "PM_CMPLU_STALL_LSU_FLUSH_NEXT/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_flush_next_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned", + "MetricExpr": "PM_CMPLU_STALL_LSU_MFSPR/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_mfspr_stall_cpi" + }, + { + "BriefDescription": "Completion LSU stall for other reasons", + "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_LSU_FIN - PM_CMPLU_STALL_STORE_FINISH - PM_CMPLU_STALL_STORE_DATA - PM_CMPLU_STALL_EIEIO - PM_CMPLU_STALL_STCX - PM_CMPLU_STALL_SLB - PM_CMPLU_STALL_TEND - PM_CMPLU_STALL_PASTE - PM_CMPLU_STALL_TLBIE - PM_CMPLU_STALL_STORE_PIPE_ARB - PM_CMPLU_STALL_STORE_FIN_ARB - PM_CMPLU_STALL_LOAD_FINISH + PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_LMQ_FULL - PM_CMPLU_STALL_ST_FWD - PM_CMPLU_STALL_LHS - PM_CMPLU_STALL_LSU_MFSPR - PM_CMPLU_STALL_LARX - PM_CMPLU_STALL_LRQ_OTHER + PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL - PM_CMPLU_STALL_LRQ_FULL - PM_CMPLU_STALL_SRQ_FULL - PM_CMPLU_STALL_LSAQ_ARB) / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_other_stall_cpi" + }, + { + "BriefDescription": "Completion stall by LSU instruction", + "MetricExpr": "PM_CMPLU_STALL_LSU/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "lsu_stall_cpi" + }, + { + "BriefDescription": "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)", + "MetricExpr": "PM_CMPLU_STALL_MTFPSCR/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "mtfpscr_stall_cpi" + }, + { + "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT", + "MetricExpr": "PM_CMPLU_STALL_NESTED_TBEGIN/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "nested_tbegin_stall_cpi" + }, + { + "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay", + "MetricExpr": "PM_CMPLU_STALL_NESTED_TEND/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "nested_tend_stall_cpi" + }, + { + "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread", + "MetricExpr": "PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "nothing_dispatched_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was one that must finish at dispatch.", + "MetricExpr": "PM_CMPLU_STALL_NTC_DISP_FIN/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_disp_fin_stall_cpi" + }, + { + "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. 
This event is used to account for cycles in which work is being completed in the CPI stack", + "MetricExpr": "PM_NTC_FIN/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_fin_cpi" + }, + { + "BriefDescription": "Completion stall due to ntc flush", + "MetricExpr": "PM_CMPLU_STALL_NTC_FLUSH/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_flush_stall_cpi" + }, + { + "BriefDescription": "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)", + "MetricExpr": "PM_NTC_ISSUE_HELD_ARB/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_issue_held_arb_cpi" + }, + { + "BriefDescription": "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it", + "MetricExpr": "PM_NTC_ISSUE_HELD_DARQ_FULL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_issue_held_darq_full_cpi" + }, + { + "BriefDescription": "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU", + "MetricExpr": "PM_NTC_ISSUE_HELD_OTHER/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "ntc_issue_held_other_cpi" + }, + { + "BriefDescription": "Cycles unaccounted for.", + "MetricExpr": "(PM_RUN_CYC - PM_1PLUS_PPC_CMPL - PM_CMPLU_STALL_THRD - PM_CMPLU_STALL - PM_ICT_NOSLOT_CYC)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "other_cpi" + }, + { + "BriefDescription": "Completion stall for other reasons", + "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "other_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a paste waiting for response from L2", + "MetricExpr": "PM_CMPLU_STALL_PASTE/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "paste_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish.", + "MetricExpr": "PM_CMPLU_STALL_PM/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "pm_stall_cpi" + }, + { + "BriefDescription": "Run cycles per run instruction", + "MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "run_cpi" + }, + { + "BriefDescription": "Run_cycles", + "MetricExpr": "PM_RUN_CYC/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "run_cyc_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_FXU + PM_CMPLU_STALL_DP + PM_CMPLU_STALL_DFU + PM_CMPLU_STALL_PM + PM_CMPLU_STALL_CRYPTO)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "scalar_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was awaiting L2 response for an SLB", + "MetricExpr": "PM_CMPLU_STALL_SLB/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "slb_stall_cpi" + }, + { + "BriefDescription": "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC", + "MetricExpr": "PM_CMPLU_STALL_SPEC_FINISH/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "spec_finish_stall_cpi" + }, + { +
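
Note on the accounting above: other_cpi is defined as a residual, so completion cycles (PM_1PLUS_PPC_CMPL), dispatch-starved cycles (PM_ICT_NOSLOT_CYC, nothing_dispatched_cpi), completion stalls (PM_CMPLU_STALL, stall_cpi), thread-blocked cycles (PM_CMPLU_STALL_THRD, thread_block_stall_cpi) and the residual add back up to run_cyc_cpi by construction. A minimal Python sketch of that identity with hypothetical counter values (perf evaluates the MetricExpr strings itself; this only mirrors the arithmetic):

counts = {
    "PM_RUN_CYC":          1_000_000_000,
    "PM_RUN_INST_CMPL":      800_000_000,
    "PM_1PLUS_PPC_CMPL":     550_000_000,  # cycles with at least one completion
    "PM_ICT_NOSLOT_CYC":     120_000_000,  # nothing_dispatched_cpi numerator
    "PM_CMPLU_STALL":        280_000_000,  # stall_cpi numerator
    "PM_CMPLU_STALL_THRD":    30_000_000,  # thread_block_stall_cpi numerator
}

def cpi(event):
    # every cpi_breakdown entry divides an event count by PM_RUN_INST_CMPL
    return counts[event] / counts["PM_RUN_INST_CMPL"]

# other_cpi is the residual, exactly as its MetricExpr defines it
other_cpi = (counts["PM_RUN_CYC"] - counts["PM_1PLUS_PPC_CMPL"]
             - counts["PM_CMPLU_STALL_THRD"] - counts["PM_CMPLU_STALL"]
             - counts["PM_ICT_NOSLOT_CYC"]) / counts["PM_RUN_INST_CMPL"]

parts = [cpi("PM_1PLUS_PPC_CMPL"), cpi("PM_ICT_NOSLOT_CYC"),
         cpi("PM_CMPLU_STALL"), cpi("PM_CMPLU_STALL_THRD"), other_cpi]
assert abs(sum(parts) - cpi("PM_RUN_CYC")) < 1e-9
print("run_cyc_cpi =", cpi("PM_RUN_CYC"), "= sum of components =", sum(parts))
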
"BriefDescription": "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full", + "MetricExpr": "PM_CMPLU_STALL_SRQ_FULL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "srq_full_stall_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_STORE_DATA + PM_CMPLU_STALL_EIEIO + PM_CMPLU_STALL_STCX + PM_CMPLU_STALL_SLB + PM_CMPLU_STALL_TEND + PM_CMPLU_STALL_PASTE + PM_CMPLU_STALL_TLBIE + PM_CMPLU_STALL_STORE_PIPE_ARB + PM_CMPLU_STALL_STORE_FIN_ARB)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "srq_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to store forward", + "MetricExpr": "PM_CMPLU_STALL_ST_FWD/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "st_fwd_stall_cpi" + }, + { + "BriefDescription": "Nothing completed and ICT not empty", + "MetricExpr": "PM_CMPLU_STALL/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a stcx waiting for response from L2", + "MetricExpr": "PM_CMPLU_STALL_STCX/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "stcx_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the next to finish instruction was a store waiting on data", + "MetricExpr": "PM_CMPLU_STALL_STORE_DATA/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "store_data_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe", + "MetricExpr": "PM_CMPLU_STALL_STORE_FIN_ARB/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "store_fin_arb_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish", + "MetricExpr": "PM_CMPLU_STALL_STORE_FINISH/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "store_finish_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. 
This means the instruction is ready to relaunch and tried once but lost arbitration", + "MetricExpr": "PM_CMPLU_STALL_STORE_PIPE_ARB/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "store_pipe_arb_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a tend instruction awaiting response from L2", + "MetricExpr": "PM_CMPLU_STALL_TEND/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "tend_stall_cpi" + }, + { + "BriefDescription": "Completion Stalled because the thread was blocked", + "MetricExpr": "PM_CMPLU_STALL_THRD/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "thread_block_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a tlbie waiting for response from L2", + "MetricExpr": "PM_CMPLU_STALL_TLBIE/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "tlbie_stall_cpi" + }, + { + "BriefDescription": "Vector stalls due to small latency double precision ops", + "MetricExpr": "(PM_CMPLU_STALL_VDP - PM_CMPLU_STALL_VDPLONG)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vdp_other_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish.", + "MetricExpr": "PM_CMPLU_STALL_VDP/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vdp_stall_cpi" + }, + { + "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.", + "MetricExpr": "PM_CMPLU_STALL_VDPLONG/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vdplong_stall_cpi" + }, + { + "MetricExpr": "(PM_CMPLU_STALL_VFXU + PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vector_stall_cpi" + }, + { + "BriefDescription": "Completion stall due to a long latency vector fixed point instruction (division, square root)", + "MetricExpr": "PM_CMPLU_STALL_VFXLONG/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vfxlong_stall_cpi" + }, + { + "BriefDescription": "Vector stalls due to small latency integer ops", + "MetricExpr": "(PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VFXLONG)/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vfxu_other_stall_cpi" + }, + { + "BriefDescription": "Finish stall due to a vector fixed point instruction in the execution pipeline. 
These instructions get routed to the ALU, ALU2, and DIV pipes", + "MetricExpr": "PM_CMPLU_STALL_VFXU/PM_RUN_INST_CMPL", + "MetricGroup": "cpi_breakdown", + "MetricName": "vfxu_stall_cpi" + }, + { + "BriefDescription": "estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * PM_MRK_DATA_FROM_DL2L3_MOD_CYC / PM_MRK_DATA_FROM_DL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl2l3_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 distant SHR miss rates with measured DL2L3 SHR latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * PM_MRK_DATA_FROM_DL2L3_SHR_CYC / PM_MRK_DATA_FROM_DL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl2l3_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of distant L4 miss rates with measured DL4 latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_DL4 * PM_MRK_DATA_FROM_DL4_CYC / PM_MRK_DATA_FROM_DL4 / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dl4_cpi_percent" + }, + { + "BriefDescription": "estimate of distant memory miss rates with measured DMEM latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_DMEM * PM_MRK_DATA_FROM_DMEM_CYC / PM_MRK_DATA_FROM_DMEM / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "dmem_cpi_percent" + }, + { + "BriefDescription": "estimate of dl21 MOD miss rates with measured L21 MOD latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L21_MOD * PM_MRK_DATA_FROM_L21_MOD_CYC / PM_MRK_DATA_FROM_L21_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l21_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl21 SHR miss rates with measured L21 SHR latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L21_SHR * PM_MRK_DATA_FROM_L21_SHR_CYC / PM_MRK_DATA_FROM_L21_SHR / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l21_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2 miss rates with measured L2 latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L2 * PM_MRK_DATA_FROM_L2_CYC / PM_MRK_DATA_FROM_L2 / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l2_cpi_percent" + }, + { + "BriefDescription": "estimate of dl31 MOD miss rates with measured L31 MOD latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L31_MOD * PM_MRK_DATA_FROM_L31_MOD_CYC / PM_MRK_DATA_FROM_L31_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l31_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl31 SHR miss rates with measured L31 SHR latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L31_SHR * PM_MRK_DATA_FROM_L31_SHR_CYC / PM_MRK_DATA_FROM_L31_SHR / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l31_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of dl3 miss rates with measured L3 latency as a % of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_L3 * PM_MRK_DATA_FROM_L3_CYC / PM_MRK_DATA_FROM_L3 / PM_CMPLU_STALL_DCACHE_MISS * 100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "l3_cpi_percent" + }, + { + "BriefDescription": "estimate of 
Local memory miss rates with measured LMEM latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_LMEM * PM_MRK_DATA_FROM_LMEM_CYC / PM_MRK_DATA_FROM_LMEM / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "lmem_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 remote MOD miss rates with measured RL2L3 MOD latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * PM_MRK_DATA_FROM_RL2L3_MOD_CYC / PM_MRK_DATA_FROM_RL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl2l3_mod_cpi_percent" + }, + { + "BriefDescription": "estimate of dl2l3 shared miss rates with measured RL2L3 SHR latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * PM_MRK_DATA_FROM_RL2L3_SHR_CYC / PM_MRK_DATA_FROM_RL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS * 100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl2l3_shr_cpi_percent" + }, + { + "BriefDescription": "estimate of remote L4 miss rates with measured RL4 latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_RL4 * PM_MRK_DATA_FROM_RL4_CYC / PM_MRK_DATA_FROM_RL4 / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rl4_cpi_percent" + }, + { + "BriefDescription": "estimate of remote memory miss rates with measured RMEM latency as a %of dcache miss cpi", + "MetricExpr": "PM_DATA_FROM_RMEM * PM_MRK_DATA_FROM_RMEM_CYC / PM_MRK_DATA_FROM_RMEM / PM_CMPLU_STALL_DCACHE_MISS *100", + "MetricGroup": "estimated_dcache_miss_cpi", + "MetricName": "rmem_cpi_percent" + } +] -- cgit v1.2.3-55-g7522 From 0133491d4641b493aa0d0e5bd66b52999619cd8a Mon Sep 17 00:00:00 2001 From: Paul Clarke Date: Sat, 9 Feb 2019 13:14:27 -0500 Subject: perf vendor events power9: Dl1_reloads, instruction_misses, l[23]_stats & pteg_reloads metrics Descriptions of metrics for POWER9 processors can be found in the "POWER9 Performance Monitor Unit User’s Guide", which is currently available on the "IBM Portal for OpenPOWER" (https://www-355.ibm.com/systems/power/openpower/welcome.xhtml) at https://www-355.ibm.com/systems/power/openpower/posting.xhtml?postingId=4948CDE1963C9BCA852582F800718190 This patch is for metric groups: - dl1_reloads_percent_per_inst - dl1_reloads_percent_per_ref - instruction_misses_percent_per_inst - l2_stats - l3_stats - pteg_reloads_percent_per_inst - pteg_reloads_percent_per_ref Signed-off-by: Paul Clarke Cc: Ananth N Mavinakayanahalli Cc: Carl Love Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Naveen N. 
Rao Cc: Sukadev Bhattiprolu Cc: linuxppc-dev@ozlabs.org Link: http://lkml.kernel.org/r/20190209181429.23950-3-pc@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power9/metrics.json | 660 +++++++++++++++++++++ 1 file changed, 660 insertions(+) diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json index cd46ebb8da6a..166f95518c45 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -484,6 +484,210 @@ "MetricGroup": "cpi_breakdown", "MetricName": "vfxu_stall_cpi" }, + { + "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant Memory per Inst", + "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L2 per Inst", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_miss_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L2 per Inst", + "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l2_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3 M state, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3 S tate, other core per Inst", + "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads that came from the L3 and were brought into the L3 by a prefetch, per instruction completed", + "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_mepf_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L3 per Inst", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_miss_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from L3 per Inst", + 
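
Every entry in this dl1_reloads_percent_per_inst group has the same shape, <reload event> * 100 / PM_RUN_INST_CMPL, and the dl1_reloads_percent_per_ref group that follows repeats the same sources with PM_L1_DCACHE_RELOAD_VALID as the denominator. A short Python sketch of both normalizations, with hypothetical counter values (illustrative only; perf computes these from the MetricExpr strings directly):

counts = {
    "PM_RUN_INST_CMPL":          800_000_000,
    "PM_L1_DCACHE_RELOAD_VALID":  20_000_000,
    "PM_DATA_FROM_L3":             3_000_000,  # hypothetical reload-source count
}

# dl1_reload_from_l3_rate_percent: reloads from L3 per 100 run instructions
per_inst = counts["PM_DATA_FROM_L3"] * 100 / counts["PM_RUN_INST_CMPL"]
# dl1_reload_from_l3_percent (per_ref group below): share of all DL1 reloads
per_ref = counts["PM_DATA_FROM_L3"] * 100 / counts["PM_L1_DCACHE_RELOAD_VALID"]

print(f"dl1_reload_from_l3_rate_percent = {per_inst:.4f}")
print(f"dl1_reload_from_l3_percent      = {per_ref:.2f}")
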
"MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_l3_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Local Memory per Inst", + "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst", + "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "dl1_reload_from_rmem_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 demand load misses per run instruction", + "MetricExpr": "PM_LD_MISS_L1 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "dl1_reloads_percent_per_inst", + "MetricName": "l1_ld_miss_rate_percent" + }, + { + "BriefDescription": "% of DL1 misses that result in a cache reload", + "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID * 100 / PM_LD_MISS_L1", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_miss_reloads_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant Memory", + "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_dmem_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core", + "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l21_mod_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L2, other core", + "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l21_shr_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from sources beyond the local L2", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_miss_percent" + }, + { + "BriefDescription": "% of DL1 reloads from L2", + "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l2_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core", + "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": 
"dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l31_mod_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core", + "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l31_shr_percent" + }, + { + "BriefDescription": "% of DL1 Reloads that came from L3 and were brought into the L3 by a prefetch", + "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_mepf_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from sources beyond the local L3", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_miss_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from L3", + "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_l3_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Local Memory", + "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_lmem_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Modified)", + "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote Memory", + "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricGroup": "dl1_reloads_percent_per_ref", + "MetricName": "dl1_reload_from_rmem_percent" + }, { "BriefDescription": "estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi", "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * PM_MRK_DATA_FROM_DL2L3_MOD_CYC / PM_MRK_DATA_FROM_DL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", @@ -573,5 +777,461 @@ "MetricExpr": "PM_DATA_FROM_RMEM * PM_MRK_DATA_FROM_RMEM_CYC / PM_MRK_DATA_FROM_RMEM / PM_CMPLU_STALL_DCACHE_MISS *100", "MetricGroup": "estimated_dcache_miss_cpi", "MetricName": "rmem_cpi_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L4 per Inst", + "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant Memory per Inst", + "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": 
"instruction_misses_percent_per_inst", + "MetricName": "inst_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", + "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", + "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from L2 per Inst", + "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l2_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core per Inst", + "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3 other core per Inst", + "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from L3 per Inst", + "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_l3_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local L4 per Inst", + "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local Memory per Inst", + "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified) per Inst", + "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared) per Inst", + "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L4 per Inst", + "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rl4_rate_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote Memory per Inst", + "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "inst_from_rmem_rate_percent" + }, + { + "BriefDescription": "Instruction Cache Miss Rate (Per run Instruction)(%)", + "MetricExpr": "PM_L1_ICACHE_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "instruction_misses_percent_per_inst", + "MetricName": "l1_inst_miss_rate_percent" + }, + { + "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)", + "MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100", + "MetricGroup": "l2_stats", + "MetricName": 
"l2_co_m_rd_util" + }, + { + "BriefDescription": "L2 dcache invalidates per run inst (per core)", + "MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_dc_inv_rate_percent" + }, + { + "BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)", + "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_dem_ld_disp_percent" + }, + { + "BriefDescription": "L2 Icache invalidates per run inst (per core)", + "MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_ic_inv_rate_percent" + }, + { + "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)", + "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_inst_miss_ratio_percent" + }, + { + "BriefDescription": "Average number of cycles between L2 Load hits", + "MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2", + "MetricGroup": "l2_stats", + "MetricName": "l2_ld_hit_frequency" + }, + { + "BriefDescription": "Average number of cycles between L2 Load misses", + "MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2", + "MetricGroup": "l2_stats", + "MetricName": "l2_ld_miss_frequency" + }, + { + "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)", + "MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_ld_miss_ratio_percent" + }, + { + "BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)", + "MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_ld_rd_util" + }, + { + "BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks", + "MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_ldmiss_wr_util" + }, + { + "BriefDescription": "L2 local pump prediction success", + "MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_local_pred_correct_percent" + }, + { + "BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs", + "MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_mod_co_percent" + }, + { + "BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts", + "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_rc_ld_disp_addr_fail_percent" + }, + { + "BriefDescription": "% of L2 Load RC dispatch attempts that failed", + "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_rc_ld_disp_fail_percent" + }, + { + "BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts", + "MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_rc_st_disp_addr_fail_percent" + }, + { + "BriefDescription": "% of L2 Store RC dispatch attempts that failed", + "MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_rc_st_disp_fail_percent" 
+ }, + { + "BriefDescription": "L2 Cache Read Utilization (per core)", + "MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)", + "MetricGroup": "l2_stats", + "MetricName": "l2_rd_util_percent" + }, + { + "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs", + "MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_shr_co_percent" + }, + { + "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", + "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_st_miss_ratio_percent" + }, + { + "BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)", + "MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_st_rd_util" + }, + { + "BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks", + "MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100", + "MetricGroup": "l2_stats", + "MetricName": "l2_st_wr_util" + }, + { + "BriefDescription": "L2 Cache Write Utilization (per core)", + "MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)", + "MetricGroup": "l2_stats", + "MetricName": "l2_wr_util_percent" + }, + { + "BriefDescription": "Average number of cycles between L3 Load hits", + "MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2", + "MetricGroup": "l3_stats", + "MetricName": "l3_ld_hit_frequency" + }, + { + "BriefDescription": "Average number of cycles between L3 Load misses", + "MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2", + "MetricGroup": "l3_stats", + "MetricName": "l3_ld_miss_frequency" + }, + { + "BriefDescription": "Average number of Write-in machines used. 
1 of 8 WI machines is sampled every L3 cycle", + "MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8", + "MetricGroup": "l3_stats", + "MetricName": "l3_wi_usage" + }, + { + "BriefDescription": "DERAT Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "derat_miss_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified) per inst", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared) per inst", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant Memory per inst", + "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_dmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l21_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l21_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L2 per inst", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l2_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l31_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core per inst", + "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l31_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L3 per inst", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_l3_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local Memory per inst", + "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_lmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified) per inst", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + 
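
The pteg_reloads_percent_per_inst entries here and the pteg_reloads_percent_per_ref entries that follow count the same PM_DPTEG_FROM_* reload sources against different denominators (PM_RUN_INST_CMPL vs PM_DTLB_MISS), so the per-instruction rate is simply the per-reference share scaled by the DTLB miss rate. A small Python sketch with hypothetical counts to make that relationship concrete (illustrative only; perf evaluates the JSON expressions itself):

counts = {
    "PM_RUN_INST_CMPL":  800_000_000,
    "PM_LSU_DERAT_MISS":   4_000_000,  # hypothetical
    "PM_DTLB_MISS":        1_000_000,  # hypothetical
    "PM_DPTEG_FROM_L3":      200_000,  # hypothetical
}

derat_miss_rate_percent = counts["PM_LSU_DERAT_MISS"] * 100 / counts["PM_RUN_INST_CMPL"]
pteg_from_l3_rate_percent = counts["PM_DPTEG_FROM_L3"] * 100 / counts["PM_RUN_INST_CMPL"]
pteg_from_l3_percent = counts["PM_DPTEG_FROM_L3"] * 100 / counts["PM_DTLB_MISS"]

# per-instruction rate equals the per-reference share scaled by DTLB misses/inst
scaled = pteg_from_l3_percent * counts["PM_DTLB_MISS"] / counts["PM_RUN_INST_CMPL"]
assert abs(pteg_from_l3_rate_percent - scaled) < 1e-12
print(derat_miss_rate_percent, pteg_from_l3_rate_percent, pteg_from_l3_percent)
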
"MetricName": "pteg_from_rl2l3_mod_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared) per inst", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rl2l3_shr_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L4 per inst", + "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rl4_rate_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote Memory per inst", + "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "pteg_reloads_percent_per_inst", + "MetricName": "pteg_from_rmem_rate_percent" + }, + { + "BriefDescription": "% of DERAT misses that result in an ERAT reload", + "MetricExpr": "PM_DTLB_MISS * 100 / PM_LSU_DERAT_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "derat_miss_reload_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant L4", + "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dl4_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Distant Memory", + "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_dmem_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core", + "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l21_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L2, other core", + "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l21_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L2", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l2_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core", + "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l31_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Private L3, other core", + "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l31_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from L3", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_l3_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Local L4", + "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_ll4_percent" + }, + { + "BriefDescription": "% of 
DERAT reloads from Local Memory", + "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_lmem_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified)", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote L4", + "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rl4_percent" + }, + { + "BriefDescription": "% of DERAT reloads from Remote Memory", + "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS", + "MetricGroup": "pteg_reloads_percent_per_ref", + "MetricName": "pteg_from_rmem_percent" } ] -- cgit v1.2.3-55-g7522 From a4d832726471963b327fae33f14fa28c83db6a0e Mon Sep 17 00:00:00 2001 From: Paul Clarke Date: Sat, 9 Feb 2019 13:14:28 -0500 Subject: perf vendor events power9: Branch_prediction, instruction_stats, latency, lsu_rejects, memory, prefetch & translation metrics Descriptions of metrics for POWER9 processors can be found in the "POWER9 Performance Monitor Unit User’s Guide", which is currently available on the "IBM Portal for OpenPOWER" (https://www-355.ibm.com/systems/power/openpower/welcome.xhtml) at https://www-355.ibm.com/systems/power/openpower/posting.xhtml?postingId=4948CDE1963C9BCA852582F800718190 This patch is for metric groups: - branch_prediction - instruction_stats_percent_per_ref - latency - lsu_rejects - memory - prefetch - translation Plus, some whitespace changes. Signed-off-by: Paul Clarke Cc: Ananth N Mavinakayanahalli Cc: Carl Love Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Naveen N. 
Rao Cc: Sukadev Bhattiprolu Cc: linuxppc-dev@ozlabs.org Link: http://lkml.kernel.org/r/20190209181429.23950-4-pc@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power9/metrics.json | 403 ++++++++++++++++++++- 1 file changed, 390 insertions(+), 13 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json index 166f95518c45..c39a922aaf84 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -1,4 +1,39 @@ [ + { + "MetricExpr": "PM_BR_MPRED_CMPL / PM_BR_PRED * 100", + "MetricGroup": "branch_prediction", + "MetricName": "br_misprediction_percent" + }, + { + "BriefDescription": "Count cache branch misprediction per instruction", + "MetricExpr": "PM_BR_MPRED_CCACHE / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ccache_mispredict_rate_percent" + }, + { + "BriefDescription": "Count cache branch misprediction", + "MetricExpr": "PM_BR_MPRED_CCACHE / PM_BR_PRED_CCACHE * 100", + "MetricGroup": "branch_prediction", + "MetricName": "ccache_misprediction_percent" + }, + { + "BriefDescription": "Link stack branch misprediction", + "MetricExpr": "PM_BR_MPRED_LSTACK / PM_RUN_INST_CMPL * 100", + "MetricGroup": "branch_prediction", + "MetricName": "lstack_mispredict_rate_percent" + }, + { + "BriefDescription": "Link stack branch misprediction", + "MetricExpr": "PM_BR_MPRED_LSTACK/ PM_BR_PRED_LSTACK * 100", + "MetricGroup": "branch_prediction", + "MetricName": "lstack_misprediction_percent" + }, + { + "BriefDescription": "% Branches Taken", + "MetricExpr": "PM_BR_TAKEN_CMPL * 100 / PM_BRU_FIN", + "MetricGroup": "branch_prediction", + "MetricName": "taken_branches_percent" + }, { "BriefDescription": "Completion stall due to a Branch Unit", "MetricExpr": "PM_CMPLU_STALL_BRU/PM_RUN_INST_CMPL", @@ -881,13 +916,121 @@ "MetricName": "l1_inst_miss_rate_percent" }, { - "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)", + "BriefDescription": "Icache Fetchs per Icache Miss", + "MetricExpr": "(PM_L1_ICACHE_MISS - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "icache_miss_reload" + }, + { + "BriefDescription": "% of ICache reloads due to prefetch", + "MetricExpr": "PM_IC_PREF_WRITE * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "icache_pref_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified)", + "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl2l3_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared)", + "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl2l3_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant L4", + "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dl4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Distant Memory", + "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_dmem_percent" + }, + { + "BriefDescription": "% of ICache reloads 
from Private L2, other core", + "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l21_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L2, other core", + "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l21_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from L2", + "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l2_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core", + "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l31_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Private L3, other core", + "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l31_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from L3", + "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_l3_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local L4", + "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_ll4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Local Memory", + "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_lmem_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified)", + "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl2l3_mod_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared)", + "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl2l3_shr_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote L4", + "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rl4_percent" + }, + { + "BriefDescription": "% of ICache reloads from Remote Memory", + "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_L1_ICACHE_MISS", + "MetricGroup": "instruction_stats_percent_per_ref", + "MetricName": "inst_from_rmem_percent" + }, + { + "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)", "MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100", "MetricGroup": "l2_stats", "MetricName": "l2_co_m_rd_util" }, { - "BriefDescription": "L2 dcache invalidates per run inst (per core)", + "BriefDescription": "L2 dcache invalidates per run inst (per core)", "MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100", "MetricGroup": "l2_stats", "MetricName": "l2_dc_inv_rate_percent" @@ -899,14 +1042,14 @@ "MetricName": "l2_dem_ld_disp_percent" }, { - "BriefDescription": "L2 Icache invalidates per run inst (per core)", + "BriefDescription": "L2 Icache invalidates per run inst (per core)", "MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100", 
"MetricGroup": "l2_stats", "MetricName": "l2_ic_inv_rate_percent" }, { - "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)", - "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100", + "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)", + "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100", "MetricGroup": "l2_stats", "MetricName": "l2_inst_miss_ratio_percent" }, @@ -923,7 +1066,7 @@ "MetricName": "l2_ld_miss_frequency" }, { - "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)", + "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)", "MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100", "MetricGroup": "l2_stats", "MetricName": "l2_ld_miss_ratio_percent" @@ -941,7 +1084,7 @@ "MetricName": "l2_ldmiss_wr_util" }, { - "BriefDescription": "L2 local pump prediction success", + "BriefDescription": "L2 local pump prediction success", "MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100", "MetricGroup": "l2_stats", "MetricName": "l2_local_pred_correct_percent" @@ -953,7 +1096,7 @@ "MetricName": "l2_mod_co_percent" }, { - "BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts", + "BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts", "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100", "MetricGroup": "l2_stats", "MetricName": "l2_rc_ld_disp_addr_fail_percent" @@ -965,7 +1108,7 @@ "MetricName": "l2_rc_ld_disp_fail_percent" }, { - "BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts", + "BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts", "MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100", "MetricGroup": "l2_stats", "MetricName": "l2_rc_st_disp_addr_fail_percent" @@ -983,13 +1126,13 @@ "MetricName": "l2_rd_util_percent" }, { - "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs", + "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs", "MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", "MetricGroup": "l2_stats", "MetricName": "l2_shr_co_percent" }, { - "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", + "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100", "MetricGroup": "l2_stats", "MetricName": "l2_st_miss_ratio_percent" @@ -1025,13 +1168,205 @@ "MetricName": "l3_ld_miss_frequency" }, { - "BriefDescription": "Average number of Write-in machines used. 1 of 8 WI machines is sampled every L3 cycle", + "BriefDescription": "Average number of Write-in machines used. 
1 of 8 WI machines is sampled every L3 cycle", "MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8", "MetricGroup": "l3_stats", "MetricName": "l3_wi_usage" }, { - "BriefDescription": "DERAT Miss Rate (per run instruction)(%)", + "BriefDescription": "Average icache miss latency", + "MetricExpr": "PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ", + "MetricGroup": "latency", + "MetricName": "average_il1_miss_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD", + "MetricGroup": "latency", + "MetricName": "dl2l3_mod_latency" + }, + { + "BriefDescription": "Marked L2L3 distant Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR", + "MetricGroup": "latency", + "MetricName": "dl2l3_shr_latency" + }, + { + "BriefDescription": "Distant L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4", + "MetricGroup": "latency", + "MetricName": "dl4_latency" + }, + { + "BriefDescription": "Marked Dmem Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM", + "MetricGroup": "latency", + "MetricName": "dmem_latency" + }, + { + "BriefDescription": "average L1 miss latency using marked events", + "MetricExpr": "PM_MRK_LD_MISS_L1_CYC / PM_MRK_LD_MISS_L1", + "MetricGroup": "latency", + "MetricName": "estimated_dl1miss_latency" + }, + { + "BriefDescription": "Marked L21 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD", + "MetricGroup": "latency", + "MetricName": "l21_mod_latency" + }, + { + "BriefDescription": "Marked L21 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR", + "MetricGroup": "latency", + "MetricName": "l21_shr_latency" + }, + { + "BriefDescription": "Marked L2 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2", + "MetricGroup": "latency", + "MetricName": "l2_latency" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD", + "MetricGroup": "latency", + "MetricName": "l31_mod_latency" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR", + "MetricGroup": "latency", + "MetricName": "l31_shr_latency" + }, + { + "BriefDescription": "Marked L3 Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3", + "MetricGroup": "latency", + "MetricName": "l3_latency" + }, + { + "BriefDescription": "Local L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4", + "MetricGroup": "latency", + "MetricName": "ll4_latency" + }, + { + "BriefDescription": "Marked Lmem Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM", + "MetricGroup": "latency", + "MetricName": "lmem_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD", + "MetricGroup": "latency", + "MetricName": "rl2l3_mod_latency" + }, + { + "BriefDescription": "Marked L2L3 remote Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR", + "MetricGroup": "latency", + "MetricName": "rl2l3_shr_latency" + }, + { + "BriefDescription": "Remote L4 average load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4", + "MetricGroup": "latency", + "MetricName": "rl4_latency" + }, + { + "BriefDescription": 
"Marked Rmem Load latency", + "MetricExpr": "PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM", + "MetricGroup": "latency", + "MetricName": "rmem_latency" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "erat_reject_rate_percent" + }, + { + "BriefDescription": "LHS reject ratio", + "MetricExpr": "PM_LSU_REJECT_LHS *100/ PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "lhs_reject_rate_percent" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "lsu_rejects", + "MetricName": "lmq_full_reject_rate_percent" + }, + { + "BriefDescription": "ERAT miss reject ratio", + "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_LD_REF_L1", + "MetricGroup": "lsu_rejects", + "MetricName": "lmq_full_reject_ratio_percent" + }, + { + "BriefDescription": "L4 locality(%)", + "MetricExpr": "PM_DATA_FROM_LL4 * 100 / (PM_DATA_FROM_LL4 + PM_DATA_FROM_RL4 + PM_DATA_FROM_DL4)", + "MetricGroup": "memory", + "MetricName": "l4_locality" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to distant L4", + "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_DL4", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_dmem" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to remote+distant L4", + "MetricExpr": "PM_DATA_FROM_LL4 / (PM_DATA_FROM_DL4 + PM_DATA_FROM_RL4)", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_mem" + }, + { + "BriefDescription": "Ratio of reloads from local L4 to remote L4", + "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_RL4", + "MetricGroup": "memory", + "MetricName": "ld_ll4_per_ld_rl4" + }, + { + "BriefDescription": "Number of loads from local memory per loads from distant memory", + "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_dmem" + }, + { + "BriefDescription": "Number of loads from local memory per loads from remote and distant memory", + "MetricExpr": "PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM)", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_mem" + }, + { + "BriefDescription": "Number of loads from local memory per loads from remote memory", + "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM", + "MetricGroup": "memory", + "MetricName": "ld_lmem_per_ld_rmem" + }, + { + "BriefDescription": "Number of loads from remote memory per loads from distant memory", + "MetricExpr": "PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM", + "MetricGroup": "memory", + "MetricName": "ld_rmem_per_ld_dmem" + }, + { + "BriefDescription": "Memory locality", + "MetricExpr": "PM_DATA_FROM_LMEM * 100/ (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM)", + "MetricGroup": "memory", + "MetricName": "mem_locality_percent" + }, + { + "BriefDescription": "L1 Prefetches issued by the prefetch machine per instruction (per thread)", + "MetricExpr": "PM_L1_PREF / PM_RUN_INST_CMPL * 100", + "MetricGroup": "prefetch", + "MetricName": "l1_prefetch_rate_percent" + }, + { + "BriefDescription": "DERAT Miss Rate (per run instruction)(%)", "MetricExpr": "PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL", "MetricGroup": "pteg_reloads_percent_per_inst", "MetricName": "derat_miss_rate_percent" @@ -1233,5 +1568,47 @@ "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS", "MetricGroup": "pteg_reloads_percent_per_ref", "MetricName": "pteg_from_rmem_percent" + }, + { + 
"BriefDescription": "% DERAT miss rate for 4K page per inst", + "MetricExpr": "PM_DERAT_MISS_4K * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_4k_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 4K page", + "MetricExpr": "PM_DERAT_MISS_4K / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_4k_miss_ratio" + }, + { + "BriefDescription": "% DERAT miss ratio for 64K page per inst", + "MetricExpr": "PM_DERAT_MISS_64K * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "derat_64k_miss_rate_percent" + }, + { + "BriefDescription": "DERAT miss ratio for 64K page", + "MetricExpr": "PM_DERAT_MISS_64K / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_64k_miss_ratio" + }, + { + "BriefDescription": "DERAT miss ratio", + "MetricExpr": "PM_LSU_DERAT_MISS / PM_LSU_DERAT_MISS", + "MetricGroup": "translation", + "MetricName": "derat_miss_ratio" + }, + { + "BriefDescription": "% DSLB_Miss_Rate per inst", + "MetricExpr": "PM_DSLB_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "dslb_miss_rate_percent" + }, + { + "BriefDescription": "% ISLB miss rate per inst", + "MetricExpr": "PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "translation", + "MetricName": "islb_miss_rate_percent" } ] -- cgit v1.2.3-55-g7522 From 33937e599449c65dbd69c60d7e2255012427baed Mon Sep 17 00:00:00 2001 From: Paul Clarke Date: Sat, 9 Feb 2019 13:14:29 -0500 Subject: perf vendor events power9: General metrics Descriptions of metrics for POWER9 processors can be found in the "POWER9 Performance Monitor Unit User’s Guide", which is currently available on the "IBM Portal for OpenPOWER" (https://www-355.ibm.com/systems/power/openpower/welcome.xhtml) at https://www-355.ibm.com/systems/power/openpower/posting.xhtml?postingId=4948CDE1963C9BCA852582F800718190 This patch is for metric groups: - general and other metrics not in a metric group. Signed-off-by: Paul Clarke Cc: Ananth N Mavinakayanahalli Cc: Carl Love Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Naveen N. 
Rao Cc: Sukadev Bhattiprolu Cc: linuxppc-dev@ozlabs.org Link: http://lkml.kernel.org/r/20190209181429.23950-5-pc@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power9/metrics.json | 368 +++++++++++++++++++++ 1 file changed, 368 insertions(+) diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json index c39a922aaf84..811c2a8c1c9e 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -813,6 +813,114 @@ "MetricGroup": "estimated_dcache_miss_cpi", "MetricName": "rmem_cpi_percent" }, + { + "BriefDescription": "Branch Mispredict flushes per instruction", + "MetricExpr": "PM_FLUSH_MPRED / PM_RUN_INST_CMPL * 100", + "MetricGroup": "general", + "MetricName": "br_mpred_flush_rate_percent" + }, + { + "BriefDescription": "Cycles per instruction", + "MetricExpr": "PM_CYC / PM_INST_CMPL", + "MetricGroup": "general", + "MetricName": "cpi" + }, + { + "BriefDescription": "GCT empty cycles", + "MetricExpr": "(PM_FLUSH_DISP / PM_RUN_INST_CMPL) * 100", + "MetricGroup": "general", + "MetricName": "disp_flush_rate_percent" + }, + { + "BriefDescription": "% DTLB miss rate per inst", + "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL *100", + "MetricGroup": "general", + "MetricName": "dtlb_miss_rate_percent" + }, + { + "BriefDescription": "Flush rate (%)", + "MetricExpr": "PM_FLUSH * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "flush_rate_percent" + }, + { + "BriefDescription": "Instructions per cycles", + "MetricExpr": "PM_INST_CMPL / PM_CYC", + "MetricGroup": "general", + "MetricName": "ipc" + }, + { + "BriefDescription": "% ITLB miss rate per inst", + "MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL *100", + "MetricGroup": "general", + "MetricName": "itlb_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 load misses per L1 load ref", + "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100", + "MetricGroup": "general", + "MetricName": "l1_ld_miss_ratio_percent" + }, + { + "BriefDescription": "Percentage of L1 store misses per run instruction", + "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l1_st_miss_rate_percent" + }, + { + "BriefDescription": "Percentage of L1 store misses per L1 store ref", + "MetricExpr": "PM_ST_MISS_L1 / PM_ST_FIN * 100", + "MetricGroup": "general", + "MetricName": "l1_st_miss_ratio_percent" + }, + { + "BriefDescription": "L2 Instruction Miss Rate (per instruction)(%)", + "MetricExpr": "PM_INST_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_inst_miss_rate_percent" + }, + { + "BriefDescription": "L2 dmand Load Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_ld_miss_rate_percent" + }, + { + "BriefDescription": "L2 PTEG Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DPTEG_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l2_pteg_miss_rate_percent" + }, + { + "BriefDescription": "L3 Instruction Miss Rate (per instruction)(%)", + "MetricExpr": "PM_INST_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l3_inst_miss_rate_percent" + }, + { + "BriefDescription": "L3 demand Load Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + 
"MetricName": "l3_ld_miss_rate_percent" + }, + { + "BriefDescription": "L3 PTEG Miss Rate (per run instruction)(%)", + "MetricExpr": "PM_DPTEG_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", + "MetricGroup": "general", + "MetricName": "l3_pteg_miss_rate_percent" + }, + { + "BriefDescription": "Run cycles per cycle", + "MetricExpr": "PM_RUN_CYC / PM_CYC*100", + "MetricGroup": "general", + "MetricName": "run_cycles_percent" + }, + { + "BriefDescription": "Instruction dispatch-to-completion ratio", + "MetricExpr": "PM_INST_DISP / PM_INST_CMPL", + "MetricGroup": "general", + "MetricName": "speculation" + }, { "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified) per Inst", "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", @@ -1610,5 +1718,265 @@ "MetricExpr": "PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL", "MetricGroup": "translation", "MetricName": "islb_miss_rate_percent" + }, + { + "BriefDescription": "ANY_SYNC_STALL_CPI", + "MetricExpr": "PM_CMPLU_STALL_ANY_SYNC / PM_RUN_INST_CMPL", + "MetricName": "any_sync_stall_cpi" + }, + { + "BriefDescription": "Avg. more than 1 instructions completed", + "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL", + "MetricName": "average_completed_instruction_set_size" + }, + { + "BriefDescription": "% Branches per instruction", + "MetricExpr": "PM_BRU_FIN / PM_RUN_INST_CMPL", + "MetricName": "branches_per_inst" + }, + { + "BriefDescription": "Cycles in which at least one instruction completes in this thread", + "MetricExpr": "PM_1PLUS_PPC_CMPL/PM_RUN_INST_CMPL", + "MetricName": "completion_cpi" + }, + { + "BriefDescription": "cycles", + "MetricExpr": "PM_RUN_CYC", + "MetricName": "custom_secs" + }, + { + "BriefDescription": "Percentage Cycles atleast one instruction dispatched", + "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100", + "MetricName": "cycles_atleast_one_inst_dispatched_percent" + }, + { + "BriefDescription": "Cycles per instruction group", + "MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL", + "MetricName": "cycles_per_completed_instructions_set" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Distant L4", + "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_dl4_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Distant L4 per Inst", + "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dl1_reload_from_dl4_rate_percent" + }, + { + "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", + "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL", + "MetricName": "dl1_reload_from_l31_rate_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Local L4", + "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_ll4_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Local L4 per Inst", + "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dl1_reload_from_ll4_rate_percent" + }, + { + "BriefDescription": "% of DL1 dL1_Reloads from Remote L4", + "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", + "MetricName": "dl1_reload_from_rl4_percent" + }, + { + "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst", + "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dl1_reload_from_rl4_rate_percent" + }, + { + "BriefDescription": "Rate of DERAT reloads from L2", + "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricName": 
"dpteg_from_l2_rate_percent" + }, + { + "BriefDescription": "Rate of DERAT reloads from L3", + "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricName": "dpteg_from_l3_rate_percent" + }, + { + "BriefDescription": "Cycles in which the oldest instruction is finished and ready to complete for waiting to get through the completion pipe", + "MetricExpr": "PM_NTC_ALL_FIN / PM_RUN_INST_CMPL", + "MetricName": "finish_to_cmpl_cpi" + }, + { + "BriefDescription": "Total Fixed point operations", + "MetricExpr": "PM_FXU_FIN/PM_RUN_INST_CMPL", + "MetricName": "fixed_per_inst" + }, + { + "BriefDescription": "All FXU Busy", + "MetricExpr": "PM_FXU_BUSY / PM_CYC", + "MetricName": "fxu_all_busy" + }, + { + "BriefDescription": "All FXU Idle", + "MetricExpr": "PM_FXU_IDLE / PM_CYC", + "MetricName": "fxu_all_idle" + }, + { + "BriefDescription": "Ict empty for this thread due to branch mispred", + "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_br_mpred_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred", + "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_br_mpred_icmiss_cpi" + }, + { + "BriefDescription": "ICT other stalls", + "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_cyc_other_cpi" + }, + { + "BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. 
Could be GPR/VSR/VMR/FPR/CR/XVF", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_hb_full_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_issq_cpi" + }, + { + "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI", + "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_other_cpi" + }, + { + "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_sync_cpi" + }, + { + "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch", + "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_disp_held_tbegin_cpi" + }, + { + "BriefDescription": "ICT_NOSLOT_IC_L2_CPI", + "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_ic_l2_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3", + "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_ic_l3_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache", + "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_ic_l3miss_cpi" + }, + { + "BriefDescription": "Ict empty for this thread due to Icache Miss", + "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL", + "MetricName": "ict_noslot_ic_miss_cpi" + }, + { + "BriefDescription": "Rate of IERAT reloads from L2", + "MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_l2_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from L3", + "MetricExpr": "PM_IPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_l3_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from local memory", + "MetricExpr": "PM_IPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_ll4_rate_percent" + }, + { + "BriefDescription": "Rate of IERAT reloads from local memory", + "MetricExpr": "PM_IPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", + "MetricName": "ipteg_from_lmem_rate_percent" + }, + { + "BriefDescription": "Average number of Castout machines used. 
1 of 16 CO machines is sampled every L2 cycle", + "MetricExpr": "PM_CO_USAGE / PM_RUN_CYC * 16", + "MetricName": "l2_co_usage" + }, + { + "BriefDescription": "Percent of instruction reads out of all L2 commands", + "MetricExpr": "PM_ISIDE_DISP * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)", + "MetricName": "l2_instr_commands_percent" + }, + { + "BriefDescription": "Percent of loads out of all L2 commands", + "MetricExpr": "PM_L2_LD * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)", + "MetricName": "l2_ld_commands_percent" + }, + { + "BriefDescription": "Rate of L2 store dispatches that failed per core", + "MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL", + "MetricName": "l2_rc_st_disp_fail_rate_percent" + }, + { + "BriefDescription": "Average number of Read/Claim machines used. 1 of 16 RC machines is sampled every L2 cycle", + "MetricExpr": "PM_RC_USAGE / PM_RUN_CYC * 16", + "MetricName": "l2_rc_usage" + }, + { + "BriefDescription": "Average number of Snoop machines used. 1 of 8 SN machines is sampled every L2 cycle", + "MetricExpr": "PM_SN_USAGE / PM_RUN_CYC * 8", + "MetricName": "l2_sn_usage" + }, + { + "BriefDescription": "Percent of stores out of all L2 commands", + "MetricExpr": "PM_L2_ST * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)", + "MetricName": "l2_st_commands_percent" + }, + { + "BriefDescription": "Rate of L2 store dispatches that failed per core", + "MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL", + "MetricName": "l2_st_disp_fail_rate_percent" + }, + { + "BriefDescription": "Rate of L2 dispatches per core", + "MetricExpr": "100 * PM_L2_RCST_DISP/2 / PM_RUN_INST_CMPL", + "MetricName": "l2_st_disp_rate_percent" + }, + { + "BriefDescription": "Marked L31 Load latency", + "MetricExpr": "(PM_MRK_DATA_FROM_L31_SHR_CYC + PM_MRK_DATA_FROM_L31_MOD_CYC) / (PM_MRK_DATA_FROM_L31_SHR + PM_MRK_DATA_FROM_L31_MOD)", + "MetricName": "l31_latency" + }, + { + "BriefDescription": "PCT instruction loads", + "MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL", + "MetricName": "loads_per_inst" + }, + { + "BriefDescription": "Cycles stalled by D-Cache Misses", + "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL", + "MetricName": "lsu_stall_dcache_miss_cpi" + }, + { + "BriefDescription": "Completion stall because a different thread was using the completion pipe", + "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_EXCEPTION - PM_CMPLU_STALL_ANY_SYNC - PM_CMPLU_STALL_SYNC_PMU_INT - PM_CMPLU_STALL_SPEC_FINISH - PM_CMPLU_STALL_FLUSH_ANY_THREAD - PM_CMPLU_STALL_LSU_FLUSH_NEXT - PM_CMPLU_STALL_NESTED_TBEGIN - PM_CMPLU_STALL_NESTED_TEND - PM_CMPLU_STALL_MTFPSCR)/PM_RUN_INST_CMPL", + "MetricName": "other_thread_cmpl_stall" + }, + { + "BriefDescription": "PCT instruction stores", + "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL", + "MetricName": "stores_per_inst" + }, + { + "BriefDescription": "ANY_SYNC_STALL_CPI", + "MetricExpr": "PM_CMPLU_STALL_SYNC_PMU_INT / PM_RUN_INST_CMPL", + "MetricName": "sync_pmu_int_stall_cpi" } ] -- cgit v1.2.3-55-g7522 From 39f4a913d6d439178177cae8aa2e9a232160fd51 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 4 Feb 2019 11:31:40 -0800 Subject: perf utils: Silence "Couldn't synthesize bpf events" warning for EPERM Synthesizing BPF events is only supported for root. Silent warning msg when non-root user runs perf-record. 
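
For reference, a minimal stand-alone sketch of the errno handling this change adopts (it assumes libbpf's bpf_prog_get_next_id(); walk_bpf_prog_ids() is a made-up helper, not the perf code): ENOENT simply ends the id walk, EINVAL (old kernel) and EPERM (non-root) are swallowed quietly, and anything else is treated as a real failure.

#include <bpf/bpf.h>
#include <errno.h>
#include <stdio.h>

static int walk_bpf_prog_ids(void)
{
        __u32 id = 0;

        /* iterate over all loaded BPF program ids */
        while (bpf_prog_get_next_id(id, &id) == 0)
                printf("bpf prog id %u\n", id);

        if (errno == ENOENT)            /* normal end of iteration */
                return 0;
        if (errno == EINVAL || errno == EPERM)
                return 0;               /* old kernel or non-root: stay silent */
        return -1;                      /* anything else is a real error */
}

int main(void)
{
        return walk_bpf_prog_ids() ? 1 : 0;
}
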
Reported-by: David Carrillo-Cisneros Signed-off-by: Song Liu Tested-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20190204193140.719740-1-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 796ef793f4ce..62dda96b0096 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -236,8 +236,8 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool, pr_debug("%s: can't get next program: %s%s", __func__, strerror(errno), errno == EINVAL ? " -- kernel too old?" : ""); - /* don't report error on old kernel */ - err = (errno == EINVAL) ? 0 : -1; + /* don't report error on old kernel or EPERM */ + err = (errno == EINVAL || errno == EPERM) ? 0 : -1; break; } fd = bpf_prog_get_fd_by_id(id); -- cgit v1.2.3-55-g7522 From 0ec572f05721f430cf8470595da8c3d5e25567de Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Feb 2019 09:56:57 -0300 Subject: tools feature: Undef _GNU_SOURCE at the end of feature tests Since we get all the tests in a single .c file for a first test, tools/build/feature/test-all.c, if individual tests set that define and fail to undef it at its end, then it the test-all.c build will fail due to defining _GNU_SOURCE multiple times, getting us to the slow path, so undef it at the end in tests that define it. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-w6s00jfo1xabgphzczadl59b@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-get_current_dir_name.c | 1 + tools/build/feature/test-libpython.c | 1 + tools/build/feature/test-setns.c | 1 + 3 files changed, 3 insertions(+) diff --git a/tools/build/feature/test-get_current_dir_name.c b/tools/build/feature/test-get_current_dir_name.c index 573000f93212..c3c201691b4f 100644 --- a/tools/build/feature/test-get_current_dir_name.c +++ b/tools/build/feature/test-get_current_dir_name.c @@ -8,3 +8,4 @@ int main(void) free(get_current_dir_name()); return 0; } +#undef _GNU_SOURCE diff --git a/tools/build/feature/test-libpython.c b/tools/build/feature/test-libpython.c index 0c1641b0d9a7..371c9113e49d 100644 --- a/tools/build/feature/test-libpython.c +++ b/tools/build/feature/test-libpython.c @@ -7,3 +7,4 @@ int main(void) return 0; } +#undef _GNU_SOURCE diff --git a/tools/build/feature/test-setns.c b/tools/build/feature/test-setns.c index 1f714d2a658b..4a1581ae7a55 100644 --- a/tools/build/feature/test-setns.c +++ b/tools/build/feature/test-setns.c @@ -5,3 +5,4 @@ int main(void) { return setns(0, 0); } +#undef _GNU_SOURCE -- cgit v1.2.3-55-g7522 From e1be4a5c03e1b282633e5528ea634c1cc2095c43 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Feb 2019 10:12:28 -0300 Subject: perf beauty ioctl cmd: The 'fd' arg is signed It is possible to pass a negative number as the fd and that has to be handled, so stop using 'unsigned int fd' in the ioctl syscall 'cmd' beautifier. 
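
To illustrate why the type matters, a small self-contained sketch (lookup_path() and its return value are hypothetical, standing in for the fd-to-pathname table access): a negative fd stored in an unsigned int wraps to UINT_MAX, so only a signed parameter allows the lower-bound check to reject it.

#include <stdio.h>

/* Hypothetical lookup; rejecting negative fds only works if the
 * parameter type can actually represent them. */
static const char *lookup_path(int fd, int max)
{
        if (fd < 0 || fd > max)
                return NULL;
        return "/dev/null";     /* placeholder result */
}

int main(void)
{
        unsigned int ufd = (unsigned int)-1;

        /* With the old unsigned type, -1 becomes 4294967295 and a
         * "fd < 0" test can never fire. */
        printf("-1 stored as unsigned int: %u\n", ufd);
        printf("lookup(-1): %s\n", lookup_path(-1, 1023) ? "found" : "rejected");
        return 0;
}
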
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-b7qwa0l19dswa09h3s41akfu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 620350d41209..52242fa4072b 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -175,7 +175,7 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size, boo size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg) { unsigned long cmd = arg->val; - unsigned int fd = syscall_arg__val(arg, 0); + int fd = syscall_arg__val(arg, 0); struct file *file = thread__files_entry(arg->thread, fd); if (file != NULL) { -- cgit v1.2.3-55-g7522 From 051074867434cc520c08f188479d4757dcfdaef8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Feb 2019 10:18:36 -0300 Subject: perf trace: Check if the 'fd' is negative when mapping it to pathname We were crashing when processing a negative fd: Program received signal SIGSEGV, Segmentation fault. 0x0000000000609bbf in syscall_arg__scnprintf_ioctl_cmd (bf=0x1172eca "", size=2038, arg=0x7fffffff8360) at trace/beauty/ioctl.c:182 182 if (file->dev_maj == USB_DEVICE_MAJOR) Missing separate debuginfos, use: dnf debuginfo-install bzip2-libs-1.0.6-28.fc29.x86_64 elfutils-libelf-0.174-5.fc29.x86_64 elfutils-libs-0.174-5.fc29.x86_64 glib2-2.58.3-1.fc29.x86_64 libbabeltrace-1.5.6-1.fc29.x86_64 libunwind-1.2.1-6.fc29.x86_64 libuuid-2.32.1-1.fc29.x86_64 libxcrypt-4.4.3-2.fc29.x86_64 numactl-libs-2.0.12-1.fc29.x86_64 openssl-libs-1.1.1a-1.fc29.x86_64 pcre-8.42-6.fc29.x86_64 perl-libs-5.28.1-427.fc29.x86_64 popt-1.16-15.fc29.x86_64 python2-libs-2.7.15-11.fc29.x86_64 slang-2.3.2-4.fc29.x86_64 xz-libs-5.2.4-3.fc29.x86_64 (gdb) bt #0 0x0000000000609bbf in syscall_arg__scnprintf_ioctl_cmd (bf=0x1172eca "", size=2038, arg=0x7fffffff8360) at trace/beauty/ioctl.c:182 #1 0x000000000048e295 in syscall__scnprintf_val (sc=0x123b500, bf=0x1172eca "", size=2038, arg=0x7fffffff8360, val=21519) at builtin-trace.c:1594 #2 0x000000000048e60d in syscall__scnprintf_args (sc=0x123b500, bf=0x1172ec6 "-1, ", size=2042, args=0x7ffff6a7c034 "\377\377\377\377", augmented_args=0x7ffff6a7c064, augmented_args_size=4, trace=0x7fffffffa8d0, thread=0x1175cd0) at builtin-trace.c:1661 #3 0x000000000048f04e in trace__sys_enter (trace=0x7fffffffa8d0, evsel=0xb260b0, event=0x7ffff6a7bfe8, sample=0x7fffffff84f0) at builtin-trace.c:1880 #4 0x00000000004915a4 in trace__handle_event (trace=0x7fffffffa8d0, event=0x7ffff6a7bfe8, sample=0x7fffffff84f0) at builtin-trace.c:2590 #5 0x0000000000491eed in __trace__deliver_event (trace=0x7fffffffa8d0, event=0x7ffff6a7bfe8) at builtin-trace.c:2818 #6 0x0000000000492030 in trace__deliver_event (trace=0x7fffffffa8d0, event=0x7ffff6a7bfe8) at builtin-trace.c:2845 #7 0x0000000000492896 in trace__run (trace=0x7fffffffa8d0, argc=0, argv=0x7fffffffdb58) at builtin-trace.c:3040 #8 0x000000000049603a in cmd_trace (argc=0, argv=0x7fffffffdb58) at builtin-trace.c:3952 #9 0x00000000004d5103 in main (argc=1, argv=0x7fffffffdb58) at perf.c:474 (gdb) p fd $1 = -1 (gdb) p file $7 = (struct file *) 0xfffffffffffffff0 (gdb) p ((struct thread_trace *)arg->thread)->files.table + fd $8 = (struct file *) 0xfffffffffffffff0 (gdb) Check for that and return NULL instead. 
This problem was introduced recently, the other codepaths leading to thread_trace__files_entry() check for negative fds, like thread__fd_path(), but we need to do it at thread_trace__files_entry() as more users are now calling it directly. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Wang Nan Fixes: 2d473389f87a ("perf trace beauty: Export function to get the files for a thread") Link: https://lkml.kernel.org/n/tip-oq7bvaaf07gsd4yqty3107u2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b4e420c41831..a9b51f59ab54 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1041,6 +1041,9 @@ static const size_t trace__entry_str_size = 2048; static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd) { + if (fd < 0) + return NULL; + if (fd > ttrace->files.max) { struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file)); -- cgit v1.2.3-55-g7522 From 1da7e0022784b0e05b49bf73521fa2cc4633af85 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Feb 2019 10:51:31 -0300 Subject: perf beauty waitid options: Fix up prefix showing logic When introducing the possibility for selecting if the common prefix to options such as the waitid ones, i.e. all 'waitid' options start with 'W', so, to make it make it more compact if configured to suppress it, 'perf trace' will do so, other examples include mmap's PROT_ prefix for its 'prot' argument, etc, which, when showing the syscall argument name ends up producing duplicated info that clutters the screen, i.e.: # perf trace -e mmap --max-events 2 sleep 1 0.000 ( 0.014 ms): sleep/20886 mmap(len: 112595, prot: PROT_READ, flags: MAP_PRIVATE, fd: 3) = 0x7f3e986d2000 0.041 ( 0.005 ms): sleep/20886 mmap(len: 8192, prot: PROT_READ|PROT_WRITE, flags: MAP_PRIVATE|MAP_ANONYMOUS) = 0x7f3e986d0000 # So it is possible to suppress that and make it more compact by having this in your ~/.perfconfig: # cat ~/.perfconfig [trace] show_prefix = no # # perf trace -e mmap --max-events 2 sleep 1 0.000 ( 0.014 ms): sleep/8009 mmap(len: 112595, prot: READ, flags: PRIVATE, fd: 3) = 0x7ff2373de000 0.040 ( 0.005 ms): sleep/8009 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) = 0x7ff2373dc000 # To have it look more like strace's output, we instead want to suppress the arg name and show the prefix, so use: # cat ~/.perfconfig [trace] show_prefix = yes show_arg_names = no # # perf trace -e mmap --max-events 2 sleep 1 0.000 ( 0.006 ms): sleep/15513 mmap(NULL, 112595, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f7a9b6d3000 0.020 ( 0.002 ms): sleep/15513 mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS) = 0x7f7a9b6d1000 # When this logic was introduced a bug came with it when processing the waitid 'option' arg that ended up expecting 3 strings when just two were being provided, fix it. 
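
A self-contained sketch of the corrected pattern (the helper and the flag subset are illustrative, not the perf source): three "%s" conversions must always receive three arguments - the separator, the optional "W" prefix, and the stringified flag name - instead of folding prefix and name into one conditional argument.

#include <stdio.h>
#include <sys/wait.h>

static int print_waitid_options(char *bf, size_t size, int options, int show_prefix)
{
        const char *prefix = "W";
        int printed = 0;

#define P_OPTION(n)                                                     \
        if (options & W##n) {                                           \
                printed += snprintf(bf + printed, size - printed,       \
                                    "%s%s%s", printed ? "|" : "",       \
                                    show_prefix ? prefix : "", #n);     \
                options &= ~W##n;                                       \
        }

        P_OPTION(NOHANG);
        P_OPTION(UNTRACED);
        P_OPTION(CONTINUED);
#undef P_OPTION
        return printed;
}

int main(void)
{
        char buf[64];

        print_waitid_options(buf, sizeof(buf), WNOHANG | WCONTINUED, 1);
        puts(buf);      /* prints "WNOHANG|WCONTINUED" */
        return 0;
}
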
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Wang Nan Fixes: c65c83ffe904 ("perf trace: Allow asking for not suppressing common string prefixes") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/waitid_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c index 6897fab40dcc..d4d10b33ba0e 100644 --- a/tools/perf/trace/beauty/waitid_options.c +++ b/tools/perf/trace/beauty/waitid_options.c @@ -11,7 +11,7 @@ static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size, #define P_OPTION(n) \ if (options & W##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : #n); \ + printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \ options &= ~W##n; \ } -- cgit v1.2.3-55-g7522 From aa8f9c517ebce7a0959da064ef2660ea03f133f8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Feb 2019 11:20:56 -0300 Subject: tools build: Add -lrt to FEATURE_CHECK_LDFLAGS-libaio Since we need it to resolve the AIO symbols, otherwise we fail with: $ cat /tmp/build/perf/feature/test-all.make.output /usr/bin/ld: /tmp/ccEqrj36.o: undefined reference to symbol 'aio_return64@@GLIBC_2.2.5' /usr/bin/ld: //usr/lib64/librt.so.1: error adding symbols: DSO missing from command line collect2: error: ld returned 1 exit status $ When we added the aio support in 'perf record' only the test-libaio.bin target got the -lrt, i.e. the feature detection slow path. Fix it. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 2a07d814747b ("tools build feature: Check if libaio is available") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b441c88cafa1..e6360d47e73a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -218,6 +218,8 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) +FEATURE_CHECK_LDFLAGS-libaio = -lrt + CFLAGS += -fno-omit-frame-pointer CFLAGS += -ggdb3 CFLAGS += -funwind-tables -- cgit v1.2.3-55-g7522 From ca2da70c411c4022a479d36063f0d0a862ea636e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Feb 2019 13:51:49 -0300 Subject: perf trace: Filter out gnome-terminal* parent Just like it does with 'sshd', to reduce the feedback loop when doing system wide tracing on on a gnome GUI. Need to figure out how to auto-filter the calls to other UI components tho. 
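
The prefix match is needed because the terminal's comm can be e.g. "gnome-terminal-server"; a tiny stand-alone sketch of the check (is_feedback_loop_parent() is illustrative - perf does the equivalent with thread__comm_str() and strstarts()):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool starts_with(const char *s, const char *prefix)
{
        return strncmp(s, prefix, strlen(prefix)) == 0;
}

/* Stand-in for the parent-comm filter used when picking pids to skip. */
static bool is_feedback_loop_parent(const char *comm)
{
        return strcmp(comm, "sshd") == 0 ||
               starts_with(comm, "gnome-terminal");
}

int main(void)
{
        printf("%d\n", is_feedback_loop_parent("gnome-terminal-server")); /* 1 */
        printf("%d\n", is_feedback_loop_parent("bash"));                  /* 0 */
        return 0;
}
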
Cc: Adrian Hunter Cc: Jiri Olsa Cc: Luis Cláudio Gonçalves Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-rjopq5y92itgokppdhe8sc6z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a9b51f59ab54..68a01e624ad3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2771,7 +2771,8 @@ static int trace__set_filter_loop_pids(struct trace *trace) if (parent == NULL) break; - if (!strcmp(thread__comm_str(parent), "sshd")) { + if (!strcmp(thread__comm_str(parent), "sshd") || + strstarts(thread__comm_str(parent), "gnome-terminal")) { pids[nr++] = parent->tid; break; } -- cgit v1.2.3-55-g7522 From 1c3b28fd7ae80c8f6bf1a09e1848e20a953c9ce4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Feb 2019 14:37:15 -0300 Subject: perf coresight: Do not test for libopencsd by default Since it is not yet that generally available, avoid testing for the presence of libcoresight in the fast path test-all.bin feature test. # dnf search opencsd No matches found. # dnf search OpenCSD No matches found. # cat /etc/fedora-release Fedora release 29 (Twenty Nine) # I.e. right now, in my system test-all.bin is failing all the time since Fedora29 doesn't have libopencsd available: $ cat /tmp/build/perf/feature/test-all.make.output In file included from test-all.c:174: test-libopencsd.c:2:10: fatal error: opencsd/c_api/opencsd_c_api.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ compilation terminated. See: 6ab2b762befd ("perf build: Disable libbabeltrace check by default") For the rationale, as soon as libopencsd becomes more generally packaged and available, we do the same thing we did with babeltrace, enabling it by default, as done in: 24787afbcd01 ("perf tools: Enable LIBBABELTRACE by default") For now, to explicitely ask for opencsd, make sure you have it installed and use: make -C tools/perf CORESIGHT=1 The feature test output will be there as an empty file: $ ls -la /tmp/build/perf/feature/test-libopencsd.make.output Because the binary used for the feature check was successfully built: $ ls -la /tmp/build/perf/feature/test-libopencsd.bin -rwxrwxr-x. 1 acme acme 18336 Feb 12 14:49 /tmp/build/perf/feature/test-libopencsd.bin $ ldd /tmp/build/perf/feature/test-libopencsd.bin linux-vdso.so.1 (0x00007fffe18cc000) libopencsd_c_api.so.0 => /lib64/libopencsd_c_api.so.0 (0x00007fb8e67f6000) libopencsd.so.0 => /lib64/libopencsd.so.0 (0x00007fb8e676f000) libc.so.6 => /lib64/libc.so.6 (0x00007fb8e65a9000) libstdc++.so.6 => /lib64/libstdc++.so.6 (0x00007fb8e6411000) libm.so.6 => /lib64/libm.so.6 (0x00007fb8e628d000) libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007fb8e6272000) /lib64/ld-linux-x86-64.so.2 (0x00007fb8e6828000) $ And the resulting perf binary will be linked with it: -rw-rw-r--. 1 acme acme 0 Feb 12 14:49 /tmp/build/perf/feature/test-libopencsd.make.output $ ldd ~/bin/perf | grep opencsd libopencsd_c_api.so.0 => /lib64/libopencsd_c_api.so.0 (0x00007fd43097f000) libopencsd.so.0 => /lib64/libopencsd.so.0 (0x00007fd4308f8000) $ To make sure this gets built before pushing things upstream I have a ubuntu:19.04-x-arm64 container that has: [root@quaco x-arm64]# grep CORESIGHT Dockerfile ENV EXTRA_MAKE_ARGS=CORESIGHT=1 [root@quaco x-arm64]# So that I always build with libopencsd before pushing things upstream. 
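
For context, the feature test binary referred to above is just a tiny probe program that only compiles and links when the OpenCSD headers and library are installed; a rough sketch, assuming ocsd_get_version() as the probe symbol (any exported OpenCSD C-API entry point would serve the same purpose):

#include <opencsd/c_api/opencsd_c_api.h>

int main(void)
{
        /* Referencing one library symbol is enough to prove the
         * headers are reachable and the .so can be linked. */
        (void)ocsd_get_version();
        return 0;
}
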
Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kim Phillips Cc: linux-arm-kernel@lists.infradead.org Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Link: https://lkml.kernel.org/n/tip-20vyy39jw9jgrijesi30fgox@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 2 +- tools/build/feature/test-all.c | 5 ----- tools/perf/Makefile.config | 3 ++- tools/perf/Makefile.perf | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 5467c6bf9ceb..bb9dca65eb5f 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -70,7 +70,6 @@ FEATURE_TESTS_BASIC := \ sched_getcpu \ sdt \ setns \ - libopencsd \ libaio # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list @@ -84,6 +83,7 @@ FEATURE_TESTS_EXTRA := \ libbabeltrace \ libbfd-liberty \ libbfd-liberty-z \ + libopencsd \ libunwind-debug-frame \ libunwind-debug-frame-arm \ libunwind-debug-frame-aarch64 \ diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 20cdaa4fc112..74329957553a 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -170,10 +170,6 @@ # include "test-setns.c" #undef main -#define main main_test_libopencsd -# include "test-libopencsd.c" -#undef main - #define main main_test_libaio # include "test-libaio.c" #undef main @@ -217,7 +213,6 @@ int main(int argc, char *argv[]) main_test_sched_getcpu(); main_test_sdt(); main_test_setns(); - main_test_libopencsd(); main_test_libaio(); return 0; diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index e6360d47e73a..cf4a8329c4c0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -388,7 +388,8 @@ ifeq ($(feature-setns), 1) $(call detected,CONFIG_SETNS) endif -ifndef NO_CORESIGHT +ifdef CORESIGHT + $(call feature_check,libopencsd) ifeq ($(feature-libopencsd), 1) CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) LDFLAGS += $(LIBOPENCSD_LDFLAGS) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 09df1c8a4ec9..c2ccc54618d1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -102,7 +102,7 @@ include ../scripts/utilities.mak # When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if # llvm-config is not in $PATH. # -# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding. +# Define CORESIGHT if you DO WANT support for CoreSight trace decoding. # # Define NO_AIO if you do not want support of Posix AIO based trace # streaming for record mode. Currently Posix AIO trace streaming is -- cgit v1.2.3-55-g7522 From 5c4d7c82c0dceccfdcf37062bd100322a69bd160 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Feb 2019 16:34:32 -0300 Subject: perf unwind: Do not put libunwind-{x86,aarch64} in FEATURE_TESTS_BASIC As it is not normally available on x86_64 not being tested on test-all.c but being in FEATURE_TESTS_BASIC ends up implying that those features are present, which leads to trying to link with those libraries and a build failure now that test-all.c is finally again building successfully: /usr/bin/ld: cannot find -lunwind-x86 /usr/bin/ld: cannot find -lunwind-aarch64 collect2: error: ld returned 1 exit status make[3]: *** [Makefile:199: /tmp/build/perf/plugin_jbd2.so] Error 1 make[3]: *** Waiting for unfinished jobs.... 
/usr/bin/ld: cannot find -lunwind-x86 /usr/bin/ld: cannot find -lunwind-aarch64 So remove those features from there and explicitely test them. And then move this patch to just before the last one that allows this to be exposed, so that we keep the tree bisectable. With all this in place we get, at this point: $ ldd /tmp/build/perf/feature/test-libunwind.bin linux-vdso.so.1 (0x00007fffa09c6000) libunwind-x86_64.so.8 => /lib64/libunwind-x86_64.so.8 (0x00007fbcf4451000) libunwind.so.8 => /lib64/libunwind.so.8 (0x00007fbcf4435000) liblzma.so.5 => /lib64/liblzma.so.5 (0x00007fbcf440c000) libelf.so.1 => /lib64/libelf.so.1 (0x00007fbcf43f2000) libc.so.6 => /lib64/libc.so.6 (0x00007fbcf422c000) libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007fbcf4211000) /lib64/ld-linux-x86-64.so.2 (0x00007fbcf4491000) libpthread.so.0 => /lib64/libpthread.so.0 (0x00007fbcf41ed000) libz.so.1 => /lib64/libz.so.1 (0x00007fbcf41d3000) $ cat /tmp/build/perf/feature/test-libunwind-x86.make.output test-libunwind-x86.c:2:10: fatal error: libunwind-x86.h: No such file or directory #include ^~~~~~~~~~~~~~~~~ compilation terminated. $ cat /tmp/build/perf/feature/test-libunwind-aarch64.make.output test-libunwind-aarch64.c:2:10: fatal error: libunwind-aarch64.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~~ compilation terminated. $ $ ldd ~/bin/perf | grep unwind libunwind-x86_64.so.8 => /lib64/libunwind-x86_64.so.8 (0x00007f5ceb24b000) libunwind.so.8 => /lib64/libunwind.so.8 (0x00007f5ceb22f000) $ Cc: Adrian Hunter Cc: He Kuang Cc: Jean Pihet Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Will Deacon Link: https://lkml.kernel.org/n/tip-vs6kwqsvwk7oxhs6z9mq87pp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 8 ++++---- tools/perf/Makefile.config | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index bb9dca65eb5f..61e46d54a67c 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -53,10 +53,6 @@ FEATURE_TESTS_BASIC := \ libslang \ libcrypto \ libunwind \ - libunwind-x86 \ - libunwind-x86_64 \ - libunwind-arm \ - libunwind-aarch64 \ pthread-attr-setaffinity-np \ pthread-barrier \ reallocarray \ @@ -84,6 +80,10 @@ FEATURE_TESTS_EXTRA := \ libbfd-liberty \ libbfd-liberty-z \ libopencsd \ + libunwind-x86 \ + libunwind-x86_64 \ + libunwind-arm \ + libunwind-aarch64 \ libunwind-debug-frame \ libunwind-debug-frame-arm \ libunwind-debug-frame-aarch64 \ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index cf4a8329c4c0..4b735042cad7 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -109,6 +109,11 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) +FEATURE_CHECK_LDFLAGS-libunwind-arm = -lunwind -lunwind-arm +FEATURE_CHECK_LDFLAGS-libunwind-aarch64 = -lunwind -lunwind-aarch64 +FEATURE_CHECK_LDFLAGS-libunwind-x86 = -lunwind -llzma -lunwind-x86 +FEATURE_CHECK_LDFLAGS-libunwind-x86_64 = -lunwind -llzma -lunwind-x86_64 + ifdef CSINCLUDES LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) endif @@ -485,6 +490,7 @@ endif ifndef NO_LIBUNWIND have_libunwind := + $(call feature_check,libunwind-x86) ifeq ($(feature-libunwind-x86), 1) $(call detected,CONFIG_LIBUNWIND_X86) CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT @@ -493,6 +499,7 @@ ifndef NO_LIBUNWIND have_libunwind = 1 endif + $(call 
feature_check,libunwind-aarch64) ifeq ($(feature-libunwind-aarch64), 1) $(call detected,CONFIG_LIBUNWIND_AARCH64) CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT -- cgit v1.2.3-55-g7522 From a96c03e8cdcf123384319f312d0a08a7a760bb35 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Feb 2019 12:01:04 -0300 Subject: tools build: Add test-reallocarray.c to test-all.c to fix the build When a test is in the FEATURE_TESTS_BASIC list in tools/build/Makefile.feature must be added to tools/build/feature/test-all.c, because the successfull compilation and linking of that test-all.bin file means that all the features listed in FEATURE_TESTS_BASIC are present in the system, so we don't have to go on feature by feature test building them. Since reallocarray() is expected to be present in modern systems, it has a place in FEATURE_TESTS_BASIC, so that we speed up the build process building just that file. For older systems, such as ubuntu:16.04 (build failure reported by Jin Yao) debian:8, and for the current flagship RHEL distro, RHEL7, the build will fail as test-all.bin (without test-reallocarray.c included) passes but reallocarray() isn't present, making the build fail with: CC /tmp/build/perf/libbpf.o MKDIR /tmp/build/perf/fs/ CC /tmp/build/perf/fs/tracing_path.o LD /tmp/build/perf/fd/libapi-in.o CC /tmp/build/perf/bpf.o libbpf.c: In function 'bpf_object__add_program': libbpf.c:367:10: error: implicit declaration of function 'reallocarray' [-Werror=implicit-function-declaration] progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0])); ^ libbpf.c:367:2: error: nested extern declaration of 'reallocarray' [-Werror=nested-externs] progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0])); ^ libbpf.c:367:8: error: assignment makes pointer from integer without a cast [-Werror=int-conversion] progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0])); ^ libbpf.c: In function 'bpf_object__elf_collect': libbpf.c:887:10: error: assignment makes pointer from integer without a cast [-Werror=int-conversion] reloc = reallocarray(reloc, nr_reloc, ^ libbpf.c: In function 'bpf_program__reloc_text': libbpf.c:1394:12: error: assignment makes pointer from integer without a cast [-Werror=int-conversion] new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn)); ^ CC /tmp/build/perf/nlattr.o Even with: $ grep reallocarray /tmp/build/perf/FEATURE-DUMP feature-reallocarray=1 $ Which ubuntu:16.04.5 LTS doesn't have: perfbuilder@38a153a1bba8:/$ head -2 /etc/os-release NAME="Ubuntu" VERSION="16.04.5 LTS (Xenial Xerus)" perfbuilder@38a153a1bba8:/$ find /usr/include/ -name "*.h" | xargs grep -w reallocarray perfbuilder@38a153a1bba8:/$ Fix it by including it to test-all.c, which ends up forcing the individual tests to be triggered and for the build process to notice that indeed reallocarray() is not there: perfbuilder@38a153a1bba8:/$ cat /tmp/build/perf/feature/test-all.make.output In file included from test-all.c:178:0: test-reallocarray.c: In function 'main_test_reallocarray': test-reallocarray.c:7:11: error: implicit declaration of function 'reallocarray' [-Werror=implicit-function-declaration] return !!reallocarray(NULL, 1, 1); ^ cc1: all warnings being treated as errors perfbuilder@38a153a1bba8:/$ That is the only test that is failing on Ubuntu 16.03.5 LTS, so all tests are forced: perfbuilder@38a153a1bba8:/tmp/build/perf/feature$ ls -lSr *.make.output -rw-r--r--. 1 perfbuilder perfbuilder 0 Feb 14 15:00 test-dwarf.make.output -rw-r--r--. 
1 perfbuilder perfbuilder 0 Feb 14 14:16 test-cplus-demangle.make.output -rw-r--r--. 1 perfbuilder perfbuilder 0 Feb 14 15:00 test-bpf.make.output -rw-r--r--. 1 perfbuilder perfbuilder 0 Feb 14 15:00 test-backtrace.make.output -rw-r--r--. 1 perfbuilder perfbuilder 104 Feb 14 15:00 test-bionic.make.output -rw-r--r--. 1 perfbuilder perfbuilder 107 Feb 14 15:00 test-libunwind-x86.make.output -rw-r--r--. 1 perfbuilder perfbuilder 115 Feb 14 15:00 test-libunwind-aarch64.make.output -rw-r--r--. 1 perfbuilder perfbuilder 122 Feb 14 15:00 test-libbabeltrace.make.output -rw-r--r--. 1 perfbuilder perfbuilder 254 Feb 14 15:00 test-reallocarray.make.output -rw-r--r--. 1 perfbuilder perfbuilder 312 Feb 14 15:00 test-all.make.output perfbuilder@38a153a1bba8:/tmp/build/perf/feature$ And that reallocarray() one shows: perfbuilder@38a153a1bba8:/tmp/build/perf/feature$ cat test-reallocarray.make.output test-reallocarray.c: In function 'main': test-reallocarray.c:7:11: error: implicit declaration of function 'reallocarray' [-Werror=implicit-function-declaration] return !!reallocarray(NULL, 1, 1); ^ cc1: all warnings being treated as errors perfbuilder@38a153a1bba8:/tmp/build/perf/feature$ Which now generates the expected result: perfbuilder@38a153a1bba8:~$ grep reallocarray /tmp/build/perf/FEATURE-DUMP feature-reallocarray=0 perfbuilder@38a153a1bba8:~$ The fallback mechanism kicks in and libbpf and perf are again buildable in systems without reallocarray(): $ cat tools/include/tools/libc_compat.h // SPDX-License-Identifier: (LGPL-2.0+ OR BSD-2-Clause) /* Copyright (C) 2018 Netronome Systems, Inc. */ #ifndef __TOOLS_LIBC_COMPAT_H #define __TOOLS_LIBC_COMPAT_H #include #include #ifdef COMPAT_NEED_REALLOCARRAY static inline void *reallocarray(void *ptr, size_t nmemb, size_t size) { size_t bytes; if (unlikely(check_mul_overflow(nmemb, size, &bytes))) return NULL; return realloc(ptr, bytes); } #endif #endif $ Reported-by: Jin Yao Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jakub Kicinski Cc: Namhyung Kim Cc: Song Liu Cc: Yonghong Song Fixes: 531b014e7a2f ("tools: bpf: make use of reallocarray") Link: https://lkml.kernel.org/n/tip-aonqku8axii8rxki5g11w40b@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-reallocarray.c | 2 ++ 2 files changed, 7 insertions(+) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 74329957553a..e903b86b742f 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -174,6 +174,10 @@ # include "test-libaio.c" #undef main +#define main main_test_reallocarray +# include "test-reallocarray.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -214,6 +218,7 @@ int main(int argc, char *argv[]) main_test_sdt(); main_test_setns(); main_test_libaio(); + main_test_reallocarray(); return 0; } diff --git a/tools/build/feature/test-reallocarray.c b/tools/build/feature/test-reallocarray.c index 8170de35150d..8f6743e31da7 100644 --- a/tools/build/feature/test-reallocarray.c +++ b/tools/build/feature/test-reallocarray.c @@ -6,3 +6,5 @@ int main(void) { return !!reallocarray(NULL, 1, 1); } + +#undef _GNU_SOURCE -- cgit v1.2.3-55-g7522 From 271402a3e97e3b9a4ce1e322a5f66c493122e1ec Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Feb 2019 16:19:45 -0300 Subject: perf build: Add missing FEATURE_CHECK_LDFLAGS-libcrypto When the libcrypto feature test was added we forgot to 
add its FEATURE_CHECK_LDFLAGS pointing to the library needed to link with the test-all.bin feature test fast path binary, so even when it was introduced we got this: $ cat /tmp/build/perf/feature/test-all.make.output /usr/bin/ld: /tmp/ccjKeJJU.o: in function `main_test_libcrypto': /home/acme/git/perf/tools/build/feature/test-libcrypto.c:10: undefined reference to `MD5_Init' /usr/bin/ld: /home/acme/git/perf/tools/build/feature/test-libcrypto.c:11: undefined reference to `MD5_Update' /usr/bin/ld: /home/acme/git/perf/tools/build/feature/test-libcrypto.c:12: undefined reference to `MD5_Final' /usr/bin/ld: /home/acme/git/perf/tools/build/feature/test-libcrypto.c:14: undefined reference to `SHA1' collect2: error: ld returned 1 exit status $ cat /tmp/build/perf/feature/test-libcrypto. test-libcrypto.bin test-libcrypto.d test-libcrypto.make.output $ cat /tmp/build/perf/feature/test-libcrypto.make.output $ Fix it, so that we keep the fast path, which, at this point, will fail with the unwind-ARCH feature tests, that will be fixed in a followup patch: $ make -C tools/perf O=/tmp/build/perf ... libcrypto: [ on ] $ cat /tmp/build/perf/feature/test-all.make.output $ ldd /tmp/build/perf/feature/test-all.bin | grep libcrypto libcrypto.so.1.1 => /lib64/libcrypto.so.1.1 (0x00007f9892805000) $ $ grep libcrypto /tmp/build/perf/FEATURE-DUMP feature-libcrypto=1 $ With the unwind-ARCH tests fixed, we now finally manage to get test-all.bin built and linked with the features it tests, among them the ones fixed in this patchkit: $ ldd /tmp/build/perf/feature/test-all.bin | egrep 'unwind|crypto' libcrypto.so.1.1 => /lib64/libcrypto.so.1.1 (0x00007f95cf2b8000) libunwind-x86_64.so.8 => /lib64/libunwind-x86_64.so.8 (0x00007f95cf294000) libunwind.so.8 => /lib64/libunwind.so.8 (0x00007f95cf278000) $ Cc: Adrian Hunter Cc: Andi Kleen Cc: Carl Love Cc: David Ahern Cc: Jiri Olsa Cc: John McCutchan Cc: Namhyung Kim Cc: Pawel Moll Cc: Peter Zijlstra Cc: Sonny Rao Cc: Stephane Eranian Cc: Sukadev Bhattiprolu Fixes: 8ee4646038e4 ("perf build: Add libcrypto feature detection") Link: https://lkml.kernel.org/n/tip-rexc248jorf5b4l3qjn888cz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 4b735042cad7..0f11d5891301 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -114,6 +114,8 @@ FEATURE_CHECK_LDFLAGS-libunwind-aarch64 = -lunwind -lunwind-aarch64 FEATURE_CHECK_LDFLAGS-libunwind-x86 = -lunwind -llzma -lunwind-x86 FEATURE_CHECK_LDFLAGS-libunwind-x86_64 = -lunwind -llzma -lunwind-x86_64 +FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto + ifdef CSINCLUDES LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) endif -- cgit v1.2.3-55-g7522 From b611f63bb1b7ba20e87d9d0d7af88d247bcf98eb Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:06 -0700 Subject: perf cs-etm: Remove unused structure field "state" Field "state" in structure cs_etm_queue is no longer used and needs to be removed. 
Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-2-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 0b11d653cfbe..ebd68eb43da9 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -65,7 +65,6 @@ struct cs_etm_queue { struct thread *thread; struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; - const struct cs_etm_state *state; union perf_event *event_buf; unsigned int queue_nr; pid_t pid, tid; -- cgit v1.2.3-55-g7522 From fc7ac4138cf5ab76850bca2b9a8f43449c762a37 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:07 -0700 Subject: perf cs-etm: Remove unused structure field "time" and "timestamp" Field "time" and "timestamp" in structure cs_etm_queue are no longer used and need to be removed. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-3-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index ebd68eb43da9..1d9419a0cf0c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -69,8 +69,6 @@ struct cs_etm_queue { unsigned int queue_nr; pid_t pid, tid; int cpu; - u64 time; - u64 timestamp; u64 offset; u64 period_instructions; struct branch_stack *last_branch; @@ -82,7 +80,7 @@ struct cs_etm_queue { static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, - pid_t tid, u64 time_); + pid_t tid); /* PTMs ETMIDR [11:8] set to b0011 */ #define ETMIDR_PTM_VERSION 0x00000300 @@ -234,7 +232,7 @@ static int cs_etm__flush_events(struct perf_session *session, if (ret < 0) return ret; - return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1); + return cs_etm__process_timeless_queues(etm, -1); } static void cs_etm__free_queue(void *priv) @@ -1583,7 +1581,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) } static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, - pid_t tid, u64 time_) + pid_t tid) { unsigned int i; struct auxtrace_queues *queues = &etm->queues; @@ -1593,7 +1591,6 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, struct cs_etm_queue *etmq = queue->priv; if (etmq && ((tid == -1) || (etmq->tid == tid))) { - etmq->time = time_; cs_etm__set_pid_tid_cpu(etm, queue); cs_etm__run_decoder(etmq); } @@ -1637,8 +1634,7 @@ static int cs_etm__process_event(struct perf_session *session, if (event->header.type == PERF_RECORD_EXIT) return cs_etm__process_timeless_queues(etm, - event->fork.tid, - sample->time); + event->fork.tid); return 0; } -- cgit v1.2.3-55-g7522 From d3267ad43dd8b3778f9b3deb9a3946c844bdc9dc Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:08 -0700 Subject: perf cs-etm: Fix wrong return values in error path Function cs_etm__mem_access() is supposed to return a u32 but the error path returns negative values at a couple of places, something that really throws off the clients using it. Fix the situation by return '0'. 
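
To make the pitfall concrete, a stand-alone sketch (both helpers are made up, not cs_etm__mem_access()): a negative errno squeezed into a u32 reaches the caller as an enormous "bytes read" count, whereas 0 is a value the decoder's read callback can handle as "nothing available".

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t read_bytes_broken(void)
{
        return -EINVAL;         /* wraps to 4294967274 "bytes read" */
}

static uint32_t read_bytes_fixed(void)
{
        return 0;               /* "nothing read" - a value callers cope with */
}

int main(void)
{
        printf("broken: %u\n", read_bytes_broken());
        printf("fixed:  %u\n", read_bytes_fixed());
        return 0;
}
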
Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-4-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 1d9419a0cf0c..f396fee9bb95 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -324,7 +324,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, struct addr_location al; if (!etmq) - return -1; + return 0; machine = etmq->etm->machine; cpumode = cs_etm__cpu_mode(etmq, address); @@ -332,7 +332,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, thread = etmq->thread; if (!thread) { if (cpumode != PERF_RECORD_MISC_KERNEL) - return -EINVAL; + return 0; thread = etmq->etm->unknown_thread; } -- cgit v1.2.3-55-g7522 From 65963e5b4dfa9b9ba4bdf7ef8dc7d4424cfd2097 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:09 -0700 Subject: perf cs-etm: Introducing function cs_etm_decoder__init_dparams() Introducing function cs_etm_decoder__init_dparams() to avoid repeating code at two different places. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-5-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 3 +- tools/perf/util/cs-etm.c | 41 +++++++++++++++++-------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 7e6a8850be4a..663309486784 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -105,9 +105,10 @@ enum { CS_ETM_PROTO_PTM, }; -enum { +enum cs_etm_decoder_operation { CS_ETM_OPERATION_PRINT = 1, CS_ETM_OPERATION_DECODE, + CS_ETM_OPERATION_MAX, }; int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f396fee9bb95..3011c6cae531 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -136,6 +136,28 @@ static void cs_etm__packet_dump(const char *pkt_string) fflush(stdout); } +static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, + struct cs_etm_queue *etmq, + enum cs_etm_decoder_operation mode) +{ + int ret = -EINVAL; + + if (!(mode < CS_ETM_OPERATION_MAX)) + goto out; + + d_params->packet_printer = cs_etm__packet_dump; + d_params->operation = mode; + d_params->data = etmq; + d_params->formatted = true; + d_params->fsyncs = false; + d_params->hsyncs = false; + d_params->frame_aligned = true; + + ret = 0; +out: + return ret; +} + static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, struct auxtrace_buffer *buffer) { @@ -182,12 +204,9 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, } /* Set decoder parameters to simply print the trace packets */ - d_params.packet_printer = cs_etm__packet_dump; - d_params.operation = CS_ETM_OPERATION_PRINT; - d_params.formatted = true; - d_params.fsyncs = false; - d_params.hsyncs = false; - d_params.frame_aligned = true; + if (cs_etm__init_decoder_params(&d_params, NULL, + CS_ETM_OPERATION_PRINT)) + return; decoder = 
cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); @@ -436,13 +455,9 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, } /* Set decoder parameters to simply print the trace packets */ - d_params.packet_printer = cs_etm__packet_dump; - d_params.operation = CS_ETM_OPERATION_DECODE; - d_params.formatted = true; - d_params.fsyncs = false; - d_params.hsyncs = false; - d_params.frame_aligned = true; - d_params.data = etmq; + if (cs_etm__init_decoder_params(&d_params, etmq, + CS_ETM_OPERATION_DECODE)) + goto out_free; etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); -- cgit v1.2.3-55-g7522 From ae4d9f5236439ef08f9963f39d5984959b1ae04d Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:10 -0700 Subject: perf cs-etm: Fix memory leak in error path Memory allocated for variable 't_params' isn't released properly in the error path of function cs_etm_queue *cs_etm__alloc_queue() and cs_etm__dump_event(), something this patch addresses. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-6-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 3011c6cae531..aac07f950074 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -175,6 +175,10 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, /* Use metadata to fill in trace parameters for trace decoder */ t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + + if (!t_params) + return; + for (i = 0; i < etm->num_cpu; i++) { if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; @@ -206,14 +210,12 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, /* Set decoder parameters to simply print the trace packets */ if (cs_etm__init_decoder_params(&d_params, NULL, CS_ETM_OPERATION_PRINT)) - return; + goto out_free; decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); - zfree(&t_params); - if (!decoder) - return; + goto out_free; do { size_t consumed; @@ -228,6 +230,9 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, } while (buffer_used < buffer->size); cs_etm_decoder__free(decoder); + +out_free: + zfree(&t_params); } static int cs_etm__flush_events(struct perf_session *session, @@ -379,7 +384,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, { int i; struct cs_etm_decoder_params d_params; - struct cs_etm_trace_params *t_params; + struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; size_t szp = sizeof(struct cs_etm_packet); @@ -461,8 +466,6 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); - zfree(&t_params); - if (!etmq->decoder) goto out_free; @@ -475,6 +478,8 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, cs_etm__mem_access)) goto out_free_decoder; + zfree(&t_params); + etmq->offset = 0; etmq->period_instructions = 0; @@ -483,6 +488,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, out_free_decoder: cs_etm_decoder__free(etmq->decoder); out_free: + zfree(&t_params); zfree(&etmq->event_buf); zfree(&etmq->last_branch); 
zfree(&etmq->last_branch_rb); -- cgit v1.2.3-55-g7522 From 2507a3d982f2968168b53f4d1e67774d68f79b0c Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:11 -0700 Subject: perf cs-etm: Introducing function cs_etm__init_trace_params() The trace parameter initialisation code is repeated in two different places, something that bloats the file and can lead to errors. This is fixed by introducing a helper function and calling the right protocol initialisation code when required. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-7-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 112 +++++++++++++++++++++++------------------------ tools/perf/util/cs-etm.h | 4 +- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index aac07f950074..f3a6dfaf3026 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -136,6 +136,57 @@ static void cs_etm__packet_dump(const char *pkt_string) fflush(stdout); } +static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm, int idx, + u32 etmidr) +{ + u64 **metadata = etm->metadata; + + t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr); + t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR]; + t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR]; +} + +static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm, int idx) +{ + u64 **metadata = etm->metadata; + + t_params[idx].protocol = CS_ETM_PROTO_ETMV4i; + t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; + t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; + t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; + t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; + t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; + t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; +} + +static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm) +{ + int i; + u32 etmidr; + u64 architecture; + + for (i = 0; i < etm->num_cpu; i++) { + architecture = etm->metadata[i][CS_ETM_MAGIC]; + + switch (architecture) { + case __perf_cs_etmv3_magic: + etmidr = etm->metadata[i][CS_ETM_ETMIDR]; + cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr); + break; + case __perf_cs_etmv4_magic: + cs_etm__set_trace_param_etmv4(t_params, etm, i); + break; + default: + return -EINVAL; + } + } + + return 0; +} + static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, struct cs_etm_queue *etmq, enum cs_etm_decoder_operation mode) @@ -161,7 +212,7 @@ out: static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, struct auxtrace_buffer *buffer) { - int i, ret; + int ret; const char *color = PERF_COLOR_BLUE; struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params; @@ -179,33 +230,8 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, if (!t_params) return; - for (i = 0; i < etm->num_cpu; i++) { - if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { - u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; - - t_params[i].protocol = - cs_etm__get_v7_protocol_version(etmidr); - t_params[i].etmv3.reg_ctrl = - etm->metadata[i][CS_ETM_ETMCR]; - 
t_params[i].etmv3.reg_trc_id = - etm->metadata[i][CS_ETM_ETMTRACEIDR]; - } else if (etm->metadata[i][CS_ETM_MAGIC] == - __perf_cs_etmv4_magic) { - t_params[i].protocol = CS_ETM_PROTO_ETMV4i; - t_params[i].etmv4.reg_idr0 = - etm->metadata[i][CS_ETMV4_TRCIDR0]; - t_params[i].etmv4.reg_idr1 = - etm->metadata[i][CS_ETMV4_TRCIDR1]; - t_params[i].etmv4.reg_idr2 = - etm->metadata[i][CS_ETMV4_TRCIDR2]; - t_params[i].etmv4.reg_idr8 = - etm->metadata[i][CS_ETMV4_TRCIDR8]; - t_params[i].etmv4.reg_configr = - etm->metadata[i][CS_ETMV4_TRCCONFIGR]; - t_params[i].etmv4.reg_traceidr = - etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; - } - } + if (cs_etm__init_trace_params(t_params, etm)) + goto out_free; /* Set decoder parameters to simply print the trace packets */ if (cs_etm__init_decoder_params(&d_params, NULL, @@ -382,7 +408,6 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, unsigned int queue_nr) { - int i; struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; @@ -431,33 +456,8 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, if (!t_params) goto out_free; - for (i = 0; i < etm->num_cpu; i++) { - if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { - u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; - - t_params[i].protocol = - cs_etm__get_v7_protocol_version(etmidr); - t_params[i].etmv3.reg_ctrl = - etm->metadata[i][CS_ETM_ETMCR]; - t_params[i].etmv3.reg_trc_id = - etm->metadata[i][CS_ETM_ETMTRACEIDR]; - } else if (etm->metadata[i][CS_ETM_MAGIC] == - __perf_cs_etmv4_magic) { - t_params[i].protocol = CS_ETM_PROTO_ETMV4i; - t_params[i].etmv4.reg_idr0 = - etm->metadata[i][CS_ETMV4_TRCIDR0]; - t_params[i].etmv4.reg_idr1 = - etm->metadata[i][CS_ETMV4_TRCIDR1]; - t_params[i].etmv4.reg_idr2 = - etm->metadata[i][CS_ETMV4_TRCIDR2]; - t_params[i].etmv4.reg_idr8 = - etm->metadata[i][CS_ETMV4_TRCIDR8]; - t_params[i].etmv4.reg_configr = - etm->metadata[i][CS_ETMV4_TRCCONFIGR]; - t_params[i].etmv4.reg_traceidr = - etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; - } - } + if (cs_etm__init_trace_params(t_params, etm)) + goto out_free; /* Set decoder parameters to simply print the trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index d76126e0e3d0..0e97c196147a 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -105,8 +105,8 @@ struct intlist *traceid_list; #define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) -static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL; -static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; +#define __perf_cs_etmv3_magic 0x3030303030303030ULL +#define __perf_cs_etmv4_magic 0x4040404040404040ULL #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) -- cgit v1.2.3-55-g7522 From e4aa592d183228e5fbd3b49a317248c2895d3819 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:12 -0700 Subject: perf cs-etm: Fix erroneous comment The comment just before initialising the decoder is plane wrong since it is part of the decoding queue setup function and the operation code specifically mention that trace data is to be decoded rather than printed out. This patch simply fix the comment to prevent people from getting really confused. 
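For reference, the two operation modes used in this file are:

  CS_ETM_OPERATION_PRINT   - raw packet dump, used by cs_etm__dump_event()
  CS_ETM_OPERATION_DECODE  - full trace decode, used by cs_etm__alloc_queue()

so after this change the comment in the DECODE path matches the operation that is actually requested.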
Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-8-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f3a6dfaf3026..4cc9fce97a86 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -459,7 +459,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, if (cs_etm__init_trace_params(t_params, etm)) goto out_free; - /* Set decoder parameters to simply print the trace packets */ + /* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, CS_ETM_OPERATION_DECODE)) goto out_free; -- cgit v1.2.3-55-g7522 From 4f5b37139fb3178b3db4e876eec0f2e92c82ac45 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:13 -0700 Subject: perf cs-etm: Cleaning up function cs_etm__alloc_queue() Function cs_etm__alloc_queue() should only be concerned with the allocation of memory for the etmq and accompanying decoder. Everything else should be done in the calling function. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-9-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 4cc9fce97a86..c9a5b4935209 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -405,8 +405,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, return len; } -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, - unsigned int queue_nr) +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) { struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; @@ -444,12 +443,6 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, if (!etmq->event_buf) goto out_free; - etmq->etm = etm; - etmq->queue_nr = queue_nr; - etmq->pid = -1; - etmq->tid = -1; - etmq->cpu = -1; - /* Use metadata to fill in trace parameters for trace decoder */ t_params = zalloc(sizeof(*t_params) * etm->num_cpu); @@ -479,10 +472,6 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, goto out_free_decoder; zfree(&t_params); - - etmq->offset = 0; - etmq->period_instructions = 0; - return etmq; out_free_decoder: @@ -503,24 +492,30 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, unsigned int queue_nr) { + int ret = 0; struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) - return 0; + goto out; - etmq = cs_etm__alloc_queue(etm, queue_nr); + etmq = cs_etm__alloc_queue(etm); - if (!etmq) - return -ENOMEM; + if (!etmq) { + ret = -ENOMEM; + goto out; + } queue->priv = etmq; - - if (queue->cpu != -1) - etmq->cpu = queue->cpu; - + etmq->etm = etm; + etmq->queue_nr = queue_nr; + etmq->cpu = queue->cpu; etmq->tid = queue->tid; + etmq->pid = -1; + etmq->offset = 0; + etmq->period_instructions = 0; - return 0; +out: + return ret; } static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) -- cgit 
v1.2.3-55-g7522 From 4b6df11ab6cf13c5babe9ed1a935cd99d50bf3fe Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:14 -0700 Subject: perf cs-etm: Rethink kernel address initialisation Moving initialisation of the kernel start address to function cs_etm__setup_queues(), considered to be the common denominator for queue initialisation. That way we don't have to repeat the same code at different places. No change of functionatlity is introduced by this patch. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-10-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index c9a5b4935209..2d2de898ea68 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -523,6 +523,9 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) unsigned int i; int ret; + if (!etm->kernel_start) + etm->kernel_start = machine__kernel_start(etm->machine); + for (i = 0; i < etm->queues.nr_queues; i++) { ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); if (ret) @@ -1490,14 +1493,10 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { - struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_buffer buffer; size_t buffer_used, processed; int err = 0; - if (!etm->kernel_start) - etm->kernel_start = machine__kernel_start(etm->machine); - /* Go through each buffer in the queue and decode them one by one */ while (1) { buffer_used = 0; -- cgit v1.2.3-55-g7522 From 23cfcd6d75cc0c1a7f95c44658dc91380fb40770 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:15 -0700 Subject: perf cs-etm: Make cs_etm__run_decoder() queue independent This patch makes decoding of auxtrace buffer centered around a struct cs_etm_queue. This eliminates surperflous variables and is a precursor for work that simplifies the main decoder loop. 
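Conceptually, each queue now carries its own read position instead of a throw-away cs_etm_buffer on the stack. A rough sketch of the bookkeeping after this change (see the diff below for the real code):

  etmq->buf      = aux_buffer->data;
  etmq->buf_len  = aux_buffer->size;
  etmq->buf_used = 0;
  ...
  /* after each cs_etm_decoder__process_data_block() call */
  etmq->offset   += processed;
  etmq->buf_used += processed;
  etmq->buf_len  -= processed;

which is the state the follow-up patches build on when splitting the decoder loop into helpers.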
Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-11-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 7 ---- tools/perf/util/cs-etm.c | 52 ++++++++++++------------- 2 files changed, 26 insertions(+), 33 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 663309486784..3ab11dfa92ae 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -15,13 +15,6 @@ struct cs_etm_decoder; -struct cs_etm_buffer { - const unsigned char *buf; - size_t len; - u64 offset; - u64 ref_timestamp; -}; - enum cs_etm_sample_type { CS_ETM_EMPTY, CS_ETM_RANGE, diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 2d2de898ea68..d2c90b369e7c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -76,6 +76,8 @@ struct cs_etm_queue { size_t last_branch_pos; struct cs_etm_packet *prev_packet; struct cs_etm_packet *packet; + const unsigned char *buf; + size_t buf_len, buf_used; }; static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); @@ -683,7 +685,7 @@ static int cs_etm__inject_event(union perf_event *event, static int -cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) +cs_etm__get_trace(struct cs_etm_queue *etmq) { struct auxtrace_buffer *aux_buffer = etmq->buffer; struct auxtrace_buffer *old_buffer = aux_buffer; @@ -697,7 +699,7 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) if (!aux_buffer) { if (old_buffer) auxtrace_buffer__drop_data(old_buffer); - buff->len = 0; + etmq->buf_len = 0; return 0; } @@ -717,13 +719,11 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) if (old_buffer) auxtrace_buffer__drop_data(old_buffer); - buff->offset = aux_buffer->offset; - buff->len = aux_buffer->size; - buff->buf = aux_buffer->data; + etmq->buf_used = 0; + etmq->buf_len = aux_buffer->size; + etmq->buf = aux_buffer->data; - buff->ref_timestamp = aux_buffer->reference; - - return buff->len; + return etmq->buf_len; } static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, @@ -1493,24 +1493,23 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { - struct cs_etm_buffer buffer; - size_t buffer_used, processed; + size_t processed; int err = 0; /* Go through each buffer in the queue and decode them one by one */ while (1) { - buffer_used = 0; - memset(&buffer, 0, sizeof(buffer)); - err = cs_etm__get_trace(&buffer, etmq); - if (err <= 0) - return err; - /* - * We cannot assume consecutive blocks in the data file are - * contiguous, reset the decoder to force re-sync. - */ - err = cs_etm_decoder__reset(etmq->decoder); - if (err != 0) - return err; + if (!etmq->buf_len) { + err = cs_etm__get_trace(etmq); + if (err <= 0) + return err; + /* + * We cannot assume consecutive blocks in the data file + * are contiguous, reset the decoder to force re-sync. 
+ */ + err = cs_etm_decoder__reset(etmq->decoder); + if (err != 0) + return err; + } /* Run trace decoder until buffer consumed or end of trace */ do { @@ -1518,14 +1517,15 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) err = cs_etm_decoder__process_data_block( etmq->decoder, etmq->offset, - &buffer.buf[buffer_used], - buffer.len - buffer_used, + &etmq->buf[etmq->buf_used], + etmq->buf_len, &processed); if (err) return err; etmq->offset += processed; - buffer_used += processed; + etmq->buf_used += processed; + etmq->buf_len -= processed; /* Process each packet in this chunk */ while (1) { @@ -1585,7 +1585,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) break; } } - } while (buffer.len > buffer_used); + } while (etmq->buf_len); if (err == 0) /* Flush any remaining branch stack entries */ -- cgit v1.2.3-55-g7522 From f74f349c211e3e62bc7fe35a132918c7f2c0fafb Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:16 -0700 Subject: perf cs-etm: Modularize main decoder function Making the main decoder block modular so that it can be called from different decoding context (timeless vs. non-timeless), avoiding to repeat code. No change in functionality is introduced by this patch. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-12-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index d2c90b369e7c..cfa686fe223e 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1491,9 +1491,36 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) return 0; } +static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) +{ + int ret = 0; + size_t processed = 0; + + /* + * Packets are decoded and added to the decoder's packet queue + * until the decoder packet processing callback has requested that + * processing stops or there is nothing left in the buffer. Normal + * operations that stop processing are a timestamp packet or a full + * decoder buffer queue. 
+ */ + ret = cs_etm_decoder__process_data_block(etmq->decoder, + etmq->offset, + &etmq->buf[etmq->buf_used], + etmq->buf_len, + &processed); + if (ret) + goto out; + + etmq->offset += processed; + etmq->buf_used += processed; + etmq->buf_len -= processed; + +out: + return ret; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { - size_t processed; int err = 0; /* Go through each buffer in the queue and decode them one by one */ @@ -1513,20 +1540,10 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) /* Run trace decoder until buffer consumed or end of trace */ do { - processed = 0; - err = cs_etm_decoder__process_data_block( - etmq->decoder, - etmq->offset, - &etmq->buf[etmq->buf_used], - etmq->buf_len, - &processed); + err = cs_etm__decode_data_block(etmq); if (err) return err; - etmq->offset += processed; - etmq->buf_used += processed; - etmq->buf_len -= processed; - /* Process each packet in this chunk */ while (1) { err = cs_etm_decoder__get_packet(etmq->decoder, -- cgit v1.2.3-55-g7522 From 3fa0e83e29488547b39f321fd5a239f08f129c72 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:17 -0700 Subject: perf cs-etm: Modularize main packet processing loop Making the main packet processing loop modular so that it can be called from different decoding context (timeless vs. non-timless), avoiding to repeat code. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-13-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 129 ++++++++++++++++++++++++++--------------------- 1 file changed, 72 insertions(+), 57 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index cfa686fe223e..f607bc58bd03 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1519,6 +1519,72 @@ out: return ret; } +static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) +{ + int ret; + + /* Process each packet in this chunk */ + while (1) { + ret = cs_etm_decoder__get_packet(etmq->decoder, + etmq->packet); + if (ret <= 0) + /* + * Stop processing this chunk on + * end of data or error + */ + break; + + /* + * Since packet addresses are swapped in packet + * handling within below switch() statements, + * thus setting sample flags must be called + * prior to switch() statement to use address + * information before packets swapping. + */ + ret = cs_etm__set_sample_flags(etmq); + if (ret < 0) + break; + + switch (etmq->packet->sample_type) { + case CS_ETM_RANGE: + /* + * If the packet contains an instruction + * range, generate instruction sequence + * events. + */ + cs_etm__sample(etmq); + break; + case CS_ETM_EXCEPTION: + case CS_ETM_EXCEPTION_RET: + /* + * If the exception packet is coming, + * make sure the previous instruction + * range packet to be handled properly. + */ + cs_etm__exception(etmq); + break; + case CS_ETM_DISCONTINUITY: + /* + * Discontinuity in trace, flush + * previous branch stack + */ + cs_etm__flush(etmq); + break; + case CS_ETM_EMPTY: + /* + * Should not receive empty packet, + * report error. 
+ */ + pr_err("CS ETM Trace: empty packet\n"); + return -EINVAL; + default: + break; + } + } + + return ret; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { int err = 0; @@ -1544,64 +1610,13 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) if (err) return err; - /* Process each packet in this chunk */ - while (1) { - err = cs_etm_decoder__get_packet(etmq->decoder, - etmq->packet); - if (err <= 0) - /* - * Stop processing this chunk on - * end of data or error - */ - break; + /* + * Process each packet in this chunk, nothing to do if + * an error occurs other than hoping the next one will + * be better. + */ + err = cs_etm__process_decoder_queue(etmq); - /* - * Since packet addresses are swapped in packet - * handling within below switch() statements, - * thus setting sample flags must be called - * prior to switch() statement to use address - * information before packets swapping. - */ - err = cs_etm__set_sample_flags(etmq); - if (err < 0) - break; - - switch (etmq->packet->sample_type) { - case CS_ETM_RANGE: - /* - * If the packet contains an instruction - * range, generate instruction sequence - * events. - */ - cs_etm__sample(etmq); - break; - case CS_ETM_EXCEPTION: - case CS_ETM_EXCEPTION_RET: - /* - * If the exception packet is coming, - * make sure the previous instruction - * range packet to be handled properly. - */ - cs_etm__exception(etmq); - break; - case CS_ETM_DISCONTINUITY: - /* - * Discontinuity in trace, flush - * previous branch stack - */ - cs_etm__flush(etmq); - break; - case CS_ETM_EMPTY: - /* - * Should not receive empty packet, - * report error. - */ - pr_err("CS ETM Trace: empty packet\n"); - return -EINVAL; - default: - break; - } - } } while (etmq->buf_len); if (err == 0) -- cgit v1.2.3-55-g7522 From 8224531cf5a1246bbe1d43c5db26e5348aeb77c5 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 12 Feb 2019 10:16:18 -0700 Subject: perf cs-etm: Modularize auxtrace_buffer fetch function Making the auxtrace_buffer fetch function modular so that it can be called from different decoding context (timeless vs. non-timeless), avoiding to repeat code. No change in functionality is introduced by this patch. Signed-off-by: Mathieu Poirier Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190212171618.25355-14-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f607bc58bd03..110804936fc3 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1152,6 +1152,32 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq) return 0; } +/* + * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue + * if need be. + * Returns: < 0 if error + * = 0 if no more auxtrace_buffer to read + * > 0 if the current buffer isn't empty yet + */ +static int cs_etm__get_data_block(struct cs_etm_queue *etmq) +{ + int ret; + + if (!etmq->buf_len) { + ret = cs_etm__get_trace(etmq); + if (ret <= 0) + return ret; + /* + * We cannot assume consecutive blocks in the data file + * are contiguous, reset the decoder to force re-sync. 
+ */ + ret = cs_etm_decoder__reset(etmq->decoder); + if (ret) + return ret; + } + + return etmq->buf_len; +} static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, struct cs_etm_packet *packet, @@ -1591,18 +1617,9 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) /* Go through each buffer in the queue and decode them one by one */ while (1) { - if (!etmq->buf_len) { - err = cs_etm__get_trace(etmq); - if (err <= 0) - return err; - /* - * We cannot assume consecutive blocks in the data file - * are contiguous, reset the decoder to force re-sync. - */ - err = cs_etm_decoder__reset(etmq->decoder); - if (err != 0) - return err; - } + err = cs_etm__get_data_block(etmq); + if (err <= 0) + return err; /* Run trace decoder until buffer consumed or end of trace */ do { -- cgit v1.2.3-55-g7522 From d0bfbedad72b74f93b028c26474acba1ba3d73ed Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:37 +0100 Subject: perf tools: Compile perf with libperf-in.o instead of libperf.a There's no need for perf build to use libperf.a, we can use directly libperf-in.o. The libperf.a stays as a target if needed: $ make libperf.a ... CC util/pmu.o CC util/pmu-flex.o LD util/libperf-in.o LD libperf-in.o AR libperf.a Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index c2ccc54618d1..93de7c7b8007 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -346,7 +346,7 @@ export PERL_PATH LIB_FILE=$(OUTPUT)libperf.a -PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) +PERFLIBS = $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) ifndef NO_LIBBPF PERFLIBS += $(LIBBPF) endif @@ -549,6 +549,8 @@ JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o +LIBPERF_IN := $(OUTPUT)libperf-in.o + export JEVENTS build := -f $(srctree)/tools/build/Makefile.build dir=. obj @@ -565,9 +567,12 @@ $(JEVENTS): $(JEVENTS_IN) $(PMU_EVENTS_IN): $(JEVENTS) FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events -$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) +$(LIBPERF_IN): prepare FORCE + $(Q)$(MAKE) $(build)=libperf + +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ - $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ + $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBS) -o $@ $(GTK_IN): FORCE $(Q)$(MAKE) $(build)=gtk @@ -683,11 +688,6 @@ endif $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) -LIBPERF_IN := $(OUTPUT)libperf-in.o - -$(LIBPERF_IN): prepare FORCE - $(Q)$(MAKE) $(build)=libperf - $(LIB_FILE): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) -- cgit v1.2.3-55-g7522 From 6368942a9223fb8cd9b5623fc0b99d75d5ca93a7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:38 +0100 Subject: perf tools: Rename LIB_FILE to LIBPERF_A Simple rename, no functional change. 
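A quick sanity check that the old name is gone, e.g.:

  $ git grep -n LIB_FILE tools/perf/Makefile.perf
  $

An empty result is expected once the patch is applied; the check is only a suggestion, not part of the change.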
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 93de7c7b8007..01f7555fd933 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -344,7 +344,7 @@ endif export PERL_PATH -LIB_FILE=$(OUTPUT)libperf.a +LIBPERF_A=$(OUTPUT)libperf.a PERFLIBS = $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) ifndef NO_LIBBPF @@ -688,7 +688,7 @@ endif $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) -$(LIB_FILE): $(LIBPERF_IN) +$(LIBPERF_A): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' @@ -910,7 +910,7 @@ python-clean: $(python-clean) clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean - $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) + $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so -- cgit v1.2.3-55-g7522 From 5ff328836dfde0cef9f28c8b8791a90a36d7a183 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:39 +0100 Subject: perf tools: Rename build libperf to perf Rename build libperf to perf, because it's used to build perf. The libperf build object name will be used for libperf library. 
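The change itself is mechanical, so it can be scripted; something along these lines would do it (purely illustrative, the diff below is what actually matters):

  $ git grep -l '^libperf-' tools/perf | xargs sed -i 's/^libperf-/perf-/'

i.e. every Build fragment switches from the libperf-y / libperf-$(CONFIG_*) convention to perf-y / perf-$(CONFIG_*).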
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 10 +- tools/perf/arch/Build | 4 +- tools/perf/arch/arm/Build | 4 +- tools/perf/arch/arm/tests/Build | 8 +- tools/perf/arch/arm/util/Build | 8 +- tools/perf/arch/arm64/Build | 4 +- tools/perf/arch/arm64/tests/Build | 6 +- tools/perf/arch/arm64/util/Build | 12 +- tools/perf/arch/nds32/Build | 2 +- tools/perf/arch/nds32/util/Build | 2 +- tools/perf/arch/powerpc/Build | 4 +- tools/perf/arch/powerpc/tests/Build | 6 +- tools/perf/arch/powerpc/util/Build | 18 +- tools/perf/arch/s390/Build | 2 +- tools/perf/arch/s390/util/Build | 12 +- tools/perf/arch/sh/Build | 2 +- tools/perf/arch/sh/util/Build | 2 +- tools/perf/arch/sparc/Build | 2 +- tools/perf/arch/sparc/util/Build | 2 +- tools/perf/arch/x86/Build | 4 +- tools/perf/arch/x86/tests/Build | 14 +- tools/perf/arch/x86/util/Build | 30 +-- tools/perf/arch/xtensa/Build | 2 +- tools/perf/arch/xtensa/util/Build | 2 +- tools/perf/scripts/Build | 4 +- tools/perf/scripts/perl/Perf-Trace-Util/Build | 2 +- tools/perf/scripts/python/Perf-Trace-Util/Build | 2 +- tools/perf/trace/beauty/Build | 26 +-- tools/perf/ui/Build | 18 +- tools/perf/ui/browsers/Build | 10 +- tools/perf/ui/tui/Build | 8 +- tools/perf/util/Build | 276 ++++++++++++------------ tools/perf/util/c++/Build | 4 +- tools/perf/util/cs-etm-decoder/Build | 2 +- tools/perf/util/intel-pt-decoder/Build | 2 +- tools/perf/util/scripting-engines/Build | 4 +- 36 files changed, 260 insertions(+), 260 deletions(-) diff --git a/tools/perf/Build b/tools/perf/Build index e5232d567611..5f392dbb88fc 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -46,10 +46,10 @@ CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_ CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))" CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)" -libperf-y += util/ -libperf-y += arch/ -libperf-y += ui/ -libperf-y += scripts/ -libperf-$(CONFIG_TRACE) += trace/beauty/ +perf-y += util/ +perf-y += arch/ +perf-y += ui/ +perf-y += scripts/ +perf-$(CONFIG_TRACE) += trace/beauty/ gtk-y += ui/gtk/ diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build index d9b6af837c7d..688818844c11 100644 --- a/tools/perf/arch/Build +++ b/tools/perf/arch/Build @@ -1,2 +1,2 @@ -libperf-y += common.o -libperf-y += $(SRCARCH)/ +perf-y += common.o +perf-y += $(SRCARCH)/ diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build index 41bf61da476a..36222e64bbf7 100644 --- a/tools/perf/arch/arm/Build +++ b/tools/perf/arch/arm/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-$(CONFIG_DWARF_UNWIND) += tests/ +perf-y += util/ +perf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build index d9ae2733f9cc..bc8e97380c82 100644 --- a/tools/perf/arch/arm/tests/Build +++ b/tools/perf/arch/arm/tests/Build @@ -1,5 +1,5 @@ -libperf-y += regs_load.o -libperf-y += dwarf-unwind.o -libperf-y += vectors-page.o +perf-y += regs_load.o +perf-y += dwarf-unwind.o +perf-y += vectors-page.o -libperf-y += arch-tests.o +perf-y += arch-tests.o diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index e64c5f216448..296f0eac5e18 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -1,6 +1,6 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o 
-libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o +perf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build index 41bf61da476a..36222e64bbf7 100644 --- a/tools/perf/arch/arm64/Build +++ b/tools/perf/arch/arm64/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-$(CONFIG_DWARF_UNWIND) += tests/ +perf-y += util/ +perf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build index 883c57ff0c08..41707fea74b3 100644 --- a/tools/perf/arch/arm64/tests/Build +++ b/tools/perf/arch/arm64/tests/Build @@ -1,4 +1,4 @@ -libperf-y += regs_load.o -libperf-y += dwarf-unwind.o +perf-y += regs_load.o +perf-y += dwarf-unwind.o -libperf-y += arch-tests.o +perf-y += arch-tests.o diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 68f8a8eb3ad0..3cde540d2fcf 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,10 +1,10 @@ -libperf-y += header.o -libperf-y += sym-handling.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-y += header.o +perf-y += sym-handling.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ +perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ ../../arm/util/auxtrace.o \ ../../arm/util/cs-etm.o \ arm-spe.o diff --git a/tools/perf/arch/nds32/Build b/tools/perf/arch/nds32/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/nds32/Build +++ b/tools/perf/arch/nds32/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/nds32/util/Build b/tools/perf/arch/nds32/util/Build index ca623bbf993c..d0bc205fe49a 100644 --- a/tools/perf/arch/nds32/util/Build +++ b/tools/perf/arch/nds32/util/Build @@ -1 +1 @@ -libperf-y += header.o +perf-y += header.o diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build index db52fa22d3a1..a7dd46a5b678 100644 --- a/tools/perf/arch/powerpc/Build +++ b/tools/perf/arch/powerpc/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-y += tests/ +perf-y += util/ +perf-y += tests/ diff --git a/tools/perf/arch/powerpc/tests/Build b/tools/perf/arch/powerpc/tests/Build index d827ef384b33..3526ab0af9f9 100644 --- a/tools/perf/arch/powerpc/tests/Build +++ b/tools/perf/arch/powerpc/tests/Build @@ -1,4 +1,4 @@ -libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o -libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o +perf-$(CONFIG_DWARF_UNWIND) += regs_load.o +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o -libperf-y += arch-tests.o +perf-y += arch-tests.o diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index ba98bd006488..7cf0b8803097 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,11 +1,11 @@ -libperf-y += header.o -libperf-y += sym-handling.o -libperf-y += kvm-stat.o -libperf-y += perf_regs.o -libperf-y += mem-events.o +perf-y += header.o +perf-y += sym-handling.o +perf-y += kvm-stat.o +perf-y += perf_regs.o +perf-y += mem-events.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o 
-libperf-$(CONFIG_DWARF) += skip-callchain-idx.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += skip-callchain-idx.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/s390/Build +++ b/tools/perf/arch/s390/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 4a233683c684..22797f043b84 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -1,9 +1,9 @@ -libperf-y += header.o -libperf-y += kvm-stat.o +perf-y += header.o +perf-y += kvm-stat.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-y += machine.o +perf-y += machine.o -libperf-$(CONFIG_AUXTRACE) += auxtrace.o +perf-$(CONFIG_AUXTRACE) += auxtrace.o diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/sh/Build +++ b/tools/perf/arch/sh/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/sh/util/Build +++ b/tools/perf/arch/sh/util/Build @@ -1 +1 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/sparc/Build +++ b/tools/perf/arch/sparc/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/sparc/util/Build +++ b/tools/perf/arch/sparc/util/Build @@ -1 +1 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build index db52fa22d3a1..a7dd46a5b678 100644 --- a/tools/perf/arch/x86/Build +++ b/tools/perf/arch/x86/Build @@ -1,2 +1,2 @@ -libperf-y += util/ -libperf-y += tests/ +perf-y += util/ +perf-y += tests/ diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 586849ff83a0..3d83d0c6982d 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -1,8 +1,8 @@ -libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o -libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o +perf-$(CONFIG_DWARF_UNWIND) += regs_load.o +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o -libperf-y += arch-tests.o -libperf-y += rdpmc.o -libperf-y += perf-time-to-tsc.o -libperf-$(CONFIG_AUXTRACE) += insn-x86.o -libperf-$(CONFIG_X86_64) += bp-modify.o +perf-y += arch-tests.o +perf-y += rdpmc.o +perf-y += perf-time-to-tsc.o +perf-$(CONFIG_AUXTRACE) += insn-x86.o +perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 844b8f335532..7aab0be5fc5f 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -1,18 +1,18 @@ -libperf-y += header.o -libperf-y += tsc.o -libperf-y += pmu.o -libperf-y += kvm-stat.o -libperf-y += perf_regs.o -libperf-y += group.o -libperf-y += machine.o -libperf-y += 
event.o +perf-y += header.o +perf-y += tsc.o +perf-y += pmu.o +perf-y += kvm-stat.o +perf-y += perf_regs.o +perf-y += group.o +perf-y += machine.o +perf-y += event.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o -libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_AUXTRACE) += auxtrace.o -libperf-$(CONFIG_AUXTRACE) += intel-pt.o -libperf-$(CONFIG_AUXTRACE) += intel-bts.o +perf-$(CONFIG_AUXTRACE) += auxtrace.o +perf-$(CONFIG_AUXTRACE) += intel-pt.o +perf-$(CONFIG_AUXTRACE) += intel-bts.o diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/xtensa/Build +++ b/tools/perf/arch/xtensa/Build @@ -1 +1 @@ -libperf-y += util/ +perf-y += util/ diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/xtensa/util/Build +++ b/tools/perf/arch/xtensa/util/Build @@ -1 +1 @@ -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build index 41efd7e368b3..68d4b54574ad 100644 --- a/tools/perf/scripts/Build +++ b/tools/perf/scripts/Build @@ -1,2 +1,2 @@ -libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ -libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ +perf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ +perf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build index 34faecf774ae..db0036129307 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Build +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build @@ -1,4 +1,4 @@ -libperf-y += Context.o +perf-y += Context.o CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build index aefc15c9444a..7d0e33ce6aba 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Build +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build @@ -1,3 +1,3 @@ -libperf-y += Context.o +perf-y += Context.o CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 637365099b7d..85f328ddf897 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -1,15 +1,15 @@ -libperf-y += clone.o -libperf-y += fcntl.o -libperf-y += flock.o +perf-y += clone.o +perf-y += fcntl.o +perf-y += flock.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) -libperf-y += ioctl.o +perf-y += ioctl.o endif -libperf-y += kcmp.o -libperf-y += mount_flags.o -libperf-y += pkey_alloc.o -libperf-y += arch_prctl.o -libperf-y += prctl.o -libperf-y += renameat.o -libperf-y += sockaddr.o -libperf-y += socket.o -libperf-y += statx.o +perf-y += kcmp.o +perf-y += mount_flags.o +perf-y += pkey_alloc.o +perf-y += arch_prctl.o +perf-y += prctl.o +perf-y += renameat.o +perf-y += sockaddr.o +perf-y += socket.o +perf-y += statx.o diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build index 
0a73538c0441..3aff83c3275f 100644 --- a/tools/perf/ui/Build +++ b/tools/perf/ui/Build @@ -1,14 +1,14 @@ -libperf-y += setup.o -libperf-y += helpline.o -libperf-y += progress.o -libperf-y += util.o -libperf-y += hist.o -libperf-y += stdio/hist.o +perf-y += setup.o +perf-y += helpline.o +perf-y += progress.o +perf-y += util.o +perf-y += hist.o +perf-y += stdio/hist.o CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" -libperf-$(CONFIG_SLANG) += browser.o -libperf-$(CONFIG_SLANG) += browsers/ -libperf-$(CONFIG_SLANG) += tui/ +perf-$(CONFIG_SLANG) += browser.o +perf-$(CONFIG_SLANG) += browsers/ +perf-$(CONFIG_SLANG) += tui/ CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build index de223f5bed58..8fee56b46502 100644 --- a/tools/perf/ui/browsers/Build +++ b/tools/perf/ui/browsers/Build @@ -1,8 +1,8 @@ -libperf-y += annotate.o -libperf-y += hists.o -libperf-y += map.o -libperf-y += scripts.o -libperf-y += header.o +perf-y += annotate.o +perf-y += hists.o +perf-y += map.o +perf-y += scripts.o +perf-y += header.o CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build index 9e4c6ca41a9f..f916df33a1a7 100644 --- a/tools/perf/ui/tui/Build +++ b/tools/perf/ui/tui/Build @@ -1,4 +1,4 @@ -libperf-y += setup.o -libperf-y += util.o -libperf-y += helpline.o -libperf-y += progress.o +perf-y += setup.o +perf-y += util.o +perf-y += helpline.o +perf-y += progress.o diff --git a/tools/perf/util/Build b/tools/perf/util/Build index a36e6e5a6f4f..ca0741c91903 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,162 +1,162 @@ -libperf-y += annotate.o -libperf-y += block-range.o -libperf-y += build-id.o -libperf-y += config.o -libperf-y += ctype.o -libperf-y += db-export.o -libperf-y += env.o -libperf-y += event.o -libperf-y += evlist.o -libperf-y += evsel.o -libperf-y += evsel_fprintf.o -libperf-y += find_bit.o -libperf-y += get_current_dir_name.o -libperf-y += kallsyms.o -libperf-y += levenshtein.o -libperf-y += llvm-utils.o -libperf-y += mmap.o -libperf-y += memswap.o -libperf-y += parse-events.o -libperf-y += perf_regs.o -libperf-y += path.o -libperf-y += print_binary.o -libperf-y += rbtree.o -libperf-y += libstring.o -libperf-y += bitmap.o -libperf-y += hweight.o -libperf-y += smt.o -libperf-y += strbuf.o -libperf-y += string.o -libperf-y += strlist.o -libperf-y += strfilter.o -libperf-y += top.o -libperf-y += usage.o -libperf-y += dso.o -libperf-y += symbol.o -libperf-y += symbol_fprintf.o -libperf-y += color.o -libperf-y += color_config.o -libperf-y += metricgroup.o -libperf-y += header.o -libperf-y += callchain.o -libperf-y += values.o -libperf-y += debug.o -libperf-y += machine.o -libperf-y += map.o -libperf-y += pstack.o -libperf-y += session.o -libperf-y += sample-raw.o -libperf-y += s390-sample-raw.o -libperf-$(CONFIG_TRACE) += syscalltbl.o -libperf-y += ordered-events.o -libperf-y += namespaces.o -libperf-y += comm.o -libperf-y += thread.o -libperf-y += thread_map.o -libperf-y += trace-event-parse.o -libperf-y += parse-events-flex.o -libperf-y += parse-events-bison.o -libperf-y += pmu.o -libperf-y += pmu-flex.o -libperf-y += pmu-bison.o -libperf-y += trace-event-read.o -libperf-y += trace-event-info.o -libperf-y += trace-event-scripting.o -libperf-y += trace-event.o -libperf-y += svghelper.o -libperf-y += sort.o -libperf-y += hist.o -libperf-y += util.o -libperf-y += xyarray.o -libperf-y += cpumap.o -libperf-y += cgroup.o -libperf-y 
+= target.o -libperf-y += rblist.o -libperf-y += intlist.o -libperf-y += vdso.o -libperf-y += counts.o -libperf-y += stat.o -libperf-y += stat-shadow.o -libperf-y += stat-display.o -libperf-y += record.o -libperf-y += srcline.o -libperf-y += srccode.o -libperf-y += data.o -libperf-y += tsc.o -libperf-y += cloexec.o -libperf-y += call-path.o -libperf-y += rwsem.o -libperf-y += thread-stack.o -libperf-$(CONFIG_AUXTRACE) += auxtrace.o -libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ -libperf-$(CONFIG_AUXTRACE) += intel-pt.o -libperf-$(CONFIG_AUXTRACE) += intel-bts.o -libperf-$(CONFIG_AUXTRACE) += arm-spe.o -libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o -libperf-$(CONFIG_AUXTRACE) += s390-cpumsf.o +perf-y += annotate.o +perf-y += block-range.o +perf-y += build-id.o +perf-y += config.o +perf-y += ctype.o +perf-y += db-export.o +perf-y += env.o +perf-y += event.o +perf-y += evlist.o +perf-y += evsel.o +perf-y += evsel_fprintf.o +perf-y += find_bit.o +perf-y += get_current_dir_name.o +perf-y += kallsyms.o +perf-y += levenshtein.o +perf-y += llvm-utils.o +perf-y += mmap.o +perf-y += memswap.o +perf-y += parse-events.o +perf-y += perf_regs.o +perf-y += path.o +perf-y += print_binary.o +perf-y += rbtree.o +perf-y += libstring.o +perf-y += bitmap.o +perf-y += hweight.o +perf-y += smt.o +perf-y += strbuf.o +perf-y += string.o +perf-y += strlist.o +perf-y += strfilter.o +perf-y += top.o +perf-y += usage.o +perf-y += dso.o +perf-y += symbol.o +perf-y += symbol_fprintf.o +perf-y += color.o +perf-y += color_config.o +perf-y += metricgroup.o +perf-y += header.o +perf-y += callchain.o +perf-y += values.o +perf-y += debug.o +perf-y += machine.o +perf-y += map.o +perf-y += pstack.o +perf-y += session.o +perf-y += sample-raw.o +perf-y += s390-sample-raw.o +perf-$(CONFIG_TRACE) += syscalltbl.o +perf-y += ordered-events.o +perf-y += namespaces.o +perf-y += comm.o +perf-y += thread.o +perf-y += thread_map.o +perf-y += trace-event-parse.o +perf-y += parse-events-flex.o +perf-y += parse-events-bison.o +perf-y += pmu.o +perf-y += pmu-flex.o +perf-y += pmu-bison.o +perf-y += trace-event-read.o +perf-y += trace-event-info.o +perf-y += trace-event-scripting.o +perf-y += trace-event.o +perf-y += svghelper.o +perf-y += sort.o +perf-y += hist.o +perf-y += util.o +perf-y += xyarray.o +perf-y += cpumap.o +perf-y += cgroup.o +perf-y += target.o +perf-y += rblist.o +perf-y += intlist.o +perf-y += vdso.o +perf-y += counts.o +perf-y += stat.o +perf-y += stat-shadow.o +perf-y += stat-display.o +perf-y += record.o +perf-y += srcline.o +perf-y += srccode.o +perf-y += data.o +perf-y += tsc.o +perf-y += cloexec.o +perf-y += call-path.o +perf-y += rwsem.o +perf-y += thread-stack.o +perf-$(CONFIG_AUXTRACE) += auxtrace.o +perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ +perf-$(CONFIG_AUXTRACE) += intel-pt.o +perf-$(CONFIG_AUXTRACE) += intel-bts.o +perf-$(CONFIG_AUXTRACE) += arm-spe.o +perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o +perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o ifdef CONFIG_LIBOPENCSD -libperf-$(CONFIG_AUXTRACE) += cs-etm.o -libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ +perf-$(CONFIG_AUXTRACE) += cs-etm.o +perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ endif -libperf-y += parse-branch-options.o -libperf-y += dump-insn.o -libperf-y += parse-regs-options.o -libperf-y += term.o -libperf-y += help-unknown-cmd.o -libperf-y += mem-events.o -libperf-y += vsprintf.o -libperf-y += units.o -libperf-y += time-utils.o -libperf-y += expr-bison.o -libperf-y += branch.o -libperf-y += mem2node.o - -libperf-$(CONFIG_LIBBPF) += bpf-loader.o 
-libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o -libperf-$(CONFIG_LIBELF) += symbol-elf.o -libperf-$(CONFIG_LIBELF) += probe-file.o -libperf-$(CONFIG_LIBELF) += probe-event.o +perf-y += parse-branch-options.o +perf-y += dump-insn.o +perf-y += parse-regs-options.o +perf-y += term.o +perf-y += help-unknown-cmd.o +perf-y += mem-events.o +perf-y += vsprintf.o +perf-y += units.o +perf-y += time-utils.o +perf-y += expr-bison.o +perf-y += branch.o +perf-y += mem2node.o + +perf-$(CONFIG_LIBBPF) += bpf-loader.o +perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o +perf-$(CONFIG_LIBELF) += symbol-elf.o +perf-$(CONFIG_LIBELF) += probe-file.o +perf-$(CONFIG_LIBELF) += probe-event.o ifndef CONFIG_LIBELF -libperf-y += symbol-minimal.o +perf-y += symbol-minimal.o endif ifndef CONFIG_SETNS -libperf-y += setns.o +perf-y += setns.o endif -libperf-$(CONFIG_DWARF) += probe-finder.o -libperf-$(CONFIG_DWARF) += dwarf-aux.o -libperf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += probe-finder.o +perf-$(CONFIG_DWARF) += dwarf-aux.o +perf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o -libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o -libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o +perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o +perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o +perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o -libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o +perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o -libperf-y += scripting-engines/ +perf-y += scripting-engines/ -libperf-$(CONFIG_ZLIB) += zlib.o -libperf-$(CONFIG_LZMA) += lzma.o -libperf-y += demangle-java.o -libperf-y += demangle-rust.o +perf-$(CONFIG_ZLIB) += zlib.o +perf-$(CONFIG_LZMA) += lzma.o +perf-y += demangle-java.o +perf-y += demangle-rust.o ifdef CONFIG_JITDUMP -libperf-$(CONFIG_LIBELF) += jitdump.o -libperf-$(CONFIG_LIBELF) += genelf.o -libperf-$(CONFIG_DWARF) += genelf_debug.o +perf-$(CONFIG_LIBELF) += jitdump.o +perf-$(CONFIG_LIBELF) += genelf.o +perf-$(CONFIG_DWARF) += genelf_debug.o endif -libperf-y += perf-hooks.o +perf-y += perf-hooks.o -libperf-$(CONFIG_LIBBPF) += bpf-event.o +perf-$(CONFIG_LIBBPF) += bpf-event.o -libperf-$(CONFIG_CXX) += c++/ +perf-$(CONFIG_CXX) += c++/ CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build index 988fef1b11d7..613ecfd76527 100644 --- a/tools/perf/util/c++/Build +++ b/tools/perf/util/c++/Build @@ -1,2 +1,2 @@ -libperf-$(CONFIG_CLANGLLVM) += clang.o -libperf-$(CONFIG_CLANGLLVM) += clang-test.o +perf-$(CONFIG_CLANGLLVM) += clang.o +perf-$(CONFIG_CLANGLLVM) += clang-test.o diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build index bc22c39c727f..216cb17a3322 100644 --- a/tools/perf/util/cs-etm-decoder/Build +++ b/tools/perf/util/cs-etm-decoder/Build @@ -1 +1 @@ -libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o +perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 1b704fbea9de..23bf788f84b9 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -1,4 +1,4 @@ 
-libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o +perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index 82d28c67e0f3..7b342ce38d99 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build @@ -1,5 +1,5 @@ -libperf-$(CONFIG_LIBPERL) += trace-event-perl.o -libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o +perf-$(CONFIG_LIBPERL) += trace-event-perl.o +perf-$(CONFIG_LIBPYTHON) += trace-event-python.o CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -- cgit v1.2.3-55-g7522 From 714a92d83fd1e20461f53549cfbee77b20be5032 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:40 +0100 Subject: perf tools: Fix legacy events symbol separator parsing Fixing legacy symbol events parsing. We can't support single slash separator, like 'cycles/u', because it conflicts with non empty terms, like 'cycles/period/u'. Keeping only '//' and ':' separator for these events: cycles//u cycles:k And removing '/' separator support, which is not working anymore. Also adding automated tests for above events. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 30 ++++++++++++++++++++++++++++++ tools/perf/util/parse-events.y | 4 ++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 3b97ac018d5a..4a69c07f4101 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1330,6 +1330,26 @@ static int test__checkevent_complex_name(struct perf_evlist *evlist) return 0; } +static int test__sym_event_slash(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong type", evsel->attr.type == PERF_TYPE_HARDWARE); + TEST_ASSERT_VAL("wrong config", evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + return 0; +} + +static int test__sym_event_dc(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong type", evsel->attr.type == PERF_TYPE_HARDWARE); + TEST_ASSERT_VAL("wrong config", evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + return 0; +} + static int count_tracepoints(void) { struct dirent *events_ent; @@ -1670,6 +1690,16 @@ static struct evlist_test test__events[] = { .name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk", .check = test__checkevent_complex_name, .id = 53 + }, + { + .name = "cycles//u", + .check = test__sym_event_slash, + .id = 54, + }, + { + .name = "cycles:k", + .check = test__sym_event_dc, + .id = 55, } }; diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index da8fe57691b8..44819bdb037d 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -311,7 +311,7 @@ value_sym 
'/' event_config '/' $$ = list; } | -value_sym sep_slash_dc +value_sym sep_slash_slash_dc { struct list_head *list; int type = $1 >> 16; @@ -702,7 +702,7 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE sep_dc: ':' | -sep_slash_dc: '/' | ':' | +sep_slash_slash_dc: '/' '/' | ':' | %% -- cgit v1.2.3-55-g7522 From 33bbc571ed79cace481fae4031b80a51d93ae997 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:41 +0100 Subject: perf list: Display metric expressions for --details option Display metric expression itself when --details is specified. Current list with no details: # perf list metrics ... TopDownL1: IPC [Instructions Per Cycle (per logical thread)] SLOTS [Total issue-pipeline slots] ... Detailed output with metric formula: # perf list --details metrics ... TopDownL1: IPC [Instructions Per Cycle (per logical thread)] [inst_retired.any / cpu_clk_unhalted.thread] SLOTS [Total issue-pipeline slots] [4*(( cpu_clk_unhalted.thread_any / 2 ) if #smt_on else cycles)] ... Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 8 ++++---- tools/perf/util/metricgroup.c | 8 +++++++- tools/perf/util/metricgroup.h | 3 ++- tools/perf/util/parse-events.c | 2 +- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index ead221e49f00..c9f98d00c0e9 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -82,9 +82,9 @@ int cmd_list(int argc, const char **argv) else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "metric") == 0) - metricgroup__print(true, false, NULL, raw_dump); + metricgroup__print(true, false, NULL, raw_dump, details_flag); else if (strcmp(argv[i], "metricgroup") == 0) - metricgroup__print(false, true, NULL, raw_dump); + metricgroup__print(false, true, NULL, raw_dump, details_flag); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; @@ -102,7 +102,7 @@ int cmd_list(int argc, const char **argv) s[sep_idx] = '\0'; print_tracepoint_events(s, s + sep_idx + 1, raw_dump); print_sdt_events(s, s + sep_idx + 1, raw_dump); - metricgroup__print(true, true, s, raw_dump); + metricgroup__print(true, true, s, raw_dump, details_flag); free(s); } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { @@ -119,7 +119,7 @@ int cmd_list(int argc, const char **argv) details_flag); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); - metricgroup__print(true, true, NULL, raw_dump); + metricgroup__print(true, true, NULL, raw_dump, details_flag); free(s); } } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 8529cbd3955b..b8d864ed4afe 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -270,7 +270,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) } void metricgroup__print(bool metrics, bool metricgroups, char *filter, - bool raw) + bool raw, bool details) { struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; @@ -329,6 +329,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, if (asprintf(&s, "%s\n%*s%s]", pe->metric_name, 8, "[", pe->desc) < 0) return; + + if (details) { + if (asprintf(&s, "%s\n%*s%s]", + s, 8, "[", pe->metric_expr) < 0) + return; + } } if (!s) diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 
8a155dba0581..5c52097a5c63 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -27,6 +27,7 @@ int metricgroup__parse_groups(const struct option *opt, const char *str, struct rblist *metric_events); -void metricgroup__print(bool metrics, bool groups, char *filter, bool raw); +void metricgroup__print(bool metrics, bool groups, char *filter, + bool raw, bool details); bool metricgroup__has_metric(const char *metric); #endif diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 920e1e6551dd..4dcc01b2532c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2540,7 +2540,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_sdt_events(NULL, NULL, name_only); - metricgroup__print(true, true, NULL, name_only); + metricgroup__print(true, true, NULL, name_only, details_flag); } int parse_events__is_hardcoded_term(struct parse_events_term *term) -- cgit v1.2.3-55-g7522 From a9aeb87b98badb55ec28dfcae8a8ce127d6208f5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:43 +0100 Subject: perf header: Get rid of write_it label Simplifying the code a bit. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index dec6d218c31c..f1508adefc16 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1042,11 +1042,9 @@ static int write_cpuid(struct feat_fd *ff, int ret; ret = get_cpuid(buffer, sizeof(buffer)); - if (!ret) - goto write_it; + if (ret) + return -1; - return -1; -write_it: return do_write_string(ff, buffer); } -- cgit v1.2.3-55-g7522 From aa4df30db52293c146b1851b88021d0bc2dd3b89 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 13 Feb 2019 13:32:44 +0100 Subject: perf header: Remove unused 'cpu_nr' field from 'struct cpu_topo' Not used at all. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190213123246.4015-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index f1508adefc16..61ce197c5362 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -563,7 +563,6 @@ static int write_cmdline(struct feat_fd *ff, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" struct cpu_topo { - u32 cpu_nr; u32 core_sib; u32 thread_sib; char **core_siblings; @@ -679,7 +678,6 @@ static struct cpu_topo *build_cpu_topology(void) goto out_free; tp = addr; - tp->cpu_nr = nr; addr += sizeof(*tp); tp->core_siblings = addr; addr += sz; -- cgit v1.2.3-55-g7522 From 44ec8396e2dadf0f0806767642cfac9e04d0d5b1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Feb 2019 13:02:01 -0300 Subject: tools build feature sched_getcpu: Undef _GNU_SOURCE at the end Since this feature test is included in test-all.c, the feature detection fast path compile/link phase, it can't leave any defines behind, as it can affect the tests included after it, so remove it. 
Reported-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jakub Kicinski Cc: Jin Yao Cc: Namhyung Kim Cc: Song Liu Cc: Yonghong Song Link: https://lkml.kernel.org/n/tip-lg3kpd9tzypc797vb1f42u6k@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-sched_getcpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/build/feature/test-sched_getcpu.c b/tools/build/feature/test-sched_getcpu.c index e448deb4124c..48995ac7911e 100644 --- a/tools/build/feature/test-sched_getcpu.c +++ b/tools/build/feature/test-sched_getcpu.c @@ -8,3 +8,5 @@ int main(void) { return sched_getcpu(); } + +#undef _GNU_SOURCE -- cgit v1.2.3-55-g7522
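
For readers unfamiliar with the feature-detection fast path mentioned in the commit message above: test-all.c builds every individual feature test as a single translation unit by #including each test-*.c file, so a macro defined by one test file stays visible to the tests included after it unless it is explicitly undefined — which is what this patch adds for _GNU_SOURCE. The stand-alone C sketch below only illustrates that leakage; the function names and the second "feature test" are invented for the example and are not the real contents of tools/build/feature/test-all.c.

/*
 * Self-contained illustration of the macro leak described above.  The
 * feature-detection fast path compiles all feature tests as one
 * translation unit (test-all.c #includes each test-*.c); here the two
 * "tests" are inlined stand-ins with made-up names.
 */
#include <stdio.h>

/* Stand-in for test-sched_getcpu.c, which needs GNU extensions. */
#define _GNU_SOURCE
static int main_test_sched_getcpu(void)
{
	return 0;	/* pretend sched_getcpu() was found and works */
}
/* The real patch appends the equivalent of the next line to the test: */
/* #undef _GNU_SOURCE */

/* Stand-in for whatever feature test is included next. */
static int main_test_next_feature(void)
{
#ifdef _GNU_SOURCE
	/* Leaked from the previous test: this test never defined it. */
	fprintf(stderr, "next feature test sees a leaked _GNU_SOURCE\n");
#endif
	return 0;
}

int main(void)
{
	return main_test_sched_getcpu() || main_test_next_feature();
}

Run as-is, the sketch prints the leak message; enabling the commented-out #undef (the counterpart of the #undef _GNU_SOURCE this patch appends to test-sched_getcpu.c) lets the second stand-in compile without the stray define. In the real feature tests that is what matters, since libc headers expose different declarations depending on whether _GNU_SOURCE is set when they are included.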