diff options
author | Richard Henderson | 2021-11-04 13:33:46 +0100 |
---|---|---|
committer | Richard Henderson | 2021-11-04 13:33:46 +0100 |
commit | 18e356a53a2926a15343b914db64324d63748f25 (patch) | |
tree | b4da84fdfec10103fa82301f5dfb4e06b94430c5 /contrib/plugins/cache.c | |
parent | Merge remote-tracking branch 'remotes/quic/tags/pull-hex-20211103' into staging (diff) | |
parent | tests/vm/openbsd: Update to release 7.0 (diff) | |
download | qemu-18e356a53a2926a15343b914db64324d63748f25.tar.gz qemu-18e356a53a2926a15343b914db64324d63748f25.tar.xz qemu-18e356a53a2926a15343b914db64324d63748f25.zip |
Merge remote-tracking branch 'remotes/stsquad/tags/pull-for-6.2-041121-2' into staging
Testing, gdbstub and plugin updates for 6.2
- add microblaze and nios2 compiler docker images
- fix test cross compiler detection for some targets
- don't try and link ebf to user targets
- add L2 tracking to cache plugin
- exit cleanly on C-a x
- clean up debug output in check-tcg
- switch to thread on break in gdbstub
- update openbsd VM to 7.0
# gpg: Signature made Thu 04 Nov 2021 08:14:35 AM EDT
# gpg: using RSA key 6685AE99E75167BCAFC8DF35FBD0DB095A9E2A44
# gpg: Good signature from "Alex Bennée (Master Work Key) <alex.bennee@linaro.org>" [full]
* remotes/stsquad/tags/pull-for-6.2-041121-2:
tests/vm/openbsd: Update to release 7.0
tests/tcg: remove debug polluting make output
gdbstub: Switch to the thread receiving a signal
tests/tcg: remove duplicate EXTRA_RUNS
plugins: try and make plugin_insn_append more ergonomic
tests/plugins: extend the insn plugin to track opcode sizes
chardev: don't exit() straight away on C-a x
docs/tcg-plugins: add L2 arguments to cache docs
plugins/cache: make L2 emulation optional through args
plugins/cache: split command line arguments into name and value
plugins/cache: implement unified L2 cache emulation
plugins/cache: freed heap-allocated mutexes
ebpf: really include it only in system emulators
tests/tcg: enable debian-nios2-cross for test building
tests/docker: split PARTIAL into PARTIAL and VIRTUAL images
tests/tcg: Fix some targets default cross compiler path
tests/tcg: Enable container_cross_cc for microblaze
tests/docker: Add debian-microblaze-cross image
tests/docker: Add debian-nios2-cross image
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'contrib/plugins/cache.c')
-rw-r--r-- | contrib/plugins/cache.c | 318 |
1 files changed, 225 insertions, 93 deletions
diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c index a1e03ca882..b9226e7c40 100644 --- a/contrib/plugins/cache.c +++ b/contrib/plugins/cache.c @@ -11,6 +11,8 @@ #include <qemu-plugin.h> +#define STRTOLL(x) g_ascii_strtoll(x, NULL, 10) + QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW; @@ -82,8 +84,9 @@ typedef struct { char *disas_str; const char *symbol; uint64_t addr; - uint64_t dmisses; - uint64_t imisses; + uint64_t l1_dmisses; + uint64_t l1_imisses; + uint64_t l2_misses; } InsnData; void (*update_hit)(Cache *cache, int set, int blk); @@ -93,15 +96,22 @@ void (*metadata_init)(Cache *cache); void (*metadata_destroy)(Cache *cache); static int cores; -static Cache **dcaches, **icaches; +static Cache **l1_dcaches, **l1_icaches; + +static bool use_l2; +static Cache **l2_ucaches; -static GMutex *dcache_locks; -static GMutex *icache_locks; +static GMutex *l1_dcache_locks; +static GMutex *l1_icache_locks; +static GMutex *l2_ucache_locks; -static uint64_t all_dmem_accesses; -static uint64_t all_imem_accesses; -static uint64_t all_imisses; -static uint64_t all_dmisses; +static uint64_t l1_dmem_accesses; +static uint64_t l1_imem_accesses; +static uint64_t l1_imisses; +static uint64_t l1_dmisses; + +static uint64_t l2_mem_accesses; +static uint64_t l2_misses; static int pow_of_two(int num) { @@ -382,6 +392,7 @@ static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info, struct qemu_plugin_hwaddr *hwaddr; int cache_idx; InsnData *insn; + bool hit_in_l1; hwaddr = qemu_plugin_get_hwaddr(info, vaddr); if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) { @@ -391,14 +402,29 @@ static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info, effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr; cache_idx = vcpu_index % cores; - g_mutex_lock(&dcache_locks[cache_idx]); - if (!access_cache(dcaches[cache_idx], effective_addr)) { + g_mutex_lock(&l1_dcache_locks[cache_idx]); + hit_in_l1 = access_cache(l1_dcaches[cache_idx], effective_addr); + if (!hit_in_l1) { + insn = (InsnData *) userdata; + __atomic_fetch_add(&insn->l1_dmisses, 1, __ATOMIC_SEQ_CST); + l1_dcaches[cache_idx]->misses++; + } + l1_dcaches[cache_idx]->accesses++; + g_mutex_unlock(&l1_dcache_locks[cache_idx]); + + if (hit_in_l1 || !use_l2) { + /* No need to access L2 */ + return; + } + + g_mutex_lock(&l2_ucache_locks[cache_idx]); + if (!access_cache(l2_ucaches[cache_idx], effective_addr)) { insn = (InsnData *) userdata; - __atomic_fetch_add(&insn->dmisses, 1, __ATOMIC_SEQ_CST); - dcaches[cache_idx]->misses++; + __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST); + l2_ucaches[cache_idx]->misses++; } - dcaches[cache_idx]->accesses++; - g_mutex_unlock(&dcache_locks[cache_idx]); + l2_ucaches[cache_idx]->accesses++; + g_mutex_unlock(&l2_ucache_locks[cache_idx]); } static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata) @@ -406,18 +432,34 @@ static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata) uint64_t insn_addr; InsnData *insn; int cache_idx; + bool hit_in_l1; insn_addr = ((InsnData *) userdata)->addr; cache_idx = vcpu_index % cores; - g_mutex_lock(&icache_locks[cache_idx]); - if (!access_cache(icaches[cache_idx], insn_addr)) { + g_mutex_lock(&l1_icache_locks[cache_idx]); + hit_in_l1 = access_cache(l1_icaches[cache_idx], insn_addr); + if (!hit_in_l1) { + insn = (InsnData *) userdata; + __atomic_fetch_add(&insn->l1_imisses, 1, __ATOMIC_SEQ_CST); + l1_icaches[cache_idx]->misses++; + } + l1_icaches[cache_idx]->accesses++; + g_mutex_unlock(&l1_icache_locks[cache_idx]); + + if (hit_in_l1 || !use_l2) { + /* No need to access L2 */ + return; + } + + g_mutex_lock(&l2_ucache_locks[cache_idx]); + if (!access_cache(l2_ucaches[cache_idx], insn_addr)) { insn = (InsnData *) userdata; - __atomic_fetch_add(&insn->imisses, 1, __ATOMIC_SEQ_CST); - icaches[cache_idx]->misses++; + __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST); + l2_ucaches[cache_idx]->misses++; } - icaches[cache_idx]->accesses++; - g_mutex_unlock(&icache_locks[cache_idx]); + l2_ucaches[cache_idx]->accesses++; + g_mutex_unlock(&l2_ucache_locks[cache_idx]); } static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) @@ -493,30 +535,34 @@ static void caches_free(Cache **caches) } } -static int dcmp(gconstpointer a, gconstpointer b) +static void append_stats_line(GString *line, uint64_t l1_daccess, + uint64_t l1_dmisses, uint64_t l1_iaccess, + uint64_t l1_imisses, uint64_t l2_access, + uint64_t l2_misses) { - InsnData *insn_a = (InsnData *) a; - InsnData *insn_b = (InsnData *) b; + double l1_dmiss_rate, l1_imiss_rate, l2_miss_rate; - return insn_a->dmisses < insn_b->dmisses ? 1 : -1; -} + l1_dmiss_rate = ((double) l1_dmisses) / (l1_daccess) * 100.0; + l1_imiss_rate = ((double) l1_imisses) / (l1_iaccess) * 100.0; -static void append_stats_line(GString *line, uint64_t daccess, uint64_t dmisses, - uint64_t iaccess, uint64_t imisses) -{ - double dmiss_rate, imiss_rate; + g_string_append_printf(line, "%-14lu %-12lu %9.4lf%% %-14lu %-12lu" + " %9.4lf%%", + l1_daccess, + l1_dmisses, + l1_daccess ? l1_dmiss_rate : 0.0, + l1_iaccess, + l1_imisses, + l1_iaccess ? l1_imiss_rate : 0.0); - dmiss_rate = ((double) dmisses) / (daccess) * 100.0; - imiss_rate = ((double) imisses) / (iaccess) * 100.0; + if (use_l2) { + l2_miss_rate = ((double) l2_misses) / (l2_access) * 100.0; + g_string_append_printf(line, " %-12lu %-11lu %10.4lf%%", + l2_access, + l2_misses, + l2_access ? l2_miss_rate : 0.0); + } - g_string_append_printf(line, "%-14lu %-12lu %9.4lf%% %-14lu %-12lu" - " %9.4lf%%\n", - daccess, - dmisses, - daccess ? dmiss_rate : 0.0, - iaccess, - imisses, - iaccess ? imiss_rate : 0.0); + g_string_append(line, "\n"); } static void sum_stats(void) @@ -525,43 +571,74 @@ static void sum_stats(void) g_assert(cores > 1); for (i = 0; i < cores; i++) { - all_imisses += icaches[i]->misses; - all_dmisses += dcaches[i]->misses; - all_imem_accesses += icaches[i]->accesses; - all_dmem_accesses += dcaches[i]->accesses; + l1_imisses += l1_icaches[i]->misses; + l1_dmisses += l1_dcaches[i]->misses; + l1_imem_accesses += l1_icaches[i]->accesses; + l1_dmem_accesses += l1_dcaches[i]->accesses; + + if (use_l2) { + l2_misses += l2_ucaches[i]->misses; + l2_mem_accesses += l2_ucaches[i]->accesses; + } } } +static int dcmp(gconstpointer a, gconstpointer b) +{ + InsnData *insn_a = (InsnData *) a; + InsnData *insn_b = (InsnData *) b; + + return insn_a->l1_dmisses < insn_b->l1_dmisses ? 1 : -1; +} + static int icmp(gconstpointer a, gconstpointer b) { InsnData *insn_a = (InsnData *) a; InsnData *insn_b = (InsnData *) b; - return insn_a->imisses < insn_b->imisses ? 1 : -1; + return insn_a->l1_imisses < insn_b->l1_imisses ? 1 : -1; +} + +static int l2_cmp(gconstpointer a, gconstpointer b) +{ + InsnData *insn_a = (InsnData *) a; + InsnData *insn_b = (InsnData *) b; + + return insn_a->l2_misses < insn_b->l2_misses ? 1 : -1; } static void log_stats(void) { int i; - Cache *icache, *dcache; + Cache *icache, *dcache, *l2_cache; g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses," " dmiss rate, insn accesses," - " insn misses, imiss rate\n"); + " insn misses, imiss rate"); + + if (use_l2) { + g_string_append(rep, ", l2 accesses, l2 misses, l2 miss rate"); + } + + g_string_append(rep, "\n"); for (i = 0; i < cores; i++) { g_string_append_printf(rep, "%-8d", i); - dcache = dcaches[i]; - icache = icaches[i]; + dcache = l1_dcaches[i]; + icache = l1_icaches[i]; + l2_cache = use_l2 ? l2_ucaches[i] : NULL; append_stats_line(rep, dcache->accesses, dcache->misses, - icache->accesses, icache->misses); + icache->accesses, icache->misses, + l2_cache ? l2_cache->accesses : 0, + l2_cache ? l2_cache->misses : 0); } if (cores > 1) { sum_stats(); g_string_append_printf(rep, "%-8s", "sum"); - append_stats_line(rep, all_dmem_accesses, all_dmisses, - all_imem_accesses, all_imisses); + append_stats_line(rep, l1_dmem_accesses, l1_dmisses, + l1_imem_accesses, l1_imisses, + l2_cache ? l2_mem_accesses : 0, l2_cache ? l2_misses : 0); } g_string_append(rep, "\n"); @@ -585,7 +662,7 @@ static void log_top_insns(void) if (insn->symbol) { g_string_append_printf(rep, " (%s)", insn->symbol); } - g_string_append_printf(rep, ", %ld, %s\n", insn->dmisses, + g_string_append_printf(rep, ", %ld, %s\n", insn->l1_dmisses, insn->disas_str); } @@ -598,10 +675,28 @@ static void log_top_insns(void) if (insn->symbol) { g_string_append_printf(rep, " (%s)", insn->symbol); } - g_string_append_printf(rep, ", %ld, %s\n", insn->imisses, + g_string_append_printf(rep, ", %ld, %s\n", insn->l1_imisses, insn->disas_str); } + if (!use_l2) { + goto finish; + } + + miss_insns = g_list_sort(miss_insns, l2_cmp); + g_string_append_printf(rep, "%s", "\naddress, L2 misses, instruction\n"); + + for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) { + insn = (InsnData *) curr->data; + g_string_append_printf(rep, "0x%" PRIx64, insn->addr); + if (insn->symbol) { + g_string_append_printf(rep, " (%s)", insn->symbol); + } + g_string_append_printf(rep, ", %ld, %s\n", insn->l2_misses, + insn->disas_str); + } + +finish: qemu_plugin_outs(rep->str); g_list_free(miss_insns); } @@ -611,8 +706,16 @@ static void plugin_exit(qemu_plugin_id_t id, void *p) log_stats(); log_top_insns(); - caches_free(dcaches); - caches_free(icaches); + caches_free(l1_dcaches); + caches_free(l1_icaches); + + g_free(l1_dcache_locks); + g_free(l1_icache_locks); + + if (use_l2) { + caches_free(l2_ucaches); + g_free(l2_ucache_locks); + } g_hash_table_destroy(miss_ht); } @@ -644,19 +747,24 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, int argc, char **argv) { int i; - int iassoc, iblksize, icachesize; - int dassoc, dblksize, dcachesize; + int l1_iassoc, l1_iblksize, l1_icachesize; + int l1_dassoc, l1_dblksize, l1_dcachesize; + int l2_assoc, l2_blksize, l2_cachesize; limit = 32; sys = info->system_emulation; - dassoc = 8; - dblksize = 64; - dcachesize = dblksize * dassoc * 32; + l1_dassoc = 8; + l1_dblksize = 64; + l1_dcachesize = l1_dblksize * l1_dassoc * 32; - iassoc = 8; - iblksize = 64; - icachesize = iblksize * iassoc * 32; + l1_iassoc = 8; + l1_iblksize = 64; + l1_icachesize = l1_iblksize * l1_iassoc * 32; + + l2_assoc = 16; + l2_blksize = 64; + l2_cachesize = l2_assoc * l2_blksize * 2048; policy = LRU; @@ -664,29 +772,44 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, for (i = 0; i < argc; i++) { char *opt = argv[i]; - if (g_str_has_prefix(opt, "iblksize=")) { - iblksize = g_ascii_strtoll(opt + 9, NULL, 10); - } else if (g_str_has_prefix(opt, "iassoc=")) { - iassoc = g_ascii_strtoll(opt + 7, NULL, 10); - } else if (g_str_has_prefix(opt, "icachesize=")) { - icachesize = g_ascii_strtoll(opt + 11, NULL, 10); - } else if (g_str_has_prefix(opt, "dblksize=")) { - dblksize = g_ascii_strtoll(opt + 9, NULL, 10); - } else if (g_str_has_prefix(opt, "dassoc=")) { - dassoc = g_ascii_strtoll(opt + 7, NULL, 10); - } else if (g_str_has_prefix(opt, "dcachesize=")) { - dcachesize = g_ascii_strtoll(opt + 11, NULL, 10); - } else if (g_str_has_prefix(opt, "limit=")) { - limit = g_ascii_strtoll(opt + 6, NULL, 10); - } else if (g_str_has_prefix(opt, "cores=")) { - cores = g_ascii_strtoll(opt + 6, NULL, 10); - } else if (g_str_has_prefix(opt, "evict=")) { - gchar *p = opt + 6; - if (g_strcmp0(p, "rand") == 0) { + g_autofree char **tokens = g_strsplit(opt, "=", 2); + + if (g_strcmp0(tokens[0], "iblksize") == 0) { + l1_iblksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "iassoc") == 0) { + l1_iassoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "icachesize") == 0) { + l1_icachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dblksize") == 0) { + l1_dblksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dassoc") == 0) { + l1_dassoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "dcachesize") == 0) { + l1_dcachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "limit") == 0) { + limit = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "cores") == 0) { + cores = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2cachesize") == 0) { + use_l2 = true; + l2_cachesize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2blksize") == 0) { + use_l2 = true; + l2_blksize = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2assoc") == 0) { + use_l2 = true; + l2_assoc = STRTOLL(tokens[1]); + } else if (g_strcmp0(tokens[0], "l2") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &use_l2)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } + } else if (g_strcmp0(tokens[0], "evict") == 0) { + if (g_strcmp0(tokens[1], "rand") == 0) { policy = RAND; - } else if (g_strcmp0(p, "lru") == 0) { + } else if (g_strcmp0(tokens[1], "lru") == 0) { policy = LRU; - } else if (g_strcmp0(p, "fifo") == 0) { + } else if (g_strcmp0(tokens[1], "fifo") == 0) { policy = FIFO; } else { fprintf(stderr, "invalid eviction policy: %s\n", opt); @@ -700,24 +823,33 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, policy_init(); - dcaches = caches_init(dblksize, dassoc, dcachesize); - if (!dcaches) { - const char *err = cache_config_error(dblksize, dassoc, dcachesize); + l1_dcaches = caches_init(l1_dblksize, l1_dassoc, l1_dcachesize); + if (!l1_dcaches) { + const char *err = cache_config_error(l1_dblksize, l1_dassoc, l1_dcachesize); fprintf(stderr, "dcache cannot be constructed from given parameters\n"); fprintf(stderr, "%s\n", err); return -1; } - icaches = caches_init(iblksize, iassoc, icachesize); - if (!icaches) { - const char *err = cache_config_error(iblksize, iassoc, icachesize); + l1_icaches = caches_init(l1_iblksize, l1_iassoc, l1_icachesize); + if (!l1_icaches) { + const char *err = cache_config_error(l1_iblksize, l1_iassoc, l1_icachesize); fprintf(stderr, "icache cannot be constructed from given parameters\n"); fprintf(stderr, "%s\n", err); return -1; } - dcache_locks = g_new0(GMutex, cores); - icache_locks = g_new0(GMutex, cores); + l2_ucaches = use_l2 ? caches_init(l2_blksize, l2_assoc, l2_cachesize) : NULL; + if (!l2_ucaches && use_l2) { + const char *err = cache_config_error(l2_blksize, l2_assoc, l2_cachesize); + fprintf(stderr, "L2 cache cannot be constructed from given parameters\n"); + fprintf(stderr, "%s\n", err); + return -1; + } + + l1_dcache_locks = g_new0(GMutex, cores); + l1_icache_locks = g_new0(GMutex, cores); + l2_ucache_locks = use_l2 ? g_new0(GMutex, cores) : NULL; qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); |