From be2cdc5e352eb28b4ff631f053a261d91e6af78e Mon Sep 17 00:00:00 2001 From: Emilio G. Cota Date: Wed, 26 Jul 2017 16:58:05 -0400 Subject: tcg: track TBs with per-region BST's This paves the way for enabling scalable parallel generation of TCG code. Instead of tracking TBs with a single binary search tree (BST), use a BST for each TCG region, protecting it with a lock. This is as scalable as it gets, since each TCG thread operates on a separate region. The core of this change is the introduction of struct tcg_region_tree, which contains a pointer to a GTree and an associated lock to serialize accesses to it. We then allocate an array of tcg_region_tree's, adding the appropriate padding to avoid false sharing based on qemu_dcache_linesize. Given a tc_ptr, we first find the corresponding region_tree. This is done by special-casing the first and last regions first, since they might be of size != region.size; otherwise we just divide the offset by region.stride. I was worried about this division (several dozen cycles of latency), but profiling shows that this is not a fast path. Note that region.stride is not required to be a power of two; it is only required to be a multiple of the host's page size. Note that with this design we can also provide consistent snapshots about all region trees at once; for instance, tcg_tb_foreach acquires/releases all region_tree locks before/after iterating over them. For this reason we now drop tb_lock in dump_exec_info(). As an alternative I considered implementing a concurrent BST, but this can be tricky to get right, offers no consistent snapshots of the BST, and performance and scalability-wise I don't think it could ever beat having separate GTrees, given that our workload is insert-mostly (all concurrent BST designs I've seen focus, understandably, on making lookups fast, which comes at the expense of convoluted, non-wait-free insertions/removals). Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- include/exec/exec-all.h | 1 - include/exec/tb-context.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/exec') diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 8bbea787a9..8d4306ac25 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -405,7 +405,6 @@ static inline uint32_t curr_cflags(void) | (use_icount ? CF_USE_ICOUNT : 0); } -void tb_remove(TranslationBlock *tb); void tb_flush(CPUState *cpu); void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr); TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index 1d41202485..d8472c88fb 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -31,7 +31,6 @@ typedef struct TBContext TBContext; struct TBContext { - GTree *tb_tree; struct qht htable; /* any access to the tbs or the page table must use this lock */ QemuMutex tb_lock; -- cgit v1.2.3-55-g7522 From 128ed2278c4e6ad063f101c5dda7999b43f2d8a3 Mon Sep 17 00:00:00 2001 From: Emilio G. Cota Date: Tue, 1 Aug 2017 15:11:12 -0400 Subject: tcg: move tb_ctx.tb_phys_invalidate_count to tcg_ctx Thereby making it per-TCGContext. Once we remove tb_lock, this will avoid an atomic increment every time a TB is invalidated. Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 5 +++-- include/exec/tb-context.h | 1 - tcg/tcg.c | 14 ++++++++++++++ tcg/tcg.h | 3 +++ 4 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index ef841c82cc..a9f2bfb468 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1069,7 +1069,8 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) /* suppress any remaining jumps to this TB */ tb_jmp_unlink(tb); - tb_ctx.tb_phys_invalidate_count++; + atomic_set(&tcg_ctx->tb_phys_invalidate_count, + tcg_ctx->tb_phys_invalidate_count + 1); } #ifdef CONFIG_SOFTMMU @@ -1855,7 +1856,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) cpu_fprintf(f, "\nStatistics:\n"); cpu_fprintf(f, "TB flush count %u\n", atomic_read(&tb_ctx.tb_flush_count)); - cpu_fprintf(f, "TB invalidate count %d\n", tb_ctx.tb_phys_invalidate_count); + cpu_fprintf(f, "TB invalidate count %zu\n", tcg_tb_phys_invalidate_count()); cpu_fprintf(f, "TLB flush count %zu\n", tlb_flush_count()); tcg_dump_info(f, cpu_fprintf); } diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index d8472c88fb..8c9b49c98e 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -37,7 +37,6 @@ struct TBContext { /* statistics */ unsigned tb_flush_count; - int tb_phys_invalidate_count; }; extern TBContext tb_ctx; diff --git a/tcg/tcg.c b/tcg/tcg.c index 62e3391020..1d1dfd7f7c 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -791,6 +791,20 @@ size_t tcg_code_capacity(void) return capacity; } +size_t tcg_tb_phys_invalidate_count(void) +{ + unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); + unsigned int i; + size_t total = 0; + + for (i = 0; i < n_ctxs; i++) { + const TCGContext *s = atomic_read(&tcg_ctxs[i]); + + total += atomic_read(&s->tb_phys_invalidate_count); + } + return total; +} + /* pool based memory allocation */ void *tcg_malloc_internal(TCGContext *s, int size) { diff --git a/tcg/tcg.h b/tcg/tcg.h index 1e6df1906f..e49b289ba1 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -695,6 +695,8 @@ struct TCGContext { /* Threshold to flush the translated code buffer. */ void *code_gen_highwater; + size_t tb_phys_invalidate_count; + /* Track which vCPU triggers events */ CPUState *cpu; /* *_trans */ @@ -868,6 +870,7 @@ size_t tcg_code_capacity(void); void tcg_tb_insert(TranslationBlock *tb); void tcg_tb_remove(TranslationBlock *tb); +size_t tcg_tb_phys_invalidate_count(void); TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr); void tcg_tb_foreach(GTraverseFunc func, gpointer user_data); size_t tcg_nb_tbs(void); -- cgit v1.2.3-55-g7522 From 1e05197f24c49d52f339de9053bb1d17082f1be3 Mon Sep 17 00:00:00 2001 From: Emilio G. Cota Date: Thu, 3 Aug 2017 18:37:15 -0400 Subject: translate-all: iterate over TBs in a page with PAGE_FOR_EACH_TB This commit does several things, but to avoid churn I merged them all into the same commit. To wit: - Use uintptr_t instead of TranslationBlock * for the list of TBs in a page. Just like we did in (c37e6d7e "tcg: Use uintptr_t type for jmp_list_{next|first} fields of TB"), the rationale is the same: these are tagged pointers, not pointers. So use a more appropriate type. - Only check the least significant bit of the tagged pointers. Masking with 3/~3 is unnecessary and confusing. - Introduce the TB_FOR_EACH_TAGGED macro, and use it to define PAGE_FOR_EACH_TB, which improves readability. Note that TB_FOR_EACH_TAGGED will gain another user in a subsequent patch. - Update tb_page_remove to use PAGE_FOR_EACH_TB. In case there is a bug and we attempt to remove a TB that is not in the list, instead of segfaulting (since the list is NULL-terminated) we will reach g_assert_not_reached(). Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 62 ++++++++++++++++++++++------------------------- include/exec/exec-all.h | 2 +- 2 files changed, 30 insertions(+), 34 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index a9f2bfb468..52e62125ed 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -103,7 +103,7 @@ typedef struct PageDesc { /* list of TBs intersecting this ram page */ - TranslationBlock *first_tb; + uintptr_t first_tb; #ifdef CONFIG_SOFTMMU /* in order to optimize self modifying code, we count the number of lookups we do to a given page to use a bitmap */ @@ -114,6 +114,15 @@ typedef struct PageDesc { #endif } PageDesc; +/* list iterators for lists of tagged pointers in TranslationBlock */ +#define TB_FOR_EACH_TAGGED(head, tb, n, field) \ + for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1); \ + tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \ + tb = (TranslationBlock *)((uintptr_t)tb & ~1)) + +#define PAGE_FOR_EACH_TB(pagedesc, tb, n) \ + TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next) + /* In system mode we want L1_MAP to be based on ram offsets, while in user mode we want it to be based on virtual addresses. */ #if !defined(CONFIG_USER_ONLY) @@ -815,7 +824,7 @@ static void page_flush_tb_1(int level, void **lp) PageDesc *pd = *lp; for (i = 0; i < V_L2_SIZE; ++i) { - pd[i].first_tb = NULL; + pd[i].first_tb = (uintptr_t)NULL; invalidate_page_bitmap(pd + i); } } else { @@ -943,21 +952,21 @@ static void tb_page_check(void) #endif /* CONFIG_USER_ONLY */ -static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb) +static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb) { TranslationBlock *tb1; + uintptr_t *pprev; unsigned int n1; - for (;;) { - tb1 = *ptb; - n1 = (uintptr_t)tb1 & 3; - tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); + pprev = &pd->first_tb; + PAGE_FOR_EACH_TB(pd, tb1, n1) { if (tb1 == tb) { - *ptb = tb1->page_next[n1]; - break; + *pprev = tb1->page_next[n1]; + return; } - ptb = &tb1->page_next[n1]; + pprev = &tb1->page_next[n1]; } + g_assert_not_reached(); } /* remove the TB from a list of TBs jumping to the n-th jump target of the TB */ @@ -1045,12 +1054,12 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) /* remove the TB from the page list */ if (tb->page_addr[0] != page_addr) { p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS); - tb_page_remove(&p->first_tb, tb); + tb_page_remove(p, tb); invalidate_page_bitmap(p); } if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) { p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS); - tb_page_remove(&p->first_tb, tb); + tb_page_remove(p, tb); invalidate_page_bitmap(p); } @@ -1081,10 +1090,7 @@ static void build_page_bitmap(PageDesc *p) p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE); - tb = p->first_tb; - while (tb != NULL) { - n = (uintptr_t)tb & 3; - tb = (TranslationBlock *)((uintptr_t)tb & ~3); + PAGE_FOR_EACH_TB(p, tb, n) { /* NOTE: this is subtle as a TB may span two physical pages */ if (n == 0) { /* NOTE: tb_end may be after the end of the page, but @@ -1099,7 +1105,6 @@ static void build_page_bitmap(PageDesc *p) tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK); } bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start); - tb = tb->page_next[n]; } } #endif @@ -1122,9 +1127,9 @@ static inline void tb_alloc_page(TranslationBlock *tb, p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1); tb->page_next[n] = p->first_tb; #ifndef CONFIG_USER_ONLY - page_already_protected = p->first_tb != NULL; + page_already_protected = p->first_tb != (uintptr_t)NULL; #endif - p->first_tb = (TranslationBlock *)((uintptr_t)tb | n); + p->first_tb = (uintptr_t)tb | n; invalidate_page_bitmap(p); #if defined(CONFIG_USER_ONLY) @@ -1401,7 +1406,7 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access) { - TranslationBlock *tb, *tb_next; + TranslationBlock *tb; tb_page_addr_t tb_start, tb_end; PageDesc *p; int n; @@ -1432,11 +1437,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, /* we remove all the TBs in the range [start, end[ */ /* XXX: see if in some cases it could be faster to invalidate all the code */ - tb = p->first_tb; - while (tb != NULL) { - n = (uintptr_t)tb & 3; - tb = (TranslationBlock *)((uintptr_t)tb & ~3); - tb_next = tb->page_next[n]; + PAGE_FOR_EACH_TB(p, tb, n) { /* NOTE: this is subtle as a TB may span two physical pages */ if (n == 0) { /* NOTE: tb_end may be after the end of the page, but @@ -1474,7 +1475,6 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, #endif /* TARGET_HAS_PRECISE_SMC */ tb_phys_invalidate(tb, -1); } - tb = tb_next; } #if !defined(CONFIG_USER_ONLY) /* if no code remaining, no need to continue to use slow writes */ @@ -1568,18 +1568,15 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) } tb_lock(); - tb = p->first_tb; #ifdef TARGET_HAS_PRECISE_SMC - if (tb && pc != 0) { + if (p->first_tb && pc != 0) { current_tb = tcg_tb_lookup(pc); } if (cpu != NULL) { env = cpu->env_ptr; } #endif - while (tb != NULL) { - n = (uintptr_t)tb & 3; - tb = (TranslationBlock *)((uintptr_t)tb & ~3); + PAGE_FOR_EACH_TB(p, tb, n) { #ifdef TARGET_HAS_PRECISE_SMC if (current_tb == tb && (current_tb->cflags & CF_COUNT_MASK) != 1) { @@ -1596,9 +1593,8 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) } #endif /* TARGET_HAS_PRECISE_SMC */ tb_phys_invalidate(tb, addr); - tb = tb->page_next[n]; } - p->first_tb = NULL; + p->first_tb = (uintptr_t)NULL; #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_modified) { /* Force execution of one insn next time. */ diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 8d4306ac25..07653d3c92 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -360,7 +360,7 @@ struct TranslationBlock { struct TranslationBlock *orig_tb; /* first and second physical page containing code. The lower bit of the pointer tells the index in page_next[] */ - struct TranslationBlock *page_next[2]; + uintptr_t page_next[2]; tb_page_addr_t page_addr[2]; /* The following data are used to directly call another TB from -- cgit v1.2.3-55-g7522 From 0b5c91f74f3c83a36f37740969df8c775c997e69 Mon Sep 17 00:00:00 2001 From: Emilio G. Cota Date: Wed, 26 Jul 2017 20:22:51 -0400 Subject: translate-all: use per-page locking in !user-mode Groundwork for supporting parallel TCG generation. Instead of using a global lock (tb_lock) to protect changes to pages, use fine-grained, per-page locks in !user-mode. User-mode stays with mmap_lock. Sometimes changes need to happen atomically on more than one page (e.g. when a TB that spans across two pages is added/invalidated, or when a range of pages is invalidated). We therefore introduce struct page_collection, which helps us keep track of a set of pages that have been locked in the appropriate locking order (i.e. by ascending page index). This commit first introduces the structs and the function helpers, to then convert the calling code to use per-page locking. Note that tb_lock is not removed yet. While at it, rename tb_alloc_page to tb_page_add, which pairs with tb_page_remove. Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 444 +++++++++++++++++++++++++++++++++++++++++----- accel/tcg/translate-all.h | 3 + include/exec/exec-all.h | 3 +- 3 files changed, 409 insertions(+), 41 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 83bb40fb20..1cc7aab82c 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -112,8 +112,55 @@ typedef struct PageDesc { #else unsigned long flags; #endif +#ifndef CONFIG_USER_ONLY + QemuSpin lock; +#endif } PageDesc; +/** + * struct page_entry - page descriptor entry + * @pd: pointer to the &struct PageDesc of the page this entry represents + * @index: page index of the page + * @locked: whether the page is locked + * + * This struct helps us keep track of the locked state of a page, without + * bloating &struct PageDesc. + * + * A page lock protects accesses to all fields of &struct PageDesc. + * + * See also: &struct page_collection. + */ +struct page_entry { + PageDesc *pd; + tb_page_addr_t index; + bool locked; +}; + +/** + * struct page_collection - tracks a set of pages (i.e. &struct page_entry's) + * @tree: Binary search tree (BST) of the pages, with key == page index + * @max: Pointer to the page in @tree with the highest page index + * + * To avoid deadlock we lock pages in ascending order of page index. + * When operating on a set of pages, we need to keep track of them so that + * we can lock them in order and also unlock them later. For this we collect + * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the + * @tree implementation we use does not provide an O(1) operation to obtain the + * highest-ranked element, we use @max to keep track of the inserted page + * with the highest index. This is valuable because if a page is not in + * the tree and its index is higher than @max's, then we can lock it + * without breaking the locking order rule. + * + * Note on naming: 'struct page_set' would be shorter, but we already have a few + * page_set_*() helpers, so page_collection is used instead to avoid confusion. + * + * See also: page_collection_lock(). + */ +struct page_collection { + GTree *tree; + struct page_entry *max; +}; + /* list iterators for lists of tagged pointers in TranslationBlock */ #define TB_FOR_EACH_TAGGED(head, tb, n, field) \ for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1); \ @@ -507,6 +554,15 @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) return NULL; } pd = g_new0(PageDesc, V_L2_SIZE); +#ifndef CONFIG_USER_ONLY + { + int i; + + for (i = 0; i < V_L2_SIZE; i++) { + qemu_spin_init(&pd[i].lock); + } + } +#endif existing = atomic_cmpxchg(lp, NULL, pd); if (unlikely(existing)) { g_free(pd); @@ -522,6 +578,253 @@ static inline PageDesc *page_find(tb_page_addr_t index) return page_find_alloc(index, 0); } +static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, + PageDesc **ret_p2, tb_page_addr_t phys2, int alloc); + +/* In user-mode page locks aren't used; mmap_lock is enough */ +#ifdef CONFIG_USER_ONLY +static inline void page_lock(PageDesc *pd) +{ } + +static inline void page_unlock(PageDesc *pd) +{ } + +static inline void page_lock_tb(const TranslationBlock *tb) +{ } + +static inline void page_unlock_tb(const TranslationBlock *tb) +{ } + +struct page_collection * +page_collection_lock(tb_page_addr_t start, tb_page_addr_t end) +{ + return NULL; +} + +void page_collection_unlock(struct page_collection *set) +{ } +#else /* !CONFIG_USER_ONLY */ + +static inline void page_lock(PageDesc *pd) +{ + qemu_spin_lock(&pd->lock); +} + +static inline void page_unlock(PageDesc *pd) +{ + qemu_spin_unlock(&pd->lock); +} + +/* lock the page(s) of a TB in the correct acquisition order */ +static inline void page_lock_tb(const TranslationBlock *tb) +{ + page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0); +} + +static inline void page_unlock_tb(const TranslationBlock *tb) +{ + page_unlock(page_find(tb->page_addr[0] >> TARGET_PAGE_BITS)); + if (unlikely(tb->page_addr[1] != -1)) { + page_unlock(page_find(tb->page_addr[1] >> TARGET_PAGE_BITS)); + } +} + +static inline struct page_entry * +page_entry_new(PageDesc *pd, tb_page_addr_t index) +{ + struct page_entry *pe = g_malloc(sizeof(*pe)); + + pe->index = index; + pe->pd = pd; + pe->locked = false; + return pe; +} + +static void page_entry_destroy(gpointer p) +{ + struct page_entry *pe = p; + + g_assert(pe->locked); + page_unlock(pe->pd); + g_free(pe); +} + +/* returns false on success */ +static bool page_entry_trylock(struct page_entry *pe) +{ + bool busy; + + busy = qemu_spin_trylock(&pe->pd->lock); + if (!busy) { + g_assert(!pe->locked); + pe->locked = true; + } + return busy; +} + +static void do_page_entry_lock(struct page_entry *pe) +{ + page_lock(pe->pd); + g_assert(!pe->locked); + pe->locked = true; +} + +static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data) +{ + struct page_entry *pe = value; + + do_page_entry_lock(pe); + return FALSE; +} + +static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data) +{ + struct page_entry *pe = value; + + if (pe->locked) { + pe->locked = false; + page_unlock(pe->pd); + } + return FALSE; +} + +/* + * Trylock a page, and if successful, add the page to a collection. + * Returns true ("busy") if the page could not be locked; false otherwise. + */ +static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr) +{ + tb_page_addr_t index = addr >> TARGET_PAGE_BITS; + struct page_entry *pe; + PageDesc *pd; + + pe = g_tree_lookup(set->tree, &index); + if (pe) { + return false; + } + + pd = page_find(index); + if (pd == NULL) { + return false; + } + + pe = page_entry_new(pd, index); + g_tree_insert(set->tree, &pe->index, pe); + + /* + * If this is either (1) the first insertion or (2) a page whose index + * is higher than any other so far, just lock the page and move on. + */ + if (set->max == NULL || pe->index > set->max->index) { + set->max = pe; + do_page_entry_lock(pe); + return false; + } + /* + * Try to acquire out-of-order lock; if busy, return busy so that we acquire + * locks in order. + */ + return page_entry_trylock(pe); +} + +static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata) +{ + tb_page_addr_t a = *(const tb_page_addr_t *)ap; + tb_page_addr_t b = *(const tb_page_addr_t *)bp; + + if (a == b) { + return 0; + } else if (a < b) { + return -1; + } + return 1; +} + +/* + * Lock a range of pages ([@start,@end[) as well as the pages of all + * intersecting TBs. + * Locking order: acquire locks in ascending order of page index. + */ +struct page_collection * +page_collection_lock(tb_page_addr_t start, tb_page_addr_t end) +{ + struct page_collection *set = g_malloc(sizeof(*set)); + tb_page_addr_t index; + PageDesc *pd; + + start >>= TARGET_PAGE_BITS; + end >>= TARGET_PAGE_BITS; + g_assert(start <= end); + + set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL, + page_entry_destroy); + set->max = NULL; + + retry: + g_tree_foreach(set->tree, page_entry_lock, NULL); + + for (index = start; index <= end; index++) { + TranslationBlock *tb; + int n; + + pd = page_find(index); + if (pd == NULL) { + continue; + } + if (page_trylock_add(set, index << TARGET_PAGE_BITS)) { + g_tree_foreach(set->tree, page_entry_unlock, NULL); + goto retry; + } + PAGE_FOR_EACH_TB(pd, tb, n) { + if (page_trylock_add(set, tb->page_addr[0]) || + (tb->page_addr[1] != -1 && + page_trylock_add(set, tb->page_addr[1]))) { + /* drop all locks, and reacquire in order */ + g_tree_foreach(set->tree, page_entry_unlock, NULL); + goto retry; + } + } + } + return set; +} + +void page_collection_unlock(struct page_collection *set) +{ + /* entries are unlocked and freed via page_entry_destroy */ + g_tree_destroy(set->tree); + g_free(set); +} + +#endif /* !CONFIG_USER_ONLY */ + +static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1, + PageDesc **ret_p2, tb_page_addr_t phys2, int alloc) +{ + PageDesc *p1, *p2; + + assert_memory_lock(); + g_assert(phys1 != -1 && phys1 != phys2); + p1 = page_find_alloc(phys1 >> TARGET_PAGE_BITS, alloc); + if (ret_p1) { + *ret_p1 = p1; + } + if (likely(phys2 == -1)) { + page_lock(p1); + return; + } + p2 = page_find_alloc(phys2 >> TARGET_PAGE_BITS, alloc); + if (ret_p2) { + *ret_p2 = p2; + } + if (phys1 < phys2) { + page_lock(p1); + page_lock(p2); + } else { + page_lock(p2); + page_lock(p1); + } +} + #if defined(CONFIG_USER_ONLY) /* Currently it is not recommended to allocate big chunks of data in user mode. It will change when a dedicated libc will be used. */ @@ -807,6 +1110,7 @@ static TranslationBlock *tb_alloc(target_ulong pc) return tb; } +/* call with @p->lock held */ static inline void invalidate_page_bitmap(PageDesc *p) { #ifdef CONFIG_SOFTMMU @@ -828,8 +1132,10 @@ static void page_flush_tb_1(int level, void **lp) PageDesc *pd = *lp; for (i = 0; i < V_L2_SIZE; ++i) { + page_lock(&pd[i]); pd[i].first_tb = (uintptr_t)NULL; invalidate_page_bitmap(pd + i); + page_unlock(&pd[i]); } } else { void **pp = *lp; @@ -956,6 +1262,7 @@ static void tb_page_check(void) #endif /* CONFIG_USER_ONLY */ +/* call with @pd->lock held */ static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb) { TranslationBlock *tb1; @@ -1032,11 +1339,8 @@ static inline void tb_jmp_unlink(TranslationBlock *tb) } } -/* invalidate one TB - * - * Called with tb_lock held. - */ -void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) +/* If @rm_from_page_list is set, call with the TB's pages' locks held */ +static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) { CPUState *cpu; PageDesc *p; @@ -1056,15 +1360,15 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) } /* remove the TB from the page list */ - if (tb->page_addr[0] != page_addr) { + if (rm_from_page_list) { p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS); tb_page_remove(p, tb); invalidate_page_bitmap(p); - } - if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) { - p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS); - tb_page_remove(p, tb); - invalidate_page_bitmap(p); + if (tb->page_addr[1] != -1) { + p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS); + tb_page_remove(p, tb); + invalidate_page_bitmap(p); + } } /* remove the TB from the hash list */ @@ -1086,7 +1390,28 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) tcg_ctx->tb_phys_invalidate_count + 1); } +static void tb_phys_invalidate__locked(TranslationBlock *tb) +{ + do_tb_phys_invalidate(tb, true); +} + +/* invalidate one TB + * + * Called with tb_lock held. + */ +void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) +{ + if (page_addr == -1) { + page_lock_tb(tb); + do_tb_phys_invalidate(tb, true); + page_unlock_tb(tb); + } else { + do_tb_phys_invalidate(tb, false); + } +} + #ifdef CONFIG_SOFTMMU +/* call with @p->lock held */ static void build_page_bitmap(PageDesc *p) { int n, tb_start, tb_end; @@ -1116,11 +1441,11 @@ static void build_page_bitmap(PageDesc *p) /* add the tb in the target page and protect it if necessary * * Called with mmap_lock held for user-mode emulation. + * Called with @p->lock held. */ -static inline void tb_alloc_page(TranslationBlock *tb, - unsigned int n, tb_page_addr_t page_addr) +static inline void tb_page_add(PageDesc *p, TranslationBlock *tb, + unsigned int n, tb_page_addr_t page_addr) { - PageDesc *p; #ifndef CONFIG_USER_ONLY bool page_already_protected; #endif @@ -1128,7 +1453,6 @@ static inline void tb_alloc_page(TranslationBlock *tb, assert_memory_lock(); tb->page_addr[n] = page_addr; - p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1); tb->page_next[n] = p->first_tb; #ifndef CONFIG_USER_ONLY page_already_protected = p->first_tb != (uintptr_t)NULL; @@ -1180,18 +1504,28 @@ static inline void tb_alloc_page(TranslationBlock *tb, static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb_page_addr_t phys_page2) { + PageDesc *p; + PageDesc *p2 = NULL; uint32_t h; assert_memory_lock(); - /* add in the page list */ - tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK); - if (phys_page2 != -1) { - tb_alloc_page(tb, 1, phys_page2); + /* + * Add the TB to the page list, acquiring first the pages's locks. + */ + page_lock_pair(&p, phys_pc, &p2, phys_page2, 1); + tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK); + if (p2) { + tb_page_add(p2, tb, 1, phys_page2); } else { tb->page_addr[1] = -1; } + if (p2) { + page_unlock(p2); + } + page_unlock(p); + /* add in the hash table */ h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, tb->trace_vcpu_dstate); @@ -1364,21 +1698,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } /* - * Invalidate all TBs which intersect with the target physical address range - * [start;end[. NOTE: start and end must refer to the *same* physical page. - * 'is_cpu_write_access' should be true if called from a real cpu write - * access: the virtual CPU will exit the current TB if code is modified inside - * this TB. - * - * Called with tb_lock/mmap_lock held for user-mode emulation - * Called with tb_lock held for system-mode emulation + * Call with all @pages locked. + * @p must be non-NULL. */ -void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access) +static void +tb_invalidate_phys_page_range__locked(struct page_collection *pages, + PageDesc *p, tb_page_addr_t start, + tb_page_addr_t end, + int is_cpu_write_access) { TranslationBlock *tb; tb_page_addr_t tb_start, tb_end; - PageDesc *p; int n; #ifdef TARGET_HAS_PRECISE_SMC CPUState *cpu = current_cpu; @@ -1394,10 +1724,6 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, assert_memory_lock(); assert_tb_locked(); - p = page_find(start >> TARGET_PAGE_BITS); - if (!p) { - return; - } #if defined(TARGET_HAS_PRECISE_SMC) if (cpu != NULL) { env = cpu->env_ptr; @@ -1443,7 +1769,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, ¤t_flags); } #endif /* TARGET_HAS_PRECISE_SMC */ - tb_phys_invalidate(tb, -1); + tb_phys_invalidate__locked(tb); } } #if !defined(CONFIG_USER_ONLY) @@ -1455,6 +1781,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, #endif #ifdef TARGET_HAS_PRECISE_SMC if (current_tb_modified) { + page_collection_unlock(pages); /* Force execution of one insn next time. */ cpu->cflags_next_tb = 1 | curr_cflags(); cpu_loop_exit_noexc(cpu); @@ -1462,6 +1789,35 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, #endif } +/* + * Invalidate all TBs which intersect with the target physical address range + * [start;end[. NOTE: start and end must refer to the *same* physical page. + * 'is_cpu_write_access' should be true if called from a real cpu write + * access: the virtual CPU will exit the current TB if code is modified inside + * this TB. + * + * Called with tb_lock/mmap_lock held for user-mode emulation + * Called with tb_lock held for system-mode emulation + */ +void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, + int is_cpu_write_access) +{ + struct page_collection *pages; + PageDesc *p; + + assert_memory_lock(); + assert_tb_locked(); + + p = page_find(start >> TARGET_PAGE_BITS); + if (p == NULL) { + return; + } + pages = page_collection_lock(start, end); + tb_invalidate_phys_page_range__locked(pages, p, start, end, + is_cpu_write_access); + page_collection_unlock(pages); +} + /* * Invalidate all TBs which intersect with the target physical address range * [start;end[. NOTE: start and end may refer to *different* physical pages. @@ -1474,15 +1830,22 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, */ static void tb_invalidate_phys_range_1(tb_page_addr_t start, tb_page_addr_t end) { + struct page_collection *pages; tb_page_addr_t next; + pages = page_collection_lock(start, end); for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; start < end; start = next, next += TARGET_PAGE_SIZE) { + PageDesc *pd = page_find(start >> TARGET_PAGE_BITS); tb_page_addr_t bound = MIN(next, end); - tb_invalidate_phys_page_range(start, bound, 0); + if (pd == NULL) { + continue; + } + tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0); } + page_collection_unlock(pages); } #ifdef CONFIG_SOFTMMU @@ -1508,6 +1871,7 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) */ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) { + struct page_collection *pages; PageDesc *p; #if 0 @@ -1525,11 +1889,10 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) if (!p) { return; } + + pages = page_collection_lock(start, start + len); if (!p->code_bitmap && ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) { - /* build code bitmap. FIXME: writes should be protected by - * tb_lock, reads by tb_lock or RCU. - */ build_page_bitmap(p); } if (p->code_bitmap) { @@ -1543,8 +1906,9 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) } } else { do_invalidate: - tb_invalidate_phys_page_range(start, start + len, 1); + tb_invalidate_phys_page_range__locked(pages, p, start, start + len, 1); } + page_collection_unlock(pages); } #else /* Called with mmap_lock held. If pc is not 0 then it indicates the diff --git a/accel/tcg/translate-all.h b/accel/tcg/translate-all.h index ba8e4d63c4..6d1d2588b5 100644 --- a/accel/tcg/translate-all.h +++ b/accel/tcg/translate-all.h @@ -23,6 +23,9 @@ /* translate-all.c */ +struct page_collection *page_collection_lock(tb_page_addr_t start, + tb_page_addr_t end); +void page_collection_unlock(struct page_collection *set); void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len); void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 07653d3c92..8d92e3cea9 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -359,7 +359,8 @@ struct TranslationBlock { /* original tb when cflags has CF_NOCACHE */ struct TranslationBlock *orig_tb; /* first and second physical page containing code. The lower bit - of the pointer tells the index in page_next[] */ + of the pointer tells the index in page_next[]. + The list is protected by the TB's page('s) lock(s) */ uintptr_t page_next[2]; tb_page_addr_t page_addr[2]; -- cgit v1.2.3-55-g7522 From faa9372c07d062eb01f9da72e3f6c0f32efffca7 Mon Sep 17 00:00:00 2001 From: Emilio G. Cota Date: Thu, 22 Feb 2018 20:50:29 -0500 Subject: translate-all: introduce assert_no_pages_locked The appended adds assertions to make sure we do not longjmp with page locks held. Note that user-mode has nothing to check, since page_locks are !user-mode only. Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 1 + accel/tcg/translate-all.c | 7 +++++++ include/exec/exec-all.h | 8 ++++++++ 3 files changed, 16 insertions(+) (limited to 'include/exec') diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 7570c59f09..d75c35380a 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -273,6 +273,7 @@ void cpu_exec_step_atomic(CPUState *cpu) tcg_debug_assert(!have_mmap_lock()); #endif tb_lock_reset(); + assert_no_pages_locked(); } if (in_exclusive_region) { diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 8b378586f4..c75298d08a 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -658,6 +658,12 @@ do_assert_page_locked(const PageDesc *pd, const char *file, int line) #define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__) +void assert_no_pages_locked(void) +{ + ht_pages_locked_debug_init(); + g_assert(g_hash_table_size(ht_pages_locked_debug) == 0); +} + #else /* !CONFIG_DEBUG_TCG */ #define assert_page_locked(pd) @@ -829,6 +835,7 @@ page_collection_lock(tb_page_addr_t start, tb_page_addr_t end) set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL, page_entry_destroy); set->max = NULL; + assert_no_pages_locked(); retry: g_tree_foreach(set->tree, page_entry_lock, NULL); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 8d92e3cea9..4f07a17052 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -435,6 +435,14 @@ void tb_lock(void); void tb_unlock(void); void tb_lock_reset(void); +#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_DEBUG_TCG) +void assert_no_pages_locked(void); +#else +static inline void assert_no_pages_locked(void) +{ +} +#endif + #if !defined(CONFIG_USER_ONLY) /** -- cgit v1.2.3-55-g7522 From 194125e3ebd553acb02aaf3797a4f0387493fe94 Mon Sep 17 00:00:00 2001 From: Emilio G. Cota Date: Wed, 2 Aug 2017 20:34:06 -0400 Subject: translate-all: protect TB jumps with a per-destination-TB lock This applies to both user-mode and !user-mode emulation. Instead of relying on a global lock, protect the list of incoming jumps with tb->jmp_lock. This lock also protects tb->cflags, so update all tb->cflags readers outside tb->jmp_lock to use atomic reads via tb_cflags(). In order to find the destination TB (and therefore its jmp_lock) from the origin TB, we introduce tb->jmp_dest[]. I considered not using a linked list of jumps, which simplifies code and makes the struct smaller. However, it unnecessarily increases memory usage, which results in a performance decrease. See for instance these numbers booting+shutting down debian-arm: Time (s) Rel. err (%) Abs. err (s) Rel. slowdown (%) ------------------------------------------------------------------------------ before 20.88 0.74 0.154512 0. after 20.81 0.38 0.079078 -0.33524904 GTree 21.02 0.28 0.058856 0.67049808 GHashTable + xxhash 21.63 1.08 0.233604 3.5919540 Using a hash table or a binary tree to keep track of the jumps doesn't really pay off, not only due to the increased memory usage, but also because most TBs have only 0 or 1 jumps to them. The maximum number of jumps when booting debian-arm that I measured is 35, but as we can see in the histogram below a TB with that many incoming jumps is extremely rare; the average TB has 0.80 incoming jumps. n_jumps: 379208; avg jumps/tb: 0.801099 dist: [0.0,1.0)|▄█▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁ ▁▁▁ ▁▁▁ ▁|[34.0,35.0] Reviewed-by: Richard Henderson Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 41 +++++++++----- accel/tcg/translate-all.c | 118 ++++++++++++++++++++++++---------------- docs/devel/multi-thread-tcg.txt | 6 +- include/exec/exec-all.h | 35 +++++++----- 4 files changed, 124 insertions(+), 76 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 45f6ebc65e..c482008bc7 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -352,28 +352,43 @@ void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr) } } -/* Called with tb_lock held. */ static inline void tb_add_jump(TranslationBlock *tb, int n, TranslationBlock *tb_next) { + uintptr_t old; + assert(n < ARRAY_SIZE(tb->jmp_list_next)); - if (tb->jmp_list_next[n]) { - /* Another thread has already done this while we were - * outside of the lock; nothing to do in this case */ - return; + qemu_spin_lock(&tb_next->jmp_lock); + + /* make sure the destination TB is valid */ + if (tb_next->cflags & CF_INVALID) { + goto out_unlock_next; + } + /* Atomically claim the jump destination slot only if it was NULL */ + old = atomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL, (uintptr_t)tb_next); + if (old) { + goto out_unlock_next; } + + /* patch the native jump address */ + tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr); + + /* add in TB jmp list */ + tb->jmp_list_next[n] = tb_next->jmp_list_head; + tb_next->jmp_list_head = (uintptr_t)tb | n; + + qemu_spin_unlock(&tb_next->jmp_lock); + qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc, "Linking TBs %p [" TARGET_FMT_lx "] index %d -> %p [" TARGET_FMT_lx "]\n", tb->tc.ptr, tb->pc, n, tb_next->tc.ptr, tb_next->pc); + return; - /* patch the native jump address */ - tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr); - - /* add in TB jmp circular list */ - tb->jmp_list_next[n] = tb_next->jmp_list_first; - tb_next->jmp_list_first = (uintptr_t)tb | n; + out_unlock_next: + qemu_spin_unlock(&tb_next->jmp_lock); + return; } static inline TranslationBlock *tb_find(CPUState *cpu, @@ -416,9 +431,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu, tb_lock(); acquired_tb_lock = true; } - if (!(tb->cflags & CF_INVALID)) { - tb_add_jump(last_tb, tb_exit, tb); - } + tb_add_jump(last_tb, tb_exit, tb); } if (acquired_tb_lock) { tb_unlock(); diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 3f977532bf..55c9e1b196 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -170,6 +170,9 @@ struct page_collection { #define PAGE_FOR_EACH_TB(pagedesc, tb, n) \ TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next) +#define TB_FOR_EACH_JMP(head_tb, tb, n) \ + TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next) + /* In system mode we want L1_MAP to be based on ram offsets, while in user mode we want it to be based on virtual addresses. */ #if !defined(CONFIG_USER_ONLY) @@ -389,7 +392,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, return -1; found: - if (reset_icount && (tb->cflags & CF_USE_ICOUNT)) { + if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) { assert(use_icount); /* Reset the cycle counter to the start of the block and shift if to the number of actually executed instructions */ @@ -432,7 +435,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) tb = tcg_tb_lookup(host_pc); if (tb) { cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit); - if (tb->cflags & CF_NOCACHE) { + if (tb_cflags(tb) & CF_NOCACHE) { /* one-shot translation, invalidate it immediately */ tb_phys_invalidate(tb, -1); tcg_tb_remove(tb); @@ -1360,34 +1363,53 @@ static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb) g_assert_not_reached(); } -/* remove the TB from a list of TBs jumping to the n-th jump target of the TB */ -static inline void tb_remove_from_jmp_list(TranslationBlock *tb, int n) +/* remove @orig from its @n_orig-th jump list */ +static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig) { - TranslationBlock *tb1; - uintptr_t *ptb, ntb; - unsigned int n1; + uintptr_t ptr, ptr_locked; + TranslationBlock *dest; + TranslationBlock *tb; + uintptr_t *pprev; + int n; - ptb = &tb->jmp_list_next[n]; - if (*ptb) { - /* find tb(n) in circular list */ - for (;;) { - ntb = *ptb; - n1 = ntb & 3; - tb1 = (TranslationBlock *)(ntb & ~3); - if (n1 == n && tb1 == tb) { - break; - } - if (n1 == 2) { - ptb = &tb1->jmp_list_first; - } else { - ptb = &tb1->jmp_list_next[n1]; - } - } - /* now we can suppress tb(n) from the list */ - *ptb = tb->jmp_list_next[n]; + /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */ + ptr = atomic_or_fetch(&orig->jmp_dest[n_orig], 1); + dest = (TranslationBlock *)(ptr & ~1); + if (dest == NULL) { + return; + } - tb->jmp_list_next[n] = (uintptr_t)NULL; + qemu_spin_lock(&dest->jmp_lock); + /* + * While acquiring the lock, the jump might have been removed if the + * destination TB was invalidated; check again. + */ + ptr_locked = atomic_read(&orig->jmp_dest[n_orig]); + if (ptr_locked != ptr) { + qemu_spin_unlock(&dest->jmp_lock); + /* + * The only possibility is that the jump was unlinked via + * tb_jump_unlink(dest). Seeing here another destination would be a bug, + * because we set the LSB above. + */ + g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID); + return; } + /* + * We first acquired the lock, and since the destination pointer matches, + * we know for sure that @orig is in the jmp list. + */ + pprev = &dest->jmp_list_head; + TB_FOR_EACH_JMP(dest, tb, n) { + if (tb == orig && n == n_orig) { + *pprev = tb->jmp_list_next[n]; + /* no need to set orig->jmp_dest[n]; setting the LSB was enough */ + qemu_spin_unlock(&dest->jmp_lock); + return; + } + pprev = &tb->jmp_list_next[n]; + } + g_assert_not_reached(); } /* reset the jump entry 'n' of a TB so that it is not chained to @@ -1399,24 +1421,21 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n) } /* remove any jumps to the TB */ -static inline void tb_jmp_unlink(TranslationBlock *tb) +static inline void tb_jmp_unlink(TranslationBlock *dest) { - TranslationBlock *tb1; - uintptr_t *ptb, ntb; - unsigned int n1; + TranslationBlock *tb; + int n; - ptb = &tb->jmp_list_first; - for (;;) { - ntb = *ptb; - n1 = ntb & 3; - tb1 = (TranslationBlock *)(ntb & ~3); - if (n1 == 2) { - break; - } - tb_reset_jump(tb1, n1); - *ptb = tb1->jmp_list_next[n1]; - tb1->jmp_list_next[n1] = (uintptr_t)NULL; + qemu_spin_lock(&dest->jmp_lock); + + TB_FOR_EACH_JMP(dest, tb, n) { + tb_reset_jump(tb, n); + atomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1); + /* No need to clear the list entry; setting the dest ptr is enough */ } + dest->jmp_list_head = (uintptr_t)NULL; + + qemu_spin_unlock(&dest->jmp_lock); } /* If @rm_from_page_list is set, call with the TB's pages' locks held */ @@ -1429,11 +1448,14 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) assert_tb_locked(); + /* make sure no further incoming jumps will be chained to this TB */ + qemu_spin_lock(&tb->jmp_lock); atomic_set(&tb->cflags, tb->cflags | CF_INVALID); + qemu_spin_unlock(&tb->jmp_lock); /* remove the TB from the hash list */ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK, tb->trace_vcpu_dstate); if (!qht_remove(&tb_ctx.htable, tb, h)) { return; @@ -1773,10 +1795,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu, CODE_GEN_ALIGN)); /* init jump list */ - assert(((uintptr_t)tb & 3) == 0); - tb->jmp_list_first = (uintptr_t)tb | 2; + qemu_spin_init(&tb->jmp_lock); + tb->jmp_list_head = (uintptr_t)NULL; tb->jmp_list_next[0] = (uintptr_t)NULL; tb->jmp_list_next[1] = (uintptr_t)NULL; + tb->jmp_dest[0] = (uintptr_t)NULL; + tb->jmp_dest[1] = (uintptr_t)NULL; /* init original jump addresses wich has been set during tcg_gen_code() */ if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { @@ -1868,7 +1892,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, } } if (current_tb == tb && - (current_tb->cflags & CF_COUNT_MASK) != 1) { + (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) { /* If we are modifying the current TB, we must stop its execution. We could be more precise by checking that the modification is after the current PC, but it @@ -2067,7 +2091,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) PAGE_FOR_EACH_TB(p, tb, n) { #ifdef TARGET_HAS_PRECISE_SMC if (current_tb == tb && - (current_tb->cflags & CF_COUNT_MASK) != 1) { + (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) { /* If we are modifying the current TB, we must stop its execution. We could be more precise by checking that the modification is after the current PC, but it @@ -2192,7 +2216,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) /* Generate a new TB executing the I/O insn. */ cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n; - if (tb->cflags & CF_NOCACHE) { + if (tb_cflags(tb) & CF_NOCACHE) { if (tb->orig_tb) { /* Invalidate original TB if this TB was generated in * cpu_exec_nocache() */ diff --git a/docs/devel/multi-thread-tcg.txt b/docs/devel/multi-thread-tcg.txt index faf09c6069..df83445ccf 100644 --- a/docs/devel/multi-thread-tcg.txt +++ b/docs/devel/multi-thread-tcg.txt @@ -131,8 +131,10 @@ DESIGN REQUIREMENT: Safely handle invalidation of TBs The direct jump themselves are updated atomically by the TCG tb_set_jmp_target() code. Modification to the linked lists that allow -searching for linked pages are done under the protect of the -tb_lock(). +searching for linked pages are done under the protection of tb->jmp_lock, +where tb is the destination block of a jump. Each origin block keeps a +pointer to its destinations so that the appropriate lock can be acquired before +iterating over a jump list. The global page table is a lockless radix tree; cmpxchg is used to atomically insert new elements. diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 4f07a17052..3c2a0efb55 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -345,7 +345,7 @@ struct TranslationBlock { #define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */ #define CF_NOCACHE 0x00010000 /* To be freed after execution */ #define CF_USE_ICOUNT 0x00020000 -#define CF_INVALID 0x00040000 /* TB is stale. Setters need tb_lock */ +#define CF_INVALID 0x00040000 /* TB is stale. Set with @jmp_lock held */ #define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */ /* cflags' mask for hashing/comparison */ #define CF_HASH_MASK \ @@ -364,6 +364,9 @@ struct TranslationBlock { uintptr_t page_next[2]; tb_page_addr_t page_addr[2]; + /* jmp_lock placed here to fill a 4-byte hole. Its documentation is below */ + QemuSpin jmp_lock; + /* The following data are used to directly call another TB from * the code of this one. This can be done either by emitting direct or * indirect native jump instructions. These jumps are reset so that the TB @@ -375,20 +378,26 @@ struct TranslationBlock { #define TB_JMP_RESET_OFFSET_INVALID 0xffff /* indicates no jump generated */ uintptr_t jmp_target_arg[2]; /* target address or offset */ - /* Each TB has an associated circular list of TBs jumping to this one. - * jmp_list_first points to the first TB jumping to this one. - * jmp_list_next is used to point to the next TB in a list. - * Since each TB can have two jumps, it can participate in two lists. - * jmp_list_first and jmp_list_next are 4-byte aligned pointers to a - * TranslationBlock structure, but the two least significant bits of - * them are used to encode which data field of the pointed TB should - * be used to traverse the list further from that TB: - * 0 => jmp_list_next[0], 1 => jmp_list_next[1], 2 => jmp_list_first. - * In other words, 0/1 tells which jump is used in the pointed TB, - * and 2 means that this is a pointer back to the target TB of this list. + /* + * Each TB has a NULL-terminated list (jmp_list_head) of incoming jumps. + * Each TB can have two outgoing jumps, and therefore can participate + * in two lists. The list entries are kept in jmp_list_next[2]. The least + * significant bit (LSB) of the pointers in these lists is used to encode + * which of the two list entries is to be used in the pointed TB. + * + * List traversals are protected by jmp_lock. The destination TB of each + * outgoing jump is kept in jmp_dest[] so that the appropriate jmp_lock + * can be acquired from any origin TB. + * + * jmp_dest[] are tagged pointers as well. The LSB is set when the TB is + * being invalidated, so that no further outgoing jumps from it can be set. + * + * jmp_lock also protects the CF_INVALID cflag; a jump must not be chained + * to a destination TB that has CF_INVALID set. */ + uintptr_t jmp_list_head; uintptr_t jmp_list_next[2]; - uintptr_t jmp_list_first; + uintptr_t jmp_dest[2]; }; extern bool parallel_cpus; -- cgit v1.2.3-55-g7522 From 0ac20318ce16f4de288969b2007ef5a654176058 Mon Sep 17 00:00:00 2001 From: Emilio G. Cota Date: Fri, 4 Aug 2017 23:46:31 -0400 Subject: tcg: remove tb_lock Use mmap_lock in user-mode to protect TCG state and the page descriptors. In !user-mode, each vCPU has its own TCG state, so no locks needed. Per-page locks are used to protect the page descriptors. Per-TB locks are used in both modes to protect TB jumps. Some notes: - tb_lock is removed from notdirty_mem_write by passing a locked page_collection to tb_invalidate_phys_page_fast. - tcg_tb_lookup/remove/insert/etc have their own internal lock(s), so there is no need to further serialize access to them. - do_tb_flush is run in a safe async context, meaning no other vCPU threads are running. Therefore acquiring mmap_lock there is just to please tools such as thread sanitizer. - Not visible in the diff, but tb_invalidate_phys_page already has an assert_memory_lock. - cpu_io_recompile is !user-only, so no mmap_lock there. - Added mmap_unlock()'s before all siglongjmp's that could be called in user-mode while mmap_lock is held. + Added an assert for !have_mmap_lock() after returning from the longjmp in cpu_exec, just like we do in cpu_exec_step_atomic. Performance numbers before/after: Host: AMD Opteron(tm) Processor 6376 ubuntu 17.04 ppc64 bootup+shutdown time 700 +-+--+----+------+------------+-----------+------------*--+-+ | + + + + + *B | | before ***B*** ** * | |tb lock removal ###D### *** | 600 +-+ *** +-+ | ** # | | *B* #D | | *** * ## | 500 +-+ *** ### +-+ | * *** ### | | *B* # ## | | ** * #D# | 400 +-+ ** ## +-+ | ** ### | | ** ## | | ** # ## | 300 +-+ * B* #D# +-+ | B *** ### | | * ** #### | | * *** ### | 200 +-+ B *B #D# +-+ | #B* * ## # | | #* ## | | + D##D# + + + + | 100 +-+--+----+------+------------+-----------+------------+--+-+ 1 8 16 Guest CPUs 48 64 png: https://imgur.com/HwmBHXe debian jessie aarch64 bootup+shutdown time 90 +-+--+-----+-----+------------+------------+------------+--+-+ | + + + + + + | | before ***B*** B | 80 +tb lock removal ###D### **D +-+ | **### | | **## | 70 +-+ ** # +-+ | ** ## | | ** # | 60 +-+ *B ## +-+ | ** ## | | *** #D | 50 +-+ *** ## +-+ | * ** ### | | **B* ### | 40 +-+ **** # ## +-+ | **** #D# | | ***B** ### | 30 +-+ B***B** #### +-+ | B * * # ### | | B ###D# | 20 +-+ D ##D## +-+ | D# | | + + + + + + | 10 +-+--+-----+-----+------------+------------+------------+--+-+ 1 8 16 Guest CPUs 48 64 png: https://imgur.com/iGpGFtv The gains are high for 4-8 CPUs. Beyond that point, however, unrelated lock contention significantly hurts scalability. Reviewed-by: Richard Henderson Reviewed-by: Alex Bennée Signed-off-by: Emilio G. Cota Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 34 +++-------- accel/tcg/translate-all.c | 132 ++++++++++++---------------------------- accel/tcg/translate-all.h | 3 +- docs/devel/multi-thread-tcg.txt | 11 ++-- exec.c | 26 ++++---- include/exec/cpu-common.h | 2 +- include/exec/exec-all.h | 4 -- include/exec/memory-internal.h | 6 +- include/exec/tb-context.h | 2 - linux-user/main.c | 3 - tcg/tcg.h | 4 +- 11 files changed, 75 insertions(+), 152 deletions(-) (limited to 'include/exec') diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index c482008bc7..c738b7f7d6 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -212,20 +212,20 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, We only end up here when an existing TB is too long. */ cflags |= MIN(max_cycles, CF_COUNT_MASK); - tb_lock(); + mmap_lock(); tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags, cflags); tb->orig_tb = orig_tb; - tb_unlock(); + mmap_unlock(); /* execute the generated code */ trace_exec_tb_nocache(tb, tb->pc); cpu_tb_exec(cpu, tb); - tb_lock(); + mmap_lock(); tb_phys_invalidate(tb, -1); + mmap_unlock(); tcg_tb_remove(tb); - tb_unlock(); } #endif @@ -244,9 +244,7 @@ void cpu_exec_step_atomic(CPUState *cpu) tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask); if (tb == NULL) { mmap_lock(); - tb_lock(); tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); - tb_unlock(); mmap_unlock(); } @@ -261,15 +259,13 @@ void cpu_exec_step_atomic(CPUState *cpu) cpu_tb_exec(cpu, tb); cc->cpu_exec_exit(cpu); } else { - /* We may have exited due to another problem here, so we need - * to reset any tb_locks we may have taken but didn't release. + /* * The mmap_lock is dropped by tb_gen_code if it runs out of * memory. */ #ifndef CONFIG_SOFTMMU tcg_debug_assert(!have_mmap_lock()); #endif - tb_lock_reset(); assert_no_pages_locked(); } @@ -398,20 +394,11 @@ static inline TranslationBlock *tb_find(CPUState *cpu, TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; - bool acquired_tb_lock = false; tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask); if (tb == NULL) { - /* mmap_lock is needed by tb_gen_code, and mmap_lock must be - * taken outside tb_lock. As system emulation is currently - * single threaded the locks are NOPs. - */ mmap_lock(); - tb_lock(); - acquired_tb_lock = true; - tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask); - mmap_unlock(); /* We add the TB in the virtual pc hash table for the fast lookup */ atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); @@ -427,15 +414,8 @@ static inline TranslationBlock *tb_find(CPUState *cpu, #endif /* See if we can patch the calling TB. */ if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { - if (!acquired_tb_lock) { - tb_lock(); - acquired_tb_lock = true; - } tb_add_jump(last_tb, tb_exit, tb); } - if (acquired_tb_lock) { - tb_unlock(); - } return tb; } @@ -710,7 +690,9 @@ int cpu_exec(CPUState *cpu) g_assert(cpu == current_cpu); g_assert(cc == CPU_GET_CLASS(cpu)); #endif /* buggy compiler */ - tb_lock_reset(); +#ifndef CONFIG_SOFTMMU + tcg_debug_assert(!have_mmap_lock()); +#endif if (qemu_mutex_iothread_locked()) { qemu_mutex_unlock_iothread(); } diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index b33cd9bc98..f0c3fd4d03 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -88,13 +88,13 @@ #endif /* Access to the various translations structures need to be serialised via locks - * for consistency. This is automatic for SoftMMU based system - * emulation due to its single threaded nature. In user-mode emulation - * access to the memory related structures are protected with the - * mmap_lock. + * for consistency. + * In user-mode emulation access to the memory related structures are protected + * with mmap_lock. + * In !user-mode we use per-page locks. */ #ifdef CONFIG_SOFTMMU -#define assert_memory_lock() tcg_debug_assert(have_tb_lock) +#define assert_memory_lock() #else #define assert_memory_lock() tcg_debug_assert(have_mmap_lock()) #endif @@ -216,9 +216,6 @@ __thread TCGContext *tcg_ctx; TBContext tb_ctx; bool parallel_cpus; -/* translation block context */ -static __thread int have_tb_lock; - static void page_table_config_init(void) { uint32_t v_l1_bits; @@ -239,31 +236,6 @@ static void page_table_config_init(void) assert(v_l2_levels >= 0); } -#define assert_tb_locked() tcg_debug_assert(have_tb_lock) -#define assert_tb_unlocked() tcg_debug_assert(!have_tb_lock) - -void tb_lock(void) -{ - assert_tb_unlocked(); - qemu_mutex_lock(&tb_ctx.tb_lock); - have_tb_lock++; -} - -void tb_unlock(void) -{ - assert_tb_locked(); - have_tb_lock--; - qemu_mutex_unlock(&tb_ctx.tb_lock); -} - -void tb_lock_reset(void) -{ - if (have_tb_lock) { - qemu_mutex_unlock(&tb_ctx.tb_lock); - have_tb_lock = 0; - } -} - void cpu_gen_init(void) { tcg_context_init(&tcg_init_ctx); @@ -420,8 +392,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) * - fault during translation (instruction fetch) * - fault from helper (not using GETPC() macro) * - * Either way we need return early to avoid blowing up on a - * recursive tb_lock() as we can't resolve it here. + * Either way we need return early as we can't resolve it here. * * We are using unsigned arithmetic so if host_pc < * tcg_init_ctx.code_gen_buffer check_offset will wrap to way @@ -430,7 +401,6 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) check_offset = host_pc - (uintptr_t) tcg_init_ctx.code_gen_buffer; if (check_offset < tcg_init_ctx.code_gen_buffer_size) { - tb_lock(); tb = tcg_tb_lookup(host_pc); if (tb) { cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit); @@ -441,7 +411,6 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) } r = true; } - tb_unlock(); } return r; @@ -1130,7 +1099,6 @@ static inline void code_gen_alloc(size_t tb_size) fprintf(stderr, "Could not allocate dynamic translator buffer\n"); exit(1); } - qemu_mutex_init(&tb_ctx.tb_lock); } static bool tb_cmp(const void *ap, const void *bp) @@ -1174,14 +1142,12 @@ void tcg_exec_init(unsigned long tb_size) /* * Allocate a new translation block. Flush the translation buffer if * too many translation blocks or too much generated code. - * - * Called with tb_lock held. */ static TranslationBlock *tb_alloc(target_ulong pc) { TranslationBlock *tb; - assert_tb_locked(); + assert_memory_lock(); tb = tcg_tb_alloc(tcg_ctx); if (unlikely(tb == NULL)) { @@ -1248,8 +1214,7 @@ static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data) /* flush all the translation blocks */ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) { - tb_lock(); - + mmap_lock(); /* If it is already been done on request of another CPU, * just retry. */ @@ -1279,7 +1244,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) atomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1); done: - tb_unlock(); + mmap_unlock(); } void tb_flush(CPUState *cpu) @@ -1313,7 +1278,7 @@ do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp) /* verify that all the pages have correct rights for code * - * Called with tb_lock held. + * Called with mmap_lock held. */ static void tb_invalidate_check(target_ulong address) { @@ -1343,7 +1308,10 @@ static void tb_page_check(void) #endif /* CONFIG_USER_ONLY */ -/* call with @pd->lock held */ +/* + * user-mode: call with mmap_lock held + * !user-mode: call with @pd->lock held + */ static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb) { TranslationBlock *tb1; @@ -1437,7 +1405,11 @@ static inline void tb_jmp_unlink(TranslationBlock *dest) qemu_spin_unlock(&dest->jmp_lock); } -/* If @rm_from_page_list is set, call with the TB's pages' locks held */ +/* + * In user-mode, call with mmap_lock held. + * In !user-mode, if @rm_from_page_list is set, call with the TB's pages' + * locks held. + */ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) { CPUState *cpu; @@ -1445,7 +1417,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) uint32_t h; tb_page_addr_t phys_pc; - assert_tb_locked(); + assert_memory_lock(); /* make sure no further incoming jumps will be chained to this TB */ qemu_spin_lock(&tb->jmp_lock); @@ -1498,7 +1470,7 @@ static void tb_phys_invalidate__locked(TranslationBlock *tb) /* invalidate one TB * - * Called with tb_lock held. + * Called with mmap_lock held in user-mode. */ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) { @@ -1543,7 +1515,7 @@ static void build_page_bitmap(PageDesc *p) /* add the tb in the target page and protect it if necessary * * Called with mmap_lock held for user-mode emulation. - * Called with @p->lock held. + * Called with @p->lock held in !user-mode. */ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb, unsigned int n, tb_page_addr_t page_addr) @@ -1815,10 +1787,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, if ((pc & TARGET_PAGE_MASK) != virt_page2) { phys_page2 = get_page_addr_code(env, virt_page2); } - /* As long as consistency of the TB stuff is provided by tb_lock in user - * mode and is implicit in single-threaded softmmu emulation, no explicit - * memory barrier is required before tb_link_page() makes the TB visible - * through the physical hash table and physical page list. + /* + * No explicit memory barrier is required -- tb_link_page() makes the + * TB visible in a consistent state. */ existing_tb = tb_link_page(tb, phys_pc, phys_page2); /* if the TB already exists, discard what we just translated */ @@ -1834,8 +1805,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } /* - * Call with all @pages locked. * @p must be non-NULL. + * user-mode: call with mmap_lock held. + * !user-mode: call with all @pages locked. */ static void tb_invalidate_phys_page_range__locked(struct page_collection *pages, @@ -1920,6 +1892,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, page_collection_unlock(pages); /* Force execution of one insn next time. */ cpu->cflags_next_tb = 1 | curr_cflags(); + mmap_unlock(); cpu_loop_exit_noexc(cpu); } #endif @@ -1932,8 +1905,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, * access: the virtual CPU will exit the current TB if code is modified inside * this TB. * - * Called with tb_lock/mmap_lock held for user-mode emulation - * Called with tb_lock held for system-mode emulation + * Called with mmap_lock held for user-mode emulation */ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access) @@ -1942,7 +1914,6 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, PageDesc *p; assert_memory_lock(); - assert_tb_locked(); p = page_find(start >> TARGET_PAGE_BITS); if (p == NULL) { @@ -1961,14 +1932,15 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, * access: the virtual CPU will exit the current TB if code is modified inside * this TB. * - * Called with mmap_lock held for user-mode emulation, grabs tb_lock - * Called with tb_lock held for system-mode emulation + * Called with mmap_lock held for user-mode emulation. */ -static void tb_invalidate_phys_range_1(tb_page_addr_t start, tb_page_addr_t end) +void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) { struct page_collection *pages; tb_page_addr_t next; + assert_memory_lock(); + pages = page_collection_lock(start, end); for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; start < end; @@ -1984,30 +1956,16 @@ static void tb_invalidate_phys_range_1(tb_page_addr_t start, tb_page_addr_t end) page_collection_unlock(pages); } -#ifdef CONFIG_SOFTMMU -void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) -{ - assert_tb_locked(); - tb_invalidate_phys_range_1(start, end); -} -#else -void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) -{ - assert_memory_lock(); - tb_lock(); - tb_invalidate_phys_range_1(start, end); - tb_unlock(); -} -#endif - #ifdef CONFIG_SOFTMMU /* len must be <= 8 and start must be a multiple of len. * Called via softmmu_template.h when code areas are written to with * iothread mutex not held. + * + * Call with all @pages in the range [@start, @start + len[ locked. */ -void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) +void tb_invalidate_phys_page_fast(struct page_collection *pages, + tb_page_addr_t start, int len) { - struct page_collection *pages; PageDesc *p; #if 0 @@ -2026,7 +1984,6 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) return; } - pages = page_collection_lock(start, start + len); assert_page_locked(p); if (!p->code_bitmap && ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) { @@ -2045,7 +2002,6 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) do_invalidate: tb_invalidate_phys_page_range__locked(pages, p, start, start + len, 1); } - page_collection_unlock(pages); } #else /* Called with mmap_lock held. If pc is not 0 then it indicates the @@ -2077,7 +2033,6 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) return false; } - tb_lock(); #ifdef TARGET_HAS_PRECISE_SMC if (p->first_tb && pc != 0) { current_tb = tcg_tb_lookup(pc); @@ -2110,12 +2065,9 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) if (current_tb_modified) { /* Force execution of one insn next time. */ cpu->cflags_next_tb = 1 | curr_cflags(); - /* tb_lock will be reset after cpu_loop_exit_noexc longjmps - * back into the cpu_exec loop. */ return true; } #endif - tb_unlock(); return false; } @@ -2136,18 +2088,18 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) return; } ram_addr = memory_region_get_ram_addr(mr) + addr; - tb_lock(); tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); - tb_unlock(); rcu_read_unlock(); } #endif /* !defined(CONFIG_USER_ONLY) */ -/* Called with tb_lock held. */ +/* user-mode: call with mmap_lock held */ void tb_check_watchpoint(CPUState *cpu) { TranslationBlock *tb; + assert_memory_lock(); + tb = tcg_tb_lookup(cpu->mem_io_pc); if (tb) { /* We can use retranslation to find the PC. */ @@ -2181,7 +2133,6 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) TranslationBlock *tb; uint32_t n; - tb_lock(); tb = tcg_tb_lookup(retaddr); if (!tb) { cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", @@ -2229,9 +2180,6 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) * repeating the fault, which is horribly inefficient. * Better would be to execute just this insn uncached, or generate a * second new TB. - * - * cpu_loop_exit_noexc will longjmp back to cpu_exec where the - * tb_lock gets reset. */ cpu_loop_exit_noexc(cpu); } diff --git a/accel/tcg/translate-all.h b/accel/tcg/translate-all.h index 6d1d2588b5..e6cb963d7e 100644 --- a/accel/tcg/translate-all.h +++ b/accel/tcg/translate-all.h @@ -26,7 +26,8 @@ struct page_collection *page_collection_lock(tb_page_addr_t start, tb_page_addr_t end); void page_collection_unlock(struct page_collection *set); -void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len); +void tb_invalidate_phys_page_fast(struct page_collection *pages, + tb_page_addr_t start, int len); void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access); void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end); diff --git a/docs/devel/multi-thread-tcg.txt b/docs/devel/multi-thread-tcg.txt index df83445ccf..06530be1e9 100644 --- a/docs/devel/multi-thread-tcg.txt +++ b/docs/devel/multi-thread-tcg.txt @@ -61,6 +61,7 @@ have their block-to-block jumps patched. Global TCG State ---------------- +### User-mode emulation We need to protect the entire code generation cycle including any post generation patching of the translated code. This also implies a shared translation buffer which contains code running on all cores. Any @@ -75,9 +76,11 @@ patching. (Current solution) -Mainly as part of the linux-user work all code generation is -serialised with a tb_lock(). For the SoftMMU tb_lock() also takes the -place of mmap_lock() in linux-user. +Code generation is serialised with mmap_lock(). + +### !User-mode emulation +Each vCPU has its own TCG context and associated TCG region, thereby +requiring no locking. Translation Blocks ------------------ @@ -195,7 +198,7 @@ work as "safe work" and exiting the cpu run loop. This ensure by the time execution restarts all flush operations have completed. TLB flag updates are all done atomically and are also protected by the -tb_lock() which is used by the functions that update the TLB in bulk. +corresponding page lock. (Known limitation) diff --git a/exec.c b/exec.c index ebadc0e302..28f9bdcbf9 100644 --- a/exec.c +++ b/exec.c @@ -1031,9 +1031,7 @@ const char *parse_cpu_model(const char *cpu_model) static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) { mmap_lock(); - tb_lock(); tb_invalidate_phys_page_range(pc, pc + 1, 0); - tb_unlock(); mmap_unlock(); } #else @@ -2644,21 +2642,21 @@ void memory_notdirty_write_prepare(NotDirtyInfo *ndi, ndi->ram_addr = ram_addr; ndi->mem_vaddr = mem_vaddr; ndi->size = size; - ndi->locked = false; + ndi->pages = NULL; assert(tcg_enabled()); if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { - ndi->locked = true; - tb_lock(); - tb_invalidate_phys_page_fast(ram_addr, size); + ndi->pages = page_collection_lock(ram_addr, ram_addr + size); + tb_invalidate_phys_page_fast(ndi->pages, ram_addr, size); } } /* Called within RCU critical section. */ void memory_notdirty_write_complete(NotDirtyInfo *ndi) { - if (ndi->locked) { - tb_unlock(); + if (ndi->pages) { + page_collection_unlock(ndi->pages); + ndi->pages = NULL; } /* Set both VGA and migration bits for simplicity and to remove @@ -2745,18 +2743,16 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) } cpu->watchpoint_hit = wp; - /* Both tb_lock and iothread_mutex will be reset when - * cpu_loop_exit or cpu_loop_exit_noexc longjmp - * back into the cpu_exec main loop. - */ - tb_lock(); + mmap_lock(); tb_check_watchpoint(cpu); if (wp->flags & BP_STOP_BEFORE_ACCESS) { cpu->exception_index = EXCP_DEBUG; + mmap_unlock(); cpu_loop_exit(cpu); } else { /* Force execution of one insn next time. */ cpu->cflags_next_tb = 1 | curr_cflags(); + mmap_unlock(); cpu_loop_exit_noexc(cpu); } } @@ -3147,9 +3143,9 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, } if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) { assert(tcg_enabled()); - tb_lock(); + mmap_lock(); tb_invalidate_phys_range(addr, addr + length); - tb_unlock(); + mmap_unlock(); dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); } cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 0b58e262f3..18b40d6145 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -23,7 +23,7 @@ typedef struct CPUListState { FILE *file; } CPUListState; -/* The CPU list lock nests outside tb_lock/tb_unlock. */ +/* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */ void qemu_init_cpu_list(void); void cpu_list_lock(void); void cpu_list_unlock(void); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 3c2a0efb55..25a6f28ab8 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -440,10 +440,6 @@ extern uintptr_t tci_tb_ptr; smaller than 4 bytes, so we don't worry about special-casing this. */ #define GETPC_ADJ 2 -void tb_lock(void); -void tb_unlock(void); -void tb_lock_reset(void); - #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_DEBUG_TCG) void assert_no_pages_locked(void); #else diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h index 56c25c0ef7..bb08fa4d2f 100644 --- a/include/exec/memory-internal.h +++ b/include/exec/memory-internal.h @@ -49,6 +49,8 @@ void mtree_print_dispatch(fprintf_function mon, void *f, struct AddressSpaceDispatch *d, MemoryRegion *root); +struct page_collection; + /* Opaque struct for passing info from memory_notdirty_write_prepare() * to memory_notdirty_write_complete(). Callers should treat all fields * as private, with the exception of @active. @@ -60,10 +62,10 @@ void mtree_print_dispatch(fprintf_function mon, void *f, */ typedef struct { CPUState *cpu; + struct page_collection *pages; ram_addr_t ram_addr; vaddr mem_vaddr; unsigned size; - bool locked; bool active; } NotDirtyInfo; @@ -91,7 +93,7 @@ typedef struct { * * This must only be called if we are using TCG; it will assert otherwise. * - * We may take a lock in the prepare call, so callers must ensure that + * We may take locks in the prepare call, so callers must ensure that * they don't exit (via longjump or otherwise) without calling complete. * * This call must only be made inside an RCU critical section. diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index 8c9b49c98e..feb585e0a7 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -32,8 +32,6 @@ typedef struct TBContext TBContext; struct TBContext { struct qht htable; - /* any access to the tbs or the page table must use this lock */ - QemuMutex tb_lock; /* statistics */ unsigned tb_flush_count; diff --git a/linux-user/main.c b/linux-user/main.c index 78d6d3e7eb..84e9ec9335 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -120,7 +120,6 @@ void fork_start(void) { start_exclusive(); mmap_fork_start(); - qemu_mutex_lock(&tb_ctx.tb_lock); cpu_list_lock(); } @@ -136,14 +135,12 @@ void fork_end(int child) QTAILQ_REMOVE(&cpus, cpu, node); } } - qemu_mutex_init(&tb_ctx.tb_lock); qemu_init_cpu_list(); gdbserver_fork(thread_cpu); /* qemu_init_cpu_list() takes care of reinitializing the * exclusive state, so we don't need to end_exclusive() here. */ } else { - qemu_mutex_unlock(&tb_ctx.tb_lock); cpu_list_unlock(); end_exclusive(); } diff --git a/tcg/tcg.h b/tcg/tcg.h index e49b289ba1..532d2a0710 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -857,7 +857,7 @@ static inline bool tcg_op_buf_full(void) /* pool based memory allocation */ -/* user-mode: tb_lock must be held for tcg_malloc_internal. */ +/* user-mode: mmap_lock must be held for tcg_malloc_internal. */ void *tcg_malloc_internal(TCGContext *s, int size); void tcg_pool_reset(TCGContext *s); TranslationBlock *tcg_tb_alloc(TCGContext *s); @@ -875,7 +875,7 @@ TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr); void tcg_tb_foreach(GTraverseFunc func, gpointer user_data); size_t tcg_nb_tbs(void); -/* user-mode: Called with tb_lock held. */ +/* user-mode: Called with mmap_lock held. */ static inline void *tcg_malloc(int size) { TCGContext *s = tcg_ctx; -- cgit v1.2.3-55-g7522