qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

translate-all.c (48011B)


      1 /*
      2  *  Host code generation
      3  *
      4  *  Copyright (c) 2003 Fabrice Bellard
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2.1 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 
     20 #include "qemu/osdep.h"
     21 
     22 #define NO_CPU_IO_DEFS
     23 #include "trace.h"
     24 #include "disas/disas.h"
     25 #include "exec/exec-all.h"
     26 #include "tcg/tcg.h"
     27 #if defined(CONFIG_USER_ONLY)
     28 #include "qemu.h"
     29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
     30 #include <sys/param.h>
     31 #if __FreeBSD_version >= 700104
     32 #define HAVE_KINFO_GETVMMAP
     33 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
     34 #include <sys/proc.h>
     35 #include <machine/profile.h>
     36 #define _KERNEL
     37 #include <sys/user.h>
     38 #undef _KERNEL
     39 #undef sigqueue
     40 #include <libutil.h>
     41 #endif
     42 #endif
     43 #else
     44 #include "exec/ram_addr.h"
     45 #endif
     46 
     47 #include "exec/cputlb.h"
     48 #include "exec/translate-all.h"
     49 #include "exec/translator.h"
     50 #include "qemu/bitmap.h"
     51 #include "qemu/qemu-print.h"
     52 #include "qemu/timer.h"
     53 #include "qemu/main-loop.h"
     54 #include "qemu/cacheinfo.h"
     55 #include "exec/log.h"
     56 #include "sysemu/cpus.h"
     57 #include "sysemu/cpu-timers.h"
     58 #include "sysemu/tcg.h"
     59 #include "qapi/error.h"
     60 #include "hw/core/tcg-cpu-ops.h"
     61 #include "tb-jmp-cache.h"
     62 #include "tb-hash.h"
     63 #include "tb-context.h"
     64 #include "internal.h"
     65 
     66 /* make various TB consistency checks */
     67 
     68 /**
     69  * struct page_entry - page descriptor entry
     70  * @pd:     pointer to the &struct PageDesc of the page this entry represents
     71  * @index:  page index of the page
     72  * @locked: whether the page is locked
     73  *
     74  * This struct helps us keep track of the locked state of a page, without
     75  * bloating &struct PageDesc.
     76  *
     77  * A page lock protects accesses to all fields of &struct PageDesc.
     78  *
     79  * See also: &struct page_collection.
     80  */
     81 struct page_entry {
     82     PageDesc *pd;
     83     tb_page_addr_t index;
     84     bool locked;
     85 };
     86 
     87 /**
     88  * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
     89  * @tree:   Binary search tree (BST) of the pages, with key == page index
     90  * @max:    Pointer to the page in @tree with the highest page index
     91  *
     92  * To avoid deadlock we lock pages in ascending order of page index.
     93  * When operating on a set of pages, we need to keep track of them so that
     94  * we can lock them in order and also unlock them later. For this we collect
     95  * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
     96  * @tree implementation we use does not provide an O(1) operation to obtain the
     97  * highest-ranked element, we use @max to keep track of the inserted page
     98  * with the highest index. This is valuable because if a page is not in
     99  * the tree and its index is higher than @max's, then we can lock it
    100  * without breaking the locking order rule.
    101  *
    102  * Note on naming: 'struct page_set' would be shorter, but we already have a few
    103  * page_set_*() helpers, so page_collection is used instead to avoid confusion.
    104  *
    105  * See also: page_collection_lock().
    106  */
    107 struct page_collection {
    108     GTree *tree;
    109     struct page_entry *max;
    110 };
    111 
    112 /*
    113  * In system mode we want L1_MAP to be based on ram offsets,
    114  * while in user mode we want it to be based on virtual addresses.
    115  *
    116  * TODO: For user mode, see the caveat re host vs guest virtual
    117  * address spaces near GUEST_ADDR_MAX.
    118  */
    119 #if !defined(CONFIG_USER_ONLY)
    120 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
    121 # define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
    122 #else
    123 # define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
    124 #endif
    125 #else
    126 # define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
    127 #endif
    128 
    129 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
    130 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
    131                   sizeof_field(TranslationBlock, trace_vcpu_dstate)
    132                   * BITS_PER_BYTE);
    133 
    134 /*
    135  * L1 Mapping properties
    136  */
    137 int v_l1_size;
    138 int v_l1_shift;
    139 int v_l2_levels;
    140 
    141 void *l1_map[V_L1_MAX_SIZE];
    142 
    143 TBContext tb_ctx;
    144 
    145 static void page_table_config_init(void)
    146 {
    147     uint32_t v_l1_bits;
    148 
    149     assert(TARGET_PAGE_BITS);
    150     /* The bits remaining after N lower levels of page tables.  */
    151     v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
    152     if (v_l1_bits < V_L1_MIN_BITS) {
    153         v_l1_bits += V_L2_BITS;
    154     }
    155 
    156     v_l1_size = 1 << v_l1_bits;
    157     v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
    158     v_l2_levels = v_l1_shift / V_L2_BITS - 1;
    159 
    160     assert(v_l1_bits <= V_L1_MAX_BITS);
    161     assert(v_l1_shift % V_L2_BITS == 0);
    162     assert(v_l2_levels >= 0);
    163 }
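        /*
         * Worked example (illustrative only, assuming the usual V_L2_BITS == 10
         * and V_L1_MIN_BITS == 4): with L1_MAP_ADDR_SPACE_BITS == 36 and
         * TARGET_PAGE_BITS == 12,
         *
         *   v_l1_bits   = (36 - 12) % 10 = 4    (not below V_L1_MIN_BITS)
         *   v_l1_size   = 1 << 4 = 16 l1_map entries
         *   v_l1_shift  = 36 - 12 - 4 = 20
         *   v_l2_levels = 20 / 10 - 1 = 1       (one intermediate level)
         *
         * i.e. a page index is consumed as 4 L1 bits, one 10-bit intermediate
         * level, and a final 10-bit leaf level of PageDesc entries.
         */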
    164 
    165 /* Encode VAL as a signed leb128 sequence at P.
    166    Return P incremented past the encoded value.  */
    167 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
    168 {
    169     int more, byte;
    170 
    171     do {
    172         byte = val & 0x7f;
    173         val >>= 7;
    174         more = !((val == 0 && (byte & 0x40) == 0)
    175                  || (val == -1 && (byte & 0x40) != 0));
    176         if (more) {
    177             byte |= 0x80;
    178         }
    179         *p++ = byte;
    180     } while (more);
    181 
    182     return p;
    183 }
    184 
    185 /* Decode a signed leb128 sequence at *PP; increment *PP past the
    186    decoded value.  Return the decoded value.  */
    187 static target_long decode_sleb128(const uint8_t **pp)
    188 {
    189     const uint8_t *p = *pp;
    190     target_long val = 0;
    191     int byte, shift = 0;
    192 
    193     do {
    194         byte = *p++;
    195         val |= (target_ulong)(byte & 0x7f) << shift;
    196         shift += 7;
    197     } while (byte & 0x80);
    198     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
    199         val |= -(target_ulong)1 << shift;
    200     }
    201 
    202     *pp = p;
    203     return val;
    204 }
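        /*
         * Worked example (illustrative): encode_sleb128() emits the standard
         * signed LEB128 byte stream; e.g. -123456 becomes the three bytes
         * 0xc0 0xbb 0x78, and decode_sleb128() recovers the original value.
         * A minimal round-trip sketch (the helper below is hypothetical):
         */
        #if 0
        static void sleb128_roundtrip_example(void)
        {
            uint8_t buf[2 * sizeof(target_long)];
            const uint8_t *rp = buf;
            uint8_t *wp = encode_sleb128(buf, -123456);

            g_assert(wp - buf == 3);                  /* 0xc0 0xbb 0x78 */
            g_assert(decode_sleb128(&rp) == -123456);
            g_assert(rp == wp);                       /* all bytes consumed */
        }
        #endif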
    205 
    206 /* Encode the data collected about the instructions while compiling TB.
    207    Place the data at BLOCK, and return the number of bytes consumed.
    208 
    209    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
    210    which come from the target's insn_start data, followed by a uintptr_t
    211    which comes from the host pc of the end of the code implementing the insn.
    212 
    213    Each line of the table is encoded as sleb128 deltas from the previous
    214    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
    215    That is, the first column is seeded with the guest pc, the last column
    216    with the host pc, and the middle columns with zeros.  */
    217 
    218 static int encode_search(TranslationBlock *tb, uint8_t *block)
    219 {
    220     uint8_t *highwater = tcg_ctx->code_gen_highwater;
    221     uint8_t *p = block;
    222     int i, j, n;
    223 
    224     for (i = 0, n = tb->icount; i < n; ++i) {
    225         target_ulong prev;
    226 
    227         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
    228             if (i == 0) {
    229                 prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
    230             } else {
    231                 prev = tcg_ctx->gen_insn_data[i - 1][j];
    232             }
    233             p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
    234         }
    235         prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
    236         p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
    237 
    238         /* Test for (pending) buffer overflow.  The assumption is that any
    239            one row beginning below the high water mark cannot overrun
    240            the buffer completely.  Thus we can test for overflow after
    241            encoding a row without having to check during encoding.  */
    242         if (unlikely(p > highwater)) {
    243             return -1;
    244         }
    245     }
    246 
    247     return p - block;
    248 }
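        /*
         * Worked example (illustrative, assuming TARGET_INSN_START_WORDS == 1
         * and !TARGET_TB_PCREL): for a TB at guest pc 0x1000 with two insns at
         * 0x1000 and 0x1004 whose host code ends at byte offsets 0x20 and 0x38,
         * the logical rows and their per-row deltas are
         *
         *   row 0: { 0x1000, 0x20 }  vs seed { tb->pc, 0 }  ->  0, 0x20
         *   row 1: { 0x1004, 0x38 }  vs row 0               ->  4, 0x18
         *
         * so the search data is sleb128(0) sleb128(0x20) sleb128(4)
         * sleb128(0x18); cpu_unwind_data_from_tb() below replays the same
         * deltas to reconstruct the rows.
         */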
    249 
    250 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
    251                                    uint64_t *data)
    252 {
    253     uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
    254     const uint8_t *p = tb->tc.ptr + tb->tc.size;
    255     int i, j, num_insns = tb->icount;
    256 
    257     host_pc -= GETPC_ADJ;
    258 
    259     if (host_pc < iter_pc) {
    260         return -1;
    261     }
    262 
    263     memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
    264     if (!TARGET_TB_PCREL) {
    265         data[0] = tb_pc(tb);
    266     }
    267 
    268     /*
    269      * Reconstruct the stored insn data while looking for the point
    270      * at which the end of the insn exceeds host_pc.
    271      */
    272     for (i = 0; i < num_insns; ++i) {
    273         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
    274             data[j] += decode_sleb128(&p);
    275         }
    276         iter_pc += decode_sleb128(&p);
    277         if (iter_pc > host_pc) {
    278             return num_insns - i;
    279         }
    280     }
    281     return -1;
    282 }
    283 
    284 /*
    285  * The cpu state corresponding to 'host_pc' is restored in
    286  * preparation for exiting the TB.
    287  */
    288 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
    289                                uintptr_t host_pc)
    290 {
    291     uint64_t data[TARGET_INSN_START_WORDS];
    292 #ifdef CONFIG_PROFILER
    293     TCGProfile *prof = &tcg_ctx->prof;
    294     int64_t ti = profile_getclock();
    295 #endif
    296     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
    297 
    298     if (insns_left < 0) {
    299         return;
    300     }
    301 
    302     if (tb_cflags(tb) & CF_USE_ICOUNT) {
    303         assert(icount_enabled());
    304         /*
    305          * Reset the cycle counter to the start of the block and
    306          * shift it to the number of actually executed instructions.
    307          */
    308         cpu_neg(cpu)->icount_decr.u16.low += insns_left;
    309     }
    310 
    311     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
    312 
    313 #ifdef CONFIG_PROFILER
    314     qatomic_set(&prof->restore_time,
    315                 prof->restore_time + profile_getclock() - ti);
    316     qatomic_set(&prof->restore_count, prof->restore_count + 1);
    317 #endif
    318 }
    319 
    320 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
    321 {
    322     /*
    323      * The host_pc has to be in the rx region of the code buffer.
    324      * If it is not, we will not be able to resolve it here.
    325      * The two cases where host_pc will not be correct are:
    326      *
    327      *  - fault during translation (instruction fetch)
    328      *  - fault from helper (not using GETPC() macro)
    329      *
    330      * Either way we need to return early as we can't resolve it here.
    331      */
    332     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
    333         TranslationBlock *tb = tcg_tb_lookup(host_pc);
    334         if (tb) {
    335             cpu_restore_state_from_tb(cpu, tb, host_pc);
    336             return true;
    337         }
    338     }
    339     return false;
    340 }
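        /*
         * Usage sketch (hypothetical helper): a TCG helper that detects a
         * fault typically captures the host return address with GETPC() and
         * passes it to cpu_restore_state() so the guest state is synchronized
         * before leaving the TB.
         */
        #if 0
        static void helper_report_fault(CPUArchState *env, uintptr_t retaddr)
        {
            CPUState *cs = env_cpu(env);

            /* Returns false if retaddr is not inside the code_gen_buffer,
             * e.g. for a fault raised during translation. */
            cpu_restore_state(cs, retaddr);
            cpu_loop_exit(cs);
        }
        #endif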
    341 
    342 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
    343 {
    344     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
    345         TranslationBlock *tb = tcg_tb_lookup(host_pc);
    346         if (tb) {
    347             return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
    348         }
    349     }
    350     return false;
    351 }
    352 
    353 void page_init(void)
    354 {
    355     page_size_init();
    356     page_table_config_init();
    357 
    358 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    359     {
    360 #ifdef HAVE_KINFO_GETVMMAP
    361         struct kinfo_vmentry *freep;
    362         int i, cnt;
    363 
    364         freep = kinfo_getvmmap(getpid(), &cnt);
    365         if (freep) {
    366             mmap_lock();
    367             for (i = 0; i < cnt; i++) {
    368                 unsigned long startaddr, endaddr;
    369 
    370                 startaddr = freep[i].kve_start;
    371                 endaddr = freep[i].kve_end;
    372                 if (h2g_valid(startaddr)) {
    373                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
    374 
    375                     if (h2g_valid(endaddr)) {
    376                         endaddr = h2g(endaddr);
    377                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
    378                     } else {
    379 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
    380                         endaddr = ~0ul;
    381                         page_set_flags(startaddr, endaddr, PAGE_RESERVED);
    382 #endif
    383                     }
    384                 }
    385             }
    386             free(freep);
    387             mmap_unlock();
    388         }
    389 #else
    390         FILE *f;
    391 
    392         last_brk = (unsigned long)sbrk(0);
    393 
    394         f = fopen("/compat/linux/proc/self/maps", "r");
    395         if (f) {
    396             mmap_lock();
    397 
    398             do {
    399                 unsigned long startaddr, endaddr;
    400                 int n;
    401 
    402                 n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
    403 
    404                 if (n == 2 && h2g_valid(startaddr)) {
    405                     startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
    406 
    407                     if (h2g_valid(endaddr)) {
    408                         endaddr = h2g(endaddr);
    409                     } else {
    410                         endaddr = ~0ul;
    411                     }
    412                     page_set_flags(startaddr, endaddr, PAGE_RESERVED);
    413                 }
    414             } while (!feof(f));
    415 
    416             fclose(f);
    417             mmap_unlock();
    418         }
    419 #endif
    420     }
    421 #endif
    422 }
    423 
    424 PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
    425 {
    426     PageDesc *pd;
    427     void **lp;
    428     int i;
    429 
    430     /* Level 1.  Always allocated.  */
    431     lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
    432 
    433     /* Level 2..N-1.  */
    434     for (i = v_l2_levels; i > 0; i--) {
    435         void **p = qatomic_rcu_read(lp);
    436 
    437         if (p == NULL) {
    438             void *existing;
    439 
    440             if (!alloc) {
    441                 return NULL;
    442             }
    443             p = g_new0(void *, V_L2_SIZE);
    444             existing = qatomic_cmpxchg(lp, NULL, p);
    445             if (unlikely(existing)) {
    446                 g_free(p);
    447                 p = existing;
    448             }
    449         }
    450 
    451         lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
    452     }
    453 
    454     pd = qatomic_rcu_read(lp);
    455     if (pd == NULL) {
    456         void *existing;
    457 
    458         if (!alloc) {
    459             return NULL;
    460         }
    461         pd = g_new0(PageDesc, V_L2_SIZE);
    462 #ifndef CONFIG_USER_ONLY
    463         {
    464             int i;
    465 
    466             for (i = 0; i < V_L2_SIZE; i++) {
    467                 qemu_spin_init(&pd[i].lock);
    468             }
    469         }
    470 #endif
    471         existing = qatomic_cmpxchg(lp, NULL, pd);
    472         if (unlikely(existing)) {
    473 #ifndef CONFIG_USER_ONLY
    474             {
    475                 int i;
    476 
    477                 for (i = 0; i < V_L2_SIZE; i++) {
    478                     qemu_spin_destroy(&pd[i].lock);
    479                 }
    480             }
    481 #endif
    482             g_free(pd);
    483             pd = existing;
    484         }
    485     }
    486 
    487     return pd + (index & (V_L2_SIZE - 1));
    488 }
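        /*
         * Illustrative walk (using the hypothetical figures from the
         * page_table_config_init() example above: v_l1_shift == 20,
         * v_l2_levels == 1, V_L2_BITS == 10): for a page index I,
         *
         *   l1 slot   = (I >> 20) & (v_l1_size - 1)
         *   l2 slot   = (I >> 10) & (V_L2_SIZE - 1)
         *   leaf slot =  I        & (V_L2_SIZE - 1)
         *
         * Intermediate tables and the leaf PageDesc array are allocated
         * lazily with g_new0() and published with qatomic_cmpxchg(), so a
         * concurrent reader sees either NULL or a fully initialized level.
         */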
    489 
    490 /* In user mode, page locks aren't used; mmap_lock is enough */
    491 #ifdef CONFIG_USER_ONLY
    492 struct page_collection *
    493 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
    494 {
    495     return NULL;
    496 }
    497 
    498 void page_collection_unlock(struct page_collection *set)
    499 { }
    500 #else /* !CONFIG_USER_ONLY */
    501 
    502 #ifdef CONFIG_DEBUG_TCG
    503 
    504 static __thread GHashTable *ht_pages_locked_debug;
    505 
    506 static void ht_pages_locked_debug_init(void)
    507 {
    508     if (ht_pages_locked_debug) {
    509         return;
    510     }
    511     ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
    512 }
    513 
    514 static bool page_is_locked(const PageDesc *pd)
    515 {
    516     PageDesc *found;
    517 
    518     ht_pages_locked_debug_init();
    519     found = g_hash_table_lookup(ht_pages_locked_debug, pd);
    520     return !!found;
    521 }
    522 
    523 static void page_lock__debug(PageDesc *pd)
    524 {
    525     ht_pages_locked_debug_init();
    526     g_assert(!page_is_locked(pd));
    527     g_hash_table_insert(ht_pages_locked_debug, pd, pd);
    528 }
    529 
    530 static void page_unlock__debug(const PageDesc *pd)
    531 {
    532     bool removed;
    533 
    534     ht_pages_locked_debug_init();
    535     g_assert(page_is_locked(pd));
    536     removed = g_hash_table_remove(ht_pages_locked_debug, pd);
    537     g_assert(removed);
    538 }
    539 
    540 void do_assert_page_locked(const PageDesc *pd, const char *file, int line)
    541 {
    542     if (unlikely(!page_is_locked(pd))) {
    543         error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
    544                      pd, file, line);
    545         abort();
    546     }
    547 }
    548 
    549 void assert_no_pages_locked(void)
    550 {
    551     ht_pages_locked_debug_init();
    552     g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
    553 }
    554 
    555 #else /* !CONFIG_DEBUG_TCG */
    556 
    557 static inline void page_lock__debug(const PageDesc *pd) { }
    558 static inline void page_unlock__debug(const PageDesc *pd) { }
    559 
    560 #endif /* CONFIG_DEBUG_TCG */
    561 
    562 void page_lock(PageDesc *pd)
    563 {
    564     page_lock__debug(pd);
    565     qemu_spin_lock(&pd->lock);
    566 }
    567 
    568 void page_unlock(PageDesc *pd)
    569 {
    570     qemu_spin_unlock(&pd->lock);
    571     page_unlock__debug(pd);
    572 }
    573 
    574 static inline struct page_entry *
    575 page_entry_new(PageDesc *pd, tb_page_addr_t index)
    576 {
    577     struct page_entry *pe = g_malloc(sizeof(*pe));
    578 
    579     pe->index = index;
    580     pe->pd = pd;
    581     pe->locked = false;
    582     return pe;
    583 }
    584 
    585 static void page_entry_destroy(gpointer p)
    586 {
    587     struct page_entry *pe = p;
    588 
    589     g_assert(pe->locked);
    590     page_unlock(pe->pd);
    591     g_free(pe);
    592 }
    593 
    594 /* returns false on success */
    595 static bool page_entry_trylock(struct page_entry *pe)
    596 {
    597     bool busy;
    598 
    599     busy = qemu_spin_trylock(&pe->pd->lock);
    600     if (!busy) {
    601         g_assert(!pe->locked);
    602         pe->locked = true;
    603         page_lock__debug(pe->pd);
    604     }
    605     return busy;
    606 }
    607 
    608 static void do_page_entry_lock(struct page_entry *pe)
    609 {
    610     page_lock(pe->pd);
    611     g_assert(!pe->locked);
    612     pe->locked = true;
    613 }
    614 
    615 static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
    616 {
    617     struct page_entry *pe = value;
    618 
    619     do_page_entry_lock(pe);
    620     return FALSE;
    621 }
    622 
    623 static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
    624 {
    625     struct page_entry *pe = value;
    626 
    627     if (pe->locked) {
    628         pe->locked = false;
    629         page_unlock(pe->pd);
    630     }
    631     return FALSE;
    632 }
    633 
    634 /*
    635  * Trylock a page, and if successful, add the page to a collection.
    636  * Returns true ("busy") if the page could not be locked; false otherwise.
    637  */
    638 static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
    639 {
    640     tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
    641     struct page_entry *pe;
    642     PageDesc *pd;
    643 
    644     pe = g_tree_lookup(set->tree, &index);
    645     if (pe) {
    646         return false;
    647     }
    648 
    649     pd = page_find(index);
    650     if (pd == NULL) {
    651         return false;
    652     }
    653 
    654     pe = page_entry_new(pd, index);
    655     g_tree_insert(set->tree, &pe->index, pe);
    656 
    657     /*
    658      * If this is either (1) the first insertion or (2) a page whose index
    659      * is higher than any other so far, just lock the page and move on.
    660      */
    661     if (set->max == NULL || pe->index > set->max->index) {
    662         set->max = pe;
    663         do_page_entry_lock(pe);
    664         return false;
    665     }
    666     /*
    667      * Try to acquire out-of-order lock; if busy, return busy so that we acquire
    668      * locks in order.
    669      */
    670     return page_entry_trylock(pe);
    671 }
    672 
    673 static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
    674 {
    675     tb_page_addr_t a = *(const tb_page_addr_t *)ap;
    676     tb_page_addr_t b = *(const tb_page_addr_t *)bp;
    677 
    678     if (a == b) {
    679         return 0;
    680     } else if (a < b) {
    681         return -1;
    682     }
    683     return 1;
    684 }
    685 
    686 /*
    687  * Lock a range of pages ([@start,@end[) as well as the pages of all
    688  * intersecting TBs.
    689  * Locking order: acquire locks in ascending order of page index.
    690  */
    691 struct page_collection *
    692 page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
    693 {
    694     struct page_collection *set = g_malloc(sizeof(*set));
    695     tb_page_addr_t index;
    696     PageDesc *pd;
    697 
    698     start >>= TARGET_PAGE_BITS;
    699     end   >>= TARGET_PAGE_BITS;
    700     g_assert(start <= end);
    701 
    702     set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
    703                                 page_entry_destroy);
    704     set->max = NULL;
    705     assert_no_pages_locked();
    706 
    707  retry:
    708     g_tree_foreach(set->tree, page_entry_lock, NULL);
    709 
    710     for (index = start; index <= end; index++) {
    711         TranslationBlock *tb;
    712         int n;
    713 
    714         pd = page_find(index);
    715         if (pd == NULL) {
    716             continue;
    717         }
    718         if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
    719             g_tree_foreach(set->tree, page_entry_unlock, NULL);
    720             goto retry;
    721         }
    722         assert_page_locked(pd);
    723         PAGE_FOR_EACH_TB(pd, tb, n) {
    724             if (page_trylock_add(set, tb_page_addr0(tb)) ||
    725                 (tb_page_addr1(tb) != -1 &&
    726                  page_trylock_add(set, tb_page_addr1(tb)))) {
    727                 /* drop all locks, and reacquire in order */
    728                 g_tree_foreach(set->tree, page_entry_unlock, NULL);
    729                 goto retry;
    730             }
    731         }
    732     }
    733     return set;
    734 }
    735 
    736 void page_collection_unlock(struct page_collection *set)
    737 {
    738     /* entries are unlocked and freed via page_entry_destroy */
    739     g_tree_destroy(set->tree);
    740     g_free(set);
    741 }
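        /*
         * Usage sketch (hypothetical caller): lock every page overlapping
         * [start, end) plus the pages of any intersecting TBs, operate on
         * them, then drop all the locks at once.
         */
        #if 0
        static void with_pages_locked_example(tb_page_addr_t start,
                                              tb_page_addr_t end)
        {
            struct page_collection *pages = page_collection_lock(start, end);

            /* ... invalidate or patch TBs on the locked pages ... */

            page_collection_unlock(pages);  /* unlocks and frees every entry */
        }
        #endif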
    742 
    743 #endif /* !CONFIG_USER_ONLY */
    744 
    745 /*
    746  * Isolate the portion of code gen which can setjmp/longjmp.
    747  * Return the size of the generated code, or negative on error.
    748  */
    749 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
    750                            target_ulong pc, void *host_pc,
    751                            int *max_insns, int64_t *ti)
    752 {
    753     int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
    754     if (unlikely(ret != 0)) {
    755         return ret;
    756     }
    757 
    758     tcg_func_start(tcg_ctx);
    759 
    760     tcg_ctx->cpu = env_cpu(env);
    761     gen_intermediate_code(env_cpu(env), tb, *max_insns, pc, host_pc);
    762     assert(tb->size != 0);
    763     tcg_ctx->cpu = NULL;
    764     *max_insns = tb->icount;
    765 
    766 #ifdef CONFIG_PROFILER
    767     qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
    768     qatomic_set(&tcg_ctx->prof.interm_time,
    769                 tcg_ctx->prof.interm_time + profile_getclock() - *ti);
    770     *ti = profile_getclock();
    771 #endif
    772 
    773     return tcg_gen_code(tcg_ctx, tb, pc);
    774 }
    775 
    776 /* Called with mmap_lock held for user mode emulation.  */
    777 TranslationBlock *tb_gen_code(CPUState *cpu,
    778                               target_ulong pc, target_ulong cs_base,
    779                               uint32_t flags, int cflags)
    780 {
    781     CPUArchState *env = cpu->env_ptr;
    782     TranslationBlock *tb, *existing_tb;
    783     tb_page_addr_t phys_pc;
    784     tcg_insn_unit *gen_code_buf;
    785     int gen_code_size, search_size, max_insns;
    786 #ifdef CONFIG_PROFILER
    787     TCGProfile *prof = &tcg_ctx->prof;
    788 #endif
    789     int64_t ti;
    790     void *host_pc;
    791 
    792     assert_memory_lock();
    793     qemu_thread_jit_write();
    794 
    795     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
    796 
    797     if (phys_pc == -1) {
    798         /* Generate a one-shot TB with 1 insn in it */
    799         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
    800     }
    801 
    802     max_insns = cflags & CF_COUNT_MASK;
    803     if (max_insns == 0) {
    804         max_insns = TCG_MAX_INSNS;
    805     }
    806     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
    807 
    808  buffer_overflow:
    809     tb = tcg_tb_alloc(tcg_ctx);
    810     if (unlikely(!tb)) {
    811         /* flush must be done */
    812         tb_flush(cpu);
    813         mmap_unlock();
    814         /* Make the execution loop process the flush as soon as possible.  */
    815         cpu->exception_index = EXCP_INTERRUPT;
    816         cpu_loop_exit(cpu);
    817     }
    818 
    819     gen_code_buf = tcg_ctx->code_gen_ptr;
    820     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
    821 #if !TARGET_TB_PCREL
    822     tb->pc = pc;
    823 #endif
    824     tb->cs_base = cs_base;
    825     tb->flags = flags;
    826     tb->cflags = cflags;
    827     tb->trace_vcpu_dstate = *cpu->trace_dstate;
    828     tb_set_page_addr0(tb, phys_pc);
    829     tb_set_page_addr1(tb, -1);
    830     tcg_ctx->tb_cflags = cflags;
    831  tb_overflow:
    832 
    833 #ifdef CONFIG_PROFILER
    834     /* includes aborted translations because of exceptions */
    835     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
    836     ti = profile_getclock();
    837 #endif
    838 
    839     trace_translate_block(tb, pc, tb->tc.ptr);
    840 
    841     gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
    842     if (unlikely(gen_code_size < 0)) {
    843         switch (gen_code_size) {
    844         case -1:
    845             /*
    846              * Overflow of code_gen_buffer, or the current slice of it.
    847              *
    848              * TODO: We don't need to re-do gen_intermediate_code, nor
    849              * should we re-do the tcg optimization currently hidden
    850              * inside tcg_gen_code.  All that should be required is to
    851              * flush the TBs, allocate a new TB, re-initialize it per
    852              * above, and re-do the actual code generation.
    853              */
    854             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
    855                           "Restarting code generation for "
    856                           "code_gen_buffer overflow\n");
    857             goto buffer_overflow;
    858 
    859         case -2:
    860             /*
    861              * The code generated for the TranslationBlock is too large.
    862              * The maximum size allowed by the unwind info is 64k.
    863              * There may be stricter constraints from relocations
    864              * in the tcg backend.
    865              *
    866              * Try again with half as many insns as we attempted this time.
    867              * If a single insn overflows, there's a bug somewhere...
    868              */
    869             assert(max_insns > 1);
    870             max_insns /= 2;
    871             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
    872                           "Restarting code generation with "
    873                           "smaller translation block (max %d insns)\n",
    874                           max_insns);
    875             goto tb_overflow;
    876 
    877         default:
    878             g_assert_not_reached();
    879         }
    880     }
    881     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    882     if (unlikely(search_size < 0)) {
    883         goto buffer_overflow;
    884     }
    885     tb->tc.size = gen_code_size;
    886 
    887 #ifdef CONFIG_PROFILER
    888     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
    889     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
    890     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
    891     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
    892 #endif
    893 
    894 #ifdef DEBUG_DISAS
    895     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
    896         qemu_log_in_addr_range(pc)) {
    897         FILE *logfile = qemu_log_trylock();
    898         if (logfile) {
    899             int code_size, data_size;
    900             const tcg_target_ulong *rx_data_gen_ptr;
    901             size_t chunk_start;
    902             int insn = 0;
    903 
    904             if (tcg_ctx->data_gen_ptr) {
    905                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
    906                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
    907                 data_size = gen_code_size - code_size;
    908             } else {
    909                 rx_data_gen_ptr = 0;
    910                 code_size = gen_code_size;
    911                 data_size = 0;
    912             }
    913 
    914             /* Dump header and the first instruction */
    915             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
    916             fprintf(logfile,
    917                     "  -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
    918                     tcg_ctx->gen_insn_data[insn][0]);
    919             chunk_start = tcg_ctx->gen_insn_end_off[insn];
    920             disas(logfile, tb->tc.ptr, chunk_start);
    921 
    922             /*
    923              * Dump each instruction chunk, wrapping up empty chunks into
    924              * the next instruction. The whole array is offset so the
    925              * first entry is the beginning of the 2nd instruction.
    926              */
    927             while (insn < tb->icount) {
    928                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
    929                 if (chunk_end > chunk_start) {
    930                     fprintf(logfile, "  -- guest addr 0x" TARGET_FMT_lx "\n",
    931                             tcg_ctx->gen_insn_data[insn][0]);
    932                     disas(logfile, tb->tc.ptr + chunk_start,
    933                           chunk_end - chunk_start);
    934                     chunk_start = chunk_end;
    935                 }
    936                 insn++;
    937             }
    938 
    939             if (chunk_start < code_size) {
    940                 fprintf(logfile, "  -- tb slow paths + alignment\n");
    941                 disas(logfile, tb->tc.ptr + chunk_start,
    942                       code_size - chunk_start);
    943             }
    944 
    945             /* Finally dump any data we may have after the block */
    946             if (data_size) {
    947                 int i;
    948                 fprintf(logfile, "  data: [size=%d]\n", data_size);
    949                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
    950                     if (sizeof(tcg_target_ulong) == 8) {
    951                         fprintf(logfile,
    952                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
    953                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
    954                     } else if (sizeof(tcg_target_ulong) == 4) {
    955                         fprintf(logfile,
    956                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
    957                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
    958                     } else {
    959                         qemu_build_not_reached();
    960                     }
    961                 }
    962             }
    963             fprintf(logfile, "\n");
    964             qemu_log_unlock(logfile);
    965         }
    966     }
    967 #endif
    968 
    969     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
    970         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
    971                  CODE_GEN_ALIGN));
    972 
    973     /* init jump list */
    974     qemu_spin_init(&tb->jmp_lock);
    975     tb->jmp_list_head = (uintptr_t)NULL;
    976     tb->jmp_list_next[0] = (uintptr_t)NULL;
    977     tb->jmp_list_next[1] = (uintptr_t)NULL;
    978     tb->jmp_dest[0] = (uintptr_t)NULL;
    979     tb->jmp_dest[1] = (uintptr_t)NULL;
    980 
    981     /* init original jump addresses which have been set during tcg_gen_code() */
    982     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
    983         tb_reset_jump(tb, 0);
    984     }
    985     if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
    986         tb_reset_jump(tb, 1);
    987     }
    988 
    989     /*
    990      * If the TB is not associated with a physical RAM page then it must be
    991      * a temporary one-insn TB, and we have nothing left to do. Return early
    992      * before attempting to link to other TBs or add to the lookup table.
    993      */
    994     if (tb_page_addr0(tb) == -1) {
    995         return tb;
    996     }
    997 
    998     /*
    999      * Insert TB into the corresponding region tree before publishing it
   1000      * through QHT. Otherwise a rewind happening inside the TB might
   1001      * fail to look itself up using the host PC.
   1002      */
   1003     tcg_tb_insert(tb);
   1004 
   1005     /*
   1006      * No explicit memory barrier is required -- tb_link_page() makes the
   1007      * TB visible in a consistent state.
   1008      */
   1009     existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
   1010     /* if the TB already exists, discard what we just translated */
   1011     if (unlikely(existing_tb != tb)) {
   1012         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
   1013 
   1014         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
   1015         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
   1016         tcg_tb_remove(tb);
   1017         return existing_tb;
   1018     }
   1019     return tb;
   1020 }
   1021 
   1022 /* user-mode: call with mmap_lock held */
   1023 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
   1024 {
   1025     TranslationBlock *tb;
   1026 
   1027     assert_memory_lock();
   1028 
   1029     tb = tcg_tb_lookup(retaddr);
   1030     if (tb) {
   1031         /* We can use retranslation to find the PC.  */
   1032         cpu_restore_state_from_tb(cpu, tb, retaddr);
   1033         tb_phys_invalidate(tb, -1);
   1034     } else {
   1035         /* The exception probably happened in a helper.  The CPU state should
   1036            have been saved before calling it. Fetch the PC from there.  */
   1037         CPUArchState *env = cpu->env_ptr;
   1038         target_ulong pc, cs_base;
   1039         tb_page_addr_t addr;
   1040         uint32_t flags;
   1041 
   1042         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
   1043         addr = get_page_addr_code(env, pc);
   1044         if (addr != -1) {
   1045             tb_invalidate_phys_range(addr, addr + 1);
   1046         }
   1047     }
   1048 }
   1049 
   1050 #ifndef CONFIG_USER_ONLY
   1051 /*
   1052  * In deterministic execution mode, instructions doing device I/Os
   1053  * must be at the end of the TB.
   1054  *
   1055  * Called by softmmu_template.h, with iothread mutex not held.
   1056  */
   1057 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
   1058 {
   1059     TranslationBlock *tb;
   1060     CPUClass *cc;
   1061     uint32_t n;
   1062 
   1063     tb = tcg_tb_lookup(retaddr);
   1064     if (!tb) {
   1065         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
   1066                   (void *)retaddr);
   1067     }
   1068     cpu_restore_state_from_tb(cpu, tb, retaddr);
   1069 
   1070     /*
   1071      * Some guests must re-execute the branch when re-executing a delay
   1072      * slot instruction.  When this is the case, adjust icount and N
   1073      * to account for the re-execution of the branch.
   1074      */
   1075     n = 1;
   1076     cc = CPU_GET_CLASS(cpu);
   1077     if (cc->tcg_ops->io_recompile_replay_branch &&
   1078         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
   1079         cpu_neg(cpu)->icount_decr.u16.low++;
   1080         n = 2;
   1081     }
   1082 
   1083     /*
   1084      * Exit the loop and potentially generate a new TB executing
   1085      * just the I/O insns. We also limit instrumentation to memory
   1086      * operations only (which execute after completion) so we don't
   1087      * double instrument the instruction.
   1088      */
   1089     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
   1090 
   1091     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
   1092         target_ulong pc = log_pc(cpu, tb);
   1093         if (qemu_log_in_addr_range(pc)) {
   1094             qemu_log("cpu_io_recompile: rewound execution of TB to "
   1095                      TARGET_FMT_lx "\n", pc);
   1096         }
   1097     }
   1098 
   1099     cpu_loop_exit_noexc(cpu);
   1100 }
   1101 
   1102 static void print_qht_statistics(struct qht_stats hst, GString *buf)
   1103 {
   1104     uint32_t hgram_opts;
   1105     size_t hgram_bins;
   1106     char *hgram;
   1107 
   1108     if (!hst.head_buckets) {
   1109         return;
   1110     }
   1111     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
   1112                            "(%0.2f%% head buckets used)\n",
   1113                            hst.used_head_buckets, hst.head_buckets,
   1114                            (double)hst.used_head_buckets /
   1115                            hst.head_buckets * 100);
   1116 
   1117     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
   1118     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
   1119     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
   1120         hgram_opts |= QDIST_PR_NODECIMAL;
   1121     }
   1122     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
   1123     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
   1124                            "Histogram: %s\n",
   1125                            qdist_avg(&hst.occupancy) * 100, hgram);
   1126     g_free(hgram);
   1127 
   1128     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
   1129     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
   1130     if (hgram_bins > 10) {
   1131         hgram_bins = 10;
   1132     } else {
   1133         hgram_bins = 0;
   1134         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
   1135     }
   1136     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
   1137     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
   1138                            "Histogram: %s\n",
   1139                            qdist_avg(&hst.chain), hgram);
   1140     g_free(hgram);
   1141 }
   1142 
   1143 struct tb_tree_stats {
   1144     size_t nb_tbs;
   1145     size_t host_size;
   1146     size_t target_size;
   1147     size_t max_target_size;
   1148     size_t direct_jmp_count;
   1149     size_t direct_jmp2_count;
   1150     size_t cross_page;
   1151 };
   1152 
   1153 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
   1154 {
   1155     const TranslationBlock *tb = value;
   1156     struct tb_tree_stats *tst = data;
   1157 
   1158     tst->nb_tbs++;
   1159     tst->host_size += tb->tc.size;
   1160     tst->target_size += tb->size;
   1161     if (tb->size > tst->max_target_size) {
   1162         tst->max_target_size = tb->size;
   1163     }
   1164     if (tb_page_addr1(tb) != -1) {
   1165         tst->cross_page++;
   1166     }
   1167     if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
   1168         tst->direct_jmp_count++;
   1169         if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
   1170             tst->direct_jmp2_count++;
   1171         }
   1172     }
   1173     return false;
   1174 }
   1175 
   1176 void dump_exec_info(GString *buf)
   1177 {
   1178     struct tb_tree_stats tst = {};
   1179     struct qht_stats hst;
   1180     size_t nb_tbs, flush_full, flush_part, flush_elide;
   1181 
   1182     tcg_tb_foreach(tb_tree_stats_iter, &tst);
   1183     nb_tbs = tst.nb_tbs;
   1184     /* XXX: avoid using doubles ? */
   1185     g_string_append_printf(buf, "Translation buffer state:\n");
   1186     /*
   1187      * Report total code size including the padding and TB structs;
   1188      * otherwise users might think "-accel tcg,tb-size" is not honoured.
   1189      * For avg host size we use the precise numbers from tb_tree_stats though.
   1190      */
   1191     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
   1192                            tcg_code_size(), tcg_code_capacity());
   1193     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
   1194     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
   1195                            nb_tbs ? tst.target_size / nb_tbs : 0,
   1196                            tst.max_target_size);
   1197     g_string_append_printf(buf, "TB avg host size    %zu bytes "
   1198                            "(expansion ratio: %0.1f)\n",
   1199                            nb_tbs ? tst.host_size / nb_tbs : 0,
   1200                            tst.target_size ?
   1201                            (double)tst.host_size / tst.target_size : 0);
   1202     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
   1203                            tst.cross_page,
   1204                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
   1205     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
   1206                            "(2 jumps=%zu %zu%%)\n",
   1207                            tst.direct_jmp_count,
   1208                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
   1209                            tst.direct_jmp2_count,
   1210                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
   1211 
   1212     qht_statistics_init(&tb_ctx.htable, &hst);
   1213     print_qht_statistics(hst, buf);
   1214     qht_statistics_destroy(&hst);
   1215 
   1216     g_string_append_printf(buf, "\nStatistics:\n");
   1217     g_string_append_printf(buf, "TB flush count      %u\n",
   1218                            qatomic_read(&tb_ctx.tb_flush_count));
   1219     g_string_append_printf(buf, "TB invalidate count %u\n",
   1220                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
   1221 
   1222     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
   1223     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
   1224     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
   1225     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
   1226     tcg_dump_info(buf);
   1227 }
   1228 
   1229 #else /* CONFIG_USER_ONLY */
   1230 
   1231 void cpu_interrupt(CPUState *cpu, int mask)
   1232 {
   1233     g_assert(qemu_mutex_iothread_locked());
   1234     cpu->interrupt_request |= mask;
   1235     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
   1236 }
   1237 
   1238 /*
   1239  * Walks guest process memory "regions" one by one
   1240  * and calls callback function 'fn' for each region.
   1241  */
   1242 struct walk_memory_regions_data {
   1243     walk_memory_regions_fn fn;
   1244     void *priv;
   1245     target_ulong start;
   1246     int prot;
   1247 };
   1248 
   1249 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
   1250                                    target_ulong end, int new_prot)
   1251 {
   1252     if (data->start != -1u) {
   1253         int rc = data->fn(data->priv, data->start, end, data->prot);
   1254         if (rc != 0) {
   1255             return rc;
   1256         }
   1257     }
   1258 
   1259     data->start = (new_prot ? end : -1u);
   1260     data->prot = new_prot;
   1261 
   1262     return 0;
   1263 }
   1264 
   1265 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
   1266                                  target_ulong base, int level, void **lp)
   1267 {
   1268     target_ulong pa;
   1269     int i, rc;
   1270 
   1271     if (*lp == NULL) {
   1272         return walk_memory_regions_end(data, base, 0);
   1273     }
   1274 
   1275     if (level == 0) {
   1276         PageDesc *pd = *lp;
   1277 
   1278         for (i = 0; i < V_L2_SIZE; ++i) {
   1279             int prot = pd[i].flags;
   1280 
   1281             pa = base | (i << TARGET_PAGE_BITS);
   1282             if (prot != data->prot) {
   1283                 rc = walk_memory_regions_end(data, pa, prot);
   1284                 if (rc != 0) {
   1285                     return rc;
   1286                 }
   1287             }
   1288         }
   1289     } else {
   1290         void **pp = *lp;
   1291 
   1292         for (i = 0; i < V_L2_SIZE; ++i) {
   1293             pa = base | ((target_ulong)i <<
   1294                 (TARGET_PAGE_BITS + V_L2_BITS * level));
   1295             rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
   1296             if (rc != 0) {
   1297                 return rc;
   1298             }
   1299         }
   1300     }
   1301 
   1302     return 0;
   1303 }
   1304 
   1305 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
   1306 {
   1307     struct walk_memory_regions_data data;
   1308     uintptr_t i, l1_sz = v_l1_size;
   1309 
   1310     data.fn = fn;
   1311     data.priv = priv;
   1312     data.start = -1u;
   1313     data.prot = 0;
   1314 
   1315     for (i = 0; i < l1_sz; i++) {
   1316         target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
   1317         int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
   1318         if (rc != 0) {
   1319             return rc;
   1320         }
   1321     }
   1322 
   1323     return walk_memory_regions_end(&data, 0, 0);
   1324 }
   1325 
   1326 static int dump_region(void *priv, target_ulong start,
   1327     target_ulong end, unsigned long prot)
   1328 {
   1329     FILE *f = (FILE *)priv;
   1330 
   1331     (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
   1332         " "TARGET_FMT_lx" %c%c%c\n",
   1333         start, end, end - start,
   1334         ((prot & PAGE_READ) ? 'r' : '-'),
   1335         ((prot & PAGE_WRITE) ? 'w' : '-'),
   1336         ((prot & PAGE_EXEC) ? 'x' : '-'));
   1337 
   1338     return 0;
   1339 }
   1340 
   1341 /* dump memory mappings */
   1342 void page_dump(FILE *f)
   1343 {
   1344     const int length = sizeof(target_ulong) * 2;
   1345     (void) fprintf(f, "%-*s %-*s %-*s %s\n",
   1346             length, "start", length, "end", length, "size", "prot");
   1347     walk_memory_regions(f, dump_region);
   1348 }
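        /*
         * Example output (illustrative addresses, 32-bit target assumed):
         *
         *   start    end      size     prot
         *   00400000-00452000 00052000 r-x
         *   00452000-00455000 00003000 rw-
         */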
   1349 
   1350 int page_get_flags(target_ulong address)
   1351 {
   1352     PageDesc *p;
   1353 
   1354     p = page_find(address >> TARGET_PAGE_BITS);
   1355     if (!p) {
   1356         return 0;
   1357     }
   1358     return p->flags;
   1359 }
   1360 
   1361 /*
   1362  * Allow the target to decide if PAGE_TARGET_[12] may be reset.
   1363  * By default, they are not kept.
   1364  */
   1365 #ifndef PAGE_TARGET_STICKY
   1366 #define PAGE_TARGET_STICKY  0
   1367 #endif
   1368 #define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
   1369 
   1370 /* Modify the flags of a page and invalidate the code if necessary.
   1371    The flag PAGE_WRITE_ORG is set automatically depending
   1372    on PAGE_WRITE.  The mmap_lock should already be held.  */
   1373 void page_set_flags(target_ulong start, target_ulong end, int flags)
   1374 {
   1375     target_ulong addr, len;
   1376     bool reset, inval_tb = false;
   1377 
   1378     /* This function should never be called with addresses outside the
   1379        guest address space.  If this assert fires, it probably indicates
   1380        a missing call to h2g_valid.  */
   1381     assert(end - 1 <= GUEST_ADDR_MAX);
   1382     assert(start < end);
   1383     /* Only set PAGE_ANON with new mappings. */
   1384     assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
   1385     assert_memory_lock();
   1386 
   1387     start = start & TARGET_PAGE_MASK;
   1388     end = TARGET_PAGE_ALIGN(end);
   1389 
   1390     if (flags & PAGE_WRITE) {
   1391         flags |= PAGE_WRITE_ORG;
   1392     }
   1393     reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
   1394     if (reset) {
   1395         page_reset_target_data(start, end);
   1396     }
   1397     flags &= ~PAGE_RESET;
   1398 
   1399     for (addr = start, len = end - start;
   1400          len != 0;
   1401          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
   1402         PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
   1403 
   1404         /*
   1405          * If the page was executable, but is reset, or is no longer
   1406          * executable, or has become writable, then invalidate any code.
   1407          */
   1408         if ((p->flags & PAGE_EXEC)
   1409             && (reset ||
   1410                 !(flags & PAGE_EXEC) ||
   1411                 (flags & ~p->flags & PAGE_WRITE))) {
   1412             inval_tb = true;
   1413         }
   1414         /* Using mprotect on a page does not change sticky bits. */
   1415         p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags;
   1416     }
   1417 
   1418     if (inval_tb) {
   1419         tb_invalidate_phys_range(start, end);
   1420     }
   1421 }
   1422 
   1423 int page_check_range(target_ulong start, target_ulong len, int flags)
   1424 {
   1425     PageDesc *p;
   1426     target_ulong end;
   1427     target_ulong addr;
   1428 
   1429     /* This function should never be called with addresses outside the
   1430        guest address space.  If this assert fires, it probably indicates
   1431        a missing call to h2g_valid.  */
   1432     if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
   1433         assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
   1434     }
   1435 
   1436     if (len == 0) {
   1437         return 0;
   1438     }
   1439     if (start + len - 1 < start) {
   1440         /* We've wrapped around.  */
   1441         return -1;
   1442     }
   1443 
   1444     /* must do this before we lose bits in the next step */
   1445     end = TARGET_PAGE_ALIGN(start + len);
   1446     start = start & TARGET_PAGE_MASK;
   1447 
   1448     for (addr = start, len = end - start;
   1449          len != 0;
   1450          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
   1451         p = page_find(addr >> TARGET_PAGE_BITS);
   1452         if (!p) {
   1453             return -1;
   1454         }
   1455         if (!(p->flags & PAGE_VALID)) {
   1456             return -1;
   1457         }
   1458 
   1459         if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
   1460             return -1;
   1461         }
   1462         if (flags & PAGE_WRITE) {
   1463             if (!(p->flags & PAGE_WRITE_ORG)) {
   1464                 return -1;
   1465             }
   1466             /* unprotect the page if it was put read-only because it
   1467                contains translated code */
   1468             if (!(p->flags & PAGE_WRITE)) {
   1469                 if (!page_unprotect(addr, 0)) {
   1470                     return -1;
   1471                 }
   1472             }
   1473         }
   1474     }
   1475     return 0;
   1476 }
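        /*
         * Usage sketch (hypothetical user-mode caller): verify that a guest
         * buffer may be read and written before touching it.
         */
        #if 0
        static bool guest_range_ok(target_ulong addr, target_ulong len)
        {
            return page_check_range(addr, len, PAGE_READ | PAGE_WRITE) == 0;
        }
        #endif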
   1477 
   1478 void page_protect(tb_page_addr_t page_addr)
   1479 {
   1480     target_ulong addr;
   1481     PageDesc *p;
   1482     int prot;
   1483 
   1484     p = page_find(page_addr >> TARGET_PAGE_BITS);
   1485     if (p && (p->flags & PAGE_WRITE)) {
   1486         /*
   1487          * Force the host page to be non-writable (writes will have a page fault +
   1488          * mprotect overhead).
   1489          */
   1490         page_addr &= qemu_host_page_mask;
   1491         prot = 0;
   1492         for (addr = page_addr; addr < page_addr + qemu_host_page_size;
   1493              addr += TARGET_PAGE_SIZE) {
   1494 
   1495             p = page_find(addr >> TARGET_PAGE_BITS);
   1496             if (!p) {
   1497                 continue;
   1498             }
   1499             prot |= p->flags;
   1500             p->flags &= ~PAGE_WRITE;
   1501         }
   1502         mprotect(g2h_untagged(page_addr), qemu_host_page_size,
   1503                  (prot & PAGE_BITS) & ~PAGE_WRITE);
   1504     }
   1505 }
   1506 
   1507 /* called from signal handler: invalidate the code and unprotect the
   1508  * page. Return 0 if the fault was not handled, 1 if it was handled,
   1509  * and 2 if it was handled but the caller must cause the TB to be
   1510  * immediately exited. (We can only return 2 if the 'pc' argument is
   1511  * non-zero.)
   1512  */
   1513 int page_unprotect(target_ulong address, uintptr_t pc)
   1514 {
   1515     unsigned int prot;
   1516     bool current_tb_invalidated;
   1517     PageDesc *p;
   1518     target_ulong host_start, host_end, addr;
   1519 
   1520     /* Technically this isn't safe inside a signal handler.  However we
   1521        know this only ever happens in a synchronous SEGV handler, so in
   1522        practice it seems to be ok.  */
   1523     mmap_lock();
   1524 
   1525     p = page_find(address >> TARGET_PAGE_BITS);
   1526     if (!p) {
   1527         mmap_unlock();
   1528         return 0;
   1529     }
   1530 
   1531     /* if the page was really writable, then we change its
   1532        protection back to writable */
   1533     if (p->flags & PAGE_WRITE_ORG) {
   1534         current_tb_invalidated = false;
   1535         if (p->flags & PAGE_WRITE) {
   1536             /* If the page is actually marked WRITE then assume this is because
   1537              * this thread raced with another one which got here first and
   1538              * set the page to PAGE_WRITE and did the TB invalidate for us.
   1539              */
   1540 #ifdef TARGET_HAS_PRECISE_SMC
   1541             TranslationBlock *current_tb = tcg_tb_lookup(pc);
   1542             if (current_tb) {
   1543                 current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
   1544             }
   1545 #endif
   1546         } else {
   1547             host_start = address & qemu_host_page_mask;
   1548             host_end = host_start + qemu_host_page_size;
   1549 
   1550             prot = 0;
   1551             for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
   1552                 p = page_find(addr >> TARGET_PAGE_BITS);
   1553                 p->flags |= PAGE_WRITE;
   1554                 prot |= p->flags;
   1555 
   1556                 /* and since the content will be modified, we must invalidate
   1557                    the corresponding translated code. */
   1558                 current_tb_invalidated |=
   1559                     tb_invalidate_phys_page_unwind(addr, pc);
   1560             }
   1561             mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
   1562                      prot & PAGE_BITS);
   1563         }
   1564         mmap_unlock();
   1565         /* If the current TB was invalidated, return to the main loop */
   1566         return current_tb_invalidated ? 2 : 1;
   1567     }
   1568     mmap_unlock();
   1569     return 0;
   1570 }
   1571 #endif /* CONFIG_USER_ONLY */
   1572 
   1573 /*
   1574  * Called by generic code at e.g. cpu reset after cpu creation,
   1575  * therefore we must be prepared to allocate the jump cache.
   1576  */
   1577 void tcg_flush_jmp_cache(CPUState *cpu)
   1578 {
   1579     CPUJumpCache *jc = cpu->tb_jmp_cache;
   1580 
   1581     /* During early initialization, the cache may not yet be allocated. */
   1582     if (unlikely(jc == NULL)) {
   1583         return;
   1584     }
   1585 
   1586     for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
   1587         qatomic_set(&jc->array[i].tb, NULL);
   1588     }
   1589 }
   1590 
   1591 /* This is a wrapper for common code that cannot use CONFIG_SOFTMMU */
   1592 void tcg_flush_softmmu_tlb(CPUState *cs)
   1593 {
   1594 #ifdef CONFIG_SOFTMMU
   1595     tlb_flush(cs);
   1596 #endif
   1597 }