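Introduce a complete set of operations on TCGTempSet (tempset_init, tempset_set, tempset_clear, tempset_clear_all, tempset_test, tempset_find_first), and do not directly call <qemu/bitops.h> functions on its storage. Expand the array as necessary in tempset_set(). Use the tcg allocation pool so that we do not have to worry about explicitly freeing the array.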
Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- include/tcg/tcg.h | 28 +++++++++++++++++++++++++- tcg/optimize.c | 18 ++++++++--------- tcg/tcg.c | 51 ++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 80 insertions(+), 17 deletions(-) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 0d90701dcd..4d001fed39 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -524,9 +524,35 @@ typedef struct TCGTemp { typedef struct TCGContext TCGContext; typedef struct TCGTempSet { - unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)]; + unsigned long *data; + size_t word_len; } TCGTempSet; +void tempset_init(TCGTempSet *set, size_t len); +bool tempset_find_first(const TCGTempSet *set, size_t *i); +void tempset_set(TCGTempSet *set, size_t i); + +static inline void tempset_clear_all(TCGTempSet *set) +{ + memset(set->data, 0, set->word_len * sizeof(unsigned long)); +} + +static inline void tempset_clear(TCGTempSet *set, size_t i) +{ + size_t l = i / BITS_PER_LONG; + size_t b = i % BITS_PER_LONG; + if (likely(l < set->word_len)) { + set->data[l] &= ~BIT(b); + } +} + +static inline bool tempset_test(const TCGTempSet *set, size_t i) +{ + size_t l = i / BITS_PER_LONG; + size_t b = i % BITS_PER_LONG; + return l < set->word_len && (set->data[l] & BIT(b)); +} + /* While we limit helpers to 6 arguments, for 32-bit hosts, with padding, this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands. 
There are never more than 2 outputs, which means that we can store all diff --git a/tcg/optimize.c b/tcg/optimize.c index 2aa491605e..b0ecef1fb6 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -94,10 +94,10 @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts) size_t idx = temp_idx(ts); TempOptInfo *ti; - if (test_bit(idx, temps_used->l)) { + if (tempset_test(temps_used, idx)) { return; } - set_bit(idx, temps_used->l); + tempset_set(temps_used, idx); ti = ts->state_ptr; if (ti == NULL) { @@ -612,7 +612,7 @@ void tcg_optimize(TCGContext *s) nb_temps = s->nb_temps; nb_globals = s->nb_globals; - memset(&temps_used, 0, sizeof(temps_used)); + tempset_init(&temps_used, nb_temps); for (i = 0; i < nb_temps; ++i) { TCGTemp *ts = tcg_temp(s, i); ts->state_ptr = NULL; @@ -1254,7 +1254,7 @@ void tcg_optimize(TCGContext *s) op->args[1], op->args[2]); if (tmp != 2) { if (tmp) { - memset(&temps_used, 0, sizeof(temps_used)); + tempset_clear_all(&temps_used); op->opc = INDEX_op_br; op->args[0] = op->args[3]; } else { @@ -1338,7 +1338,7 @@ void tcg_optimize(TCGContext *s) if (tmp != 2) { if (tmp) { do_brcond_true: - memset(&temps_used, 0, sizeof(temps_used)); + tempset_clear_all(&temps_used); op->opc = INDEX_op_br; op->args[0] = op->args[5]; } else { @@ -1354,7 +1354,7 @@ void tcg_optimize(TCGContext *s) /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. 
*/ do_brcond_high: - memset(&temps_used, 0, sizeof(temps_used)); + tempset_clear_all(&temps_used); op->opc = INDEX_op_brcond_i32; op->args[0] = op->args[1]; op->args[1] = op->args[3]; @@ -1380,7 +1380,7 @@ void tcg_optimize(TCGContext *s) goto do_default; } do_brcond_low: - memset(&temps_used, 0, sizeof(temps_used)); + tempset_clear_all(&temps_used); op->opc = INDEX_op_brcond_i32; op->args[1] = op->args[2]; op->args[2] = op->args[4]; @@ -1485,7 +1485,7 @@ void tcg_optimize(TCGContext *s) if (!(op->args[nb_oargs + nb_iargs + 1] & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { for (i = 0; i < nb_globals; i++) { - if (test_bit(i, temps_used.l)) { + if (tempset_test(&temps_used, i)) { reset_ts(tcg_temp(s, i)); } } @@ -1500,7 +1500,7 @@ void tcg_optimize(TCGContext *s) block, otherwise we only trash the output args. "mask" is the non-zero bits mask for the first output arg. */ if (def->flags & TCG_OPF_BB_END) { - memset(&temps_used, 0, sizeof(temps_used)); + tempset_clear_all(&temps_used); } else { do_reset_output: for (i = 0; i < nb_oargs; i++) { diff --git a/tcg/tcg.c b/tcg/tcg.c index 7284209cff..a505457cee 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1182,7 +1182,9 @@ void tcg_func_start(TCGContext *s) s->nb_temps = s->nb_globals; /* No temps have been previously allocated for size or locality. */ - memset(s->free_temps, 0, sizeof(s->free_temps)); + for (int i = 0; i < ARRAY_SIZE(s->free_temps); ++i) { + tempset_init(&s->free_temps[i], TCG_MAX_TEMPS); + } /* No constant temps have been previously allocated. */ for (int i = 0; i < TCG_TYPE_COUNT; ++i) { @@ -1324,13 +1326,12 @@ TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local) TCGContext *s = tcg_ctx; TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL; TCGTemp *ts; - int idx, k; + size_t idx, k; k = type + (temp_local ? 
TCG_TYPE_COUNT : 0); - idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS); - if (idx < TCG_MAX_TEMPS) { + if (tempset_find_first(&s->free_temps[k], &idx)) { /* There is already an available temp with the right type. */ - clear_bit(idx, s->free_temps[k].l); + tempset_clear(&s->free_temps[k], idx); ts = tcg_temp(s, idx); ts->temp_allocated = 1; @@ -1403,7 +1404,7 @@ TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match) void tcg_temp_free_internal(TCGTemp *ts) { TCGContext *s = tcg_ctx; - int k, idx; + size_t k, idx; /* In order to simplify users of tcg_constant_*, silently ignore free. */ if (ts->kind == TEMP_CONST) { @@ -1423,7 +1424,7 @@ void tcg_temp_free_internal(TCGTemp *ts) idx = temp_idx(ts); k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT); - set_bit(idx, s->free_temps[k].l); + tempset_set(&s->free_temps[k], idx); } TCGTemp *tcg_constant_internal(TCGType type, int64_t val) @@ -4665,6 +4666,42 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) return tcg_current_code_size(s); } +void tempset_init(TCGTempSet *set, size_t len) +{ + size_t word_len = BITS_TO_LONGS(len); + + set->word_len = word_len; + set->data = tcg_malloc(word_len * sizeof(unsigned long)); + memset(set->data, 0, word_len * sizeof(unsigned long)); +} + +void tempset_set(TCGTempSet *set, size_t i) +{ + size_t l = i / BITS_PER_LONG; + size_t b = i % BITS_PER_LONG; + + if (l >= set->word_len) { + size_t old_blen = set->word_len * sizeof(unsigned long); + size_t new_wlen = set->word_len * 2; + unsigned long *new_data = tcg_malloc(old_blen * 2); + + memcpy(new_data, set->data, old_blen); + memset((char *)new_data + old_blen, 0, old_blen); + + set->data = new_data; + set->word_len = new_wlen; + } + set->data[l] |= BIT(b); +} + +bool tempset_find_first(const TCGTempSet *set, size_t *i) +{ + size_t max = set->word_len * BITS_PER_LONG; + size_t ret = find_first_bit(set->data, max); + *i = ret; + return ret < max; +} + #ifdef CONFIG_PROFILER void tcg_dump_info(void) { -- 2.25.1