> -----Original Message-----
> From: Brian Cain <brian.c...@oss.qualcomm.com>
> Sent: Monday, March 3, 2025 10:24 AM
> To: qemu-devel@nongnu.org
> Cc: richard.hender...@linaro.org; phi...@linaro.org;
> quic_mathb...@quicinc.com; a...@rev.ng; a...@rev.ng;
> quic_mlie...@quicinc.com; ltaylorsimp...@gmail.com;
> alex.ben...@linaro.org; quic_mbur...@quicinc.com;
> sidn...@quicinc.com; Brian Cain <bc...@quicinc.com>
> Subject: Re: [PATCH 34/39] target/hexagon: Add TLB, k0 {un,}lock
>
>
> On 2/28/2025 11:28 PM, Brian Cain wrote:
> > From: Brian Cain <bc...@quicinc.com>
> >
> > Signed-off-by: Brian Cain <brian.c...@oss.qualcomm.com>
> > ---
> > target/hexagon/sys_macros.h |   8 +--
> > target/hexagon/op_helper.c  | 104 ++++++++++++++++++++++++++++++++++++
> > 2 files changed, 108 insertions(+), 4 deletions(-)
> >
> > diff --git a/target/hexagon/sys_macros.h b/target/hexagon/sys_macros.h
> > index 3c4c3c7aa5..e5dc1ce0ab 100644
> > --- a/target/hexagon/sys_macros.h
> > +++ b/target/hexagon/sys_macros.h
> > @@ -143,11 +143,11 @@
> > #define fDCINVIDX(REG)
> > #define fDCINVA(REG) do { REG = REG; } while (0) /* Nothing to do in qemu */
> >
> > -#define fSET_TLB_LOCK() g_assert_not_reached()
> > -#define fCLEAR_TLB_LOCK() g_assert_not_reached()
> > +#define fSET_TLB_LOCK() hex_tlb_lock(env);
> > +#define fCLEAR_TLB_LOCK() hex_tlb_unlock(env);
> >
> > -#define fSET_K0_LOCK() g_assert_not_reached()
> > -#define fCLEAR_K0_LOCK() g_assert_not_reached()
> > +#define fSET_K0_LOCK() hex_k0_lock(env);
> > +#define fCLEAR_K0_LOCK() hex_k0_unlock(env);
> >
> > #define fTLB_IDXMASK(INDEX) \
> >     ((INDEX) & (fPOW2_ROUNDUP(fCAST4u(env_archcpu(env)->num_tlbs)) - 1))
> >
> > diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
> > index 702c3dd3c6..f3b14fbf58 100644
> > --- a/target/hexagon/op_helper.c
> > +++ b/target/hexagon/op_helper.c
> > @@ -1184,6 +1184,110 @@ void HELPER(modify_ssr)(CPUHexagonState *env, uint32_t new, uint32_t old)
> >      BQL_LOCK_GUARD();
> >      hexagon_modify_ssr(env, new, old);
> >  }
> > +
> > +static void hex_k0_lock(CPUHexagonState *env)
> > +{
> > +    BQL_LOCK_GUARD();
> > +    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
> > +
> > +    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
Minor nit - registers should be target_ulong type.
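i.e., something like:

    target_ulong syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
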
> > +    if (GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg)) {
> > +        if (env->k0_lock_state == HEX_LOCK_QUEUED) {
Are HEX_LOCK_* defined in an earlier patch in this series? If so, can we move
the definitions here?
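For reference, I'd expect something along these lines (the four states are the
ones this patch uses; the ordering/values are my guess):

    typedef enum {
        HEX_LOCK_UNLOCKED,
        HEX_LOCK_WAITING,
        HEX_LOCK_OWNER,
        HEX_LOCK_QUEUED,
    } hex_lock_state_t;
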
> > +            env->next_PC += 4;
> > +            env->k0_lock_count++;
> > +            env->k0_lock_state = HEX_LOCK_OWNER;
> > +            SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
> > +            return;
> > +        }
> > +        if (env->k0_lock_state == HEX_LOCK_OWNER) {
> > +            qemu_log_mask(LOG_GUEST_ERROR,
> > +                          "Double k0lock at PC: 0x%x, thread may hang\n",
> > +                          env->next_PC);
> > +            env->next_PC += 4;
> > +            CPUState *cs = env_cpu(env);
> > +            cpu_interrupt(cs, CPU_INTERRUPT_HALT);
> > +            return;
> > +        }
> > +        env->k0_lock_state = HEX_LOCK_WAITING;
> > +        CPUState *cs = env_cpu(env);
> > +        cpu_interrupt(cs, CPU_INTERRUPT_HALT);
> > +    } else {
> > +        env->next_PC += 4;
> > +        env->k0_lock_count++;
> > +        env->k0_lock_state = HEX_LOCK_OWNER;
> > +        SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
> > +    }
> > +
> > +}
>
> This was discussed previously at
> https://lore.kernel.org/qemu-devel/CH3PR02MB102479550F96F09E0C9D50BA7B87B2@CH3PR02MB10247.namprd02.prod.outlook.com/
>
> We have taken some, but not all, of the suggestions from that thread. One
> of our remaining concerns is the architectural requirement for "fairness"
> in picking which hardware thread the lock is passed to next. If we unleash
> the thundering herd instead, does that just mean fairness depends on the
> host scheduler's design / configuration?
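
Probably, yes - if all of the waiters are woken to race for the lock, which
one wins comes down to the host scheduler. One way to keep the selection
deterministic is to rank the waiters by modulo distance from the current
thread. A rough sketch (untested, and "num_threads" is an assumed hart-count
variable, not something this patch defines):

    /*
     * Wake the waiter that follows this_threadId in round-robin order,
     * i.e. the one with the smallest distance from this_threadId + 1,
     * wrapping modulo num_threads.
     */
    unsigned best_dist = UINT_MAX;
    CPU_FOREACH(cs) {
        CPUHexagonState *thread = cpu_env(cs);
        unsigned dist = (thread->threadId + num_threads - this_threadId - 1)
                        % num_threads;
        if (thread->k0_lock_state == HEX_LOCK_WAITING && dist < best_dist) {
            best_dist = dist;
            unlock_thread = thread;
        }
    }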
>
> Also - I note that we didn't take the suggestions regarding cpu_loop_exit /
> cpu_loop_exit_restore. That was an oversight; the next revision will include
> that update.
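
Sounds good. For the waiting path, the usual shape elsewhere in QEMU is
roughly the following (a sketch only - whether EXCP_HLT is the right
exception_index for hexagon here is an open question):

    env->k0_lock_state = HEX_LOCK_WAITING;
    CPUState *cs = env_cpu(env);
    cs->halted = 1;
    cs->exception_index = EXCP_HLT;
    cpu_loop_exit(cs);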
>
> > +
> > +static void hex_k0_unlock(CPUHexagonState *env)
> > +{
> > +    BQL_LOCK_GUARD();
> > +    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
> > +
> > +    /* Nothing to do if the k0 isn't locked by this thread */
> > +    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
target_ulong
> > +    if ((GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg) == 0) ||
> > +        (env->k0_lock_state != HEX_LOCK_OWNER)) {
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "thread %d attempted to unlock k0 without having the "
> > +                      "lock, k0_lock state = %d, syscfg:k0 = %d\n",
> > +                      env->threadId, env->k0_lock_state,
> > +                      GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg));
> > +        g_assert(env->k0_lock_state != HEX_LOCK_WAITING);
> > +        return;
> > +    }
> > +
> > +    env->k0_lock_count--;
> > +    env->k0_lock_state = HEX_LOCK_UNLOCKED;
> > +    SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 0);
> > +
> > +    /* Look for a thread to unlock */
> > +    unsigned int this_threadId = env->threadId;
> > +    CPUHexagonState *unlock_thread = NULL;
> > +    CPUState *cs;
> > +    CPU_FOREACH(cs) {
> > +        CPUHexagonState *thread = cpu_env(cs);
> > +
> > +        /*
> > +         * The hardware implements round-robin fairness, so we look for
> > +         * threads starting at env->threadId + 1 and incrementing modulo
> > +         * the number of threads.
> > +         *
> > +         * To implement this, we check if thread is earlier in the modulo
> > +         * sequence than unlock_thread:
> > +         *     if unlock_thread's id is higher than this thread's
> > +         *         thread must be between this thread and unlock_thread
> > +         *     else
> > +         *         any thread with an id higher than this thread's is
> > +         *         ahead of unlock_thread; failing that, thread's id must
> > +         *         be lower than unlock_thread's
> > +         */
> > +        if (thread->k0_lock_state == HEX_LOCK_WAITING) {
> > +            if (!unlock_thread) {
> > +                unlock_thread = thread;
> > +            } else if (unlock_thread->threadId > this_threadId) {
> > +                if (this_threadId < thread->threadId &&
> > +                    thread->threadId < unlock_thread->threadId) {
> > +                    unlock_thread = thread;
> > +                }
> > +            } else {
> > +                if (thread->threadId > this_threadId) {
> > +                    unlock_thread = thread;
> > +                }
> > +                if (thread->threadId < unlock_thread->threadId) {
> > +                    unlock_thread = thread;
> > +                }
> > +            }
> > +        }
> > +    }
> > +    if (unlock_thread) {
> > +        cs = env_cpu(unlock_thread);
> > +        unlock_thread->k0_lock_state = HEX_LOCK_QUEUED;
> > +        SET_SYSCFG_FIELD(unlock_thread, SYSCFG_K0LOCK, 1);
> > +        cpu_interrupt(cs, CPU_INTERRUPT_K0_UNLOCK);
> > +    }
> > +
> > +}
> > #endif
> >
> >