>>> On 01.02.18 at 21:53, <andrew.coop...@citrix.com> wrote:
> On 07/12/17 14:03, Jan Beulich wrote:
>> @@ -2805,13 +2808,17 @@ x86_decode(
>>              ea.type = OP_MEM;
>>              if ( modrm_rm == 4 )
>>              {
>> -                sib = insn_fetch_type(uint8_t);
>> -                sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
>> -                sib_base  = (sib & 7) | ((rex_prefix << 3) & 8);
>> -                if ( sib_index != 4 && !(d & vSIB) )
>> -                    ea.mem.off = *decode_register(sib_index, state->regs,
>> -                                                  false);
>> -                ea.mem.off <<= (sib >> 6) & 3;
>> +                uint8_t sib = insn_fetch_type(uint8_t);
>> +                uint8_t sib_base = (sib & 7) | ((rex_prefix << 3) & 8);
>> +
>> +                state->sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
>> +                state->sib_scale = (sib >> 6) & 3;
>> +                if ( state->sib_index != 4 && !(d & vSIB) )
>> +                {
>> +                    ea.mem.off = *decode_register(state->sib_index,
>> +                                                  state->regs, false);
>> +                    ea.mem.off <<= state->sib_scale;
> 
> This is a functional change.

In what way? The code is merely being re-organized, so that the two
pieces of information needed later go into the new state fields. Are
you perhaps referring to the shift previously having happened outside
the if()? With the condition false, that was simply shifting zero
left (or else it would have been wrong to sit outside the if() in the
first place).
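
To make this concrete, here is a reduced sketch (just the two
arrangements side by side, stripped of the surrounding decode logic):
when the if() condition is false, ea.mem.off is still zero at that
point, so the previously unconditional shift was a no-op:

    /* Before: shift done unconditionally. */
    if ( sib_index != 4 && !(d & vSIB) )
        ea.mem.off = *decode_register(sib_index, state->regs, false);
    ea.mem.off <<= (sib >> 6) & 3; /* shifts zero when the if() is false */

    /* After: shift only when an index register is actually in use. */
    if ( state->sib_index != 4 && !(d & vSIB) )
    {
        ea.mem.off = *decode_register(state->sib_index, state->regs, false);
        ea.mem.off <<= state->sib_scale;
    }

Either way the resulting ea.mem.off is the same.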

>> @@ -7472,6 +7479,110 @@ x86_emulate(
>>          break;
>>      }
>>  
>> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x90): /* vpgatherd{d,q} {x,y}mm,mem,{x,y}mm */
>> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x91): /* vpgatherq{d,q} {x,y}mm,mem,{x,y}mm */
>> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x92): /* vgatherdp{s,d} {x,y}mm,mem,{x,y}mm */
>> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x93): /* vgatherqp{s,d} {x,y}mm,mem,{x,y}mm */
>> +    {
>> +        unsigned int mask_reg = ~vex.reg & (mode_64bit() ? 0xf : 7);
>> +        typeof(vex) *pvex;
>> +        union {
>> +            int32_t dw[8];
>> +            int64_t qw[4];
>> +        } index, mask;
>> +
>> +        ASSERT(ea.type == OP_MEM);
>> +        generate_exception_if(modrm_reg == state->sib_index ||
>> +                              modrm_reg == mask_reg ||
>> +                              state->sib_index == mask_reg, EXC_UD);
>> +        generate_exception_if(!cpu_has_avx, EXC_UD);
>> +        vcpu_must_have(avx2);
>> +        get_fpu(X86EMUL_FPU_ymm, &fic);
>> +
>> +        /* Read destination, index, and mask registers. */
>> +        opc = init_prefixes(stub);
>> +        pvex = copy_VEX(opc, vex);
>> +        pvex->opcx = vex_0f;
>> +        opc[0] = 0x7f; /* vmovdqa */
>> +        /* Use (%rax) as destination and modrm_reg as source. */
>> +        pvex->r = !mode_64bit() || !(modrm_reg & 8);
>> +        pvex->b = 1;
>> +        opc[1] = (modrm_reg & 7) << 3;
>> +        pvex->reg = 0xf;
>> +        opc[2] = 0xc3;
>> +
>> +        invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
>> +
>> +        pvex->pfx = vex_f3; /* vmovdqu */
>> +        /* Switch to sib_index as source. */
>> +        pvex->r = !mode_64bit() || !(state->sib_index & 8);
>> +        opc[1] = (state->sib_index & 7) << 3;
>> +
>> +        invoke_stub("", "", "=m" (index) : "a" (&index));
>> +
>> +        /* Switch to mask_reg as source. */
>> +        pvex->r = !mode_64bit() || !(mask_reg & 8);
>> +        opc[1] = (mask_reg & 7) << 3;
>> +
>> +        invoke_stub("", "", "=m" (mask) : "a" (&mask));
>> +        put_stub(stub);
>> +
>> +        /* Clear untouched parts of the destination and mask values. */
>> +        n = 1 << (2 + vex.l - ((b & 1) | vex.w));
>> +        op_bytes = 4 << vex.w;
>> +        memset((void *)mmvalp + n * op_bytes, 0, 32 - n * op_bytes);
>> +        memset((void *)&mask + n * op_bytes, 0, 32 - n * op_bytes);
>> +
>> +        for ( i = 0; i < n && rc == X86EMUL_OKAY; ++i )
>> +        {
>> +            if ( (vex.w ? mask.qw[i] : mask.dw[i]) < 0 )
>> +            {
>> +                signed long idx = b & 1 ? index.qw[i] : index.dw[i];
>> +
>> +                rc = ops->read(ea.mem.seg,
>> +                               ea.mem.off + (idx << state->sib_scale),
>> +                               (void *)mmvalp + i * op_bytes, op_bytes, ctxt);
>> +                if ( rc != X86EMUL_OKAY )
>> +                    break;
>> +
>> +#ifdef __XEN__
>> +                if ( i + 1 < n && local_events_need_delivery() )
>> +                    rc = X86EMUL_RETRY;
>> +#endif
>> +            }
>> +
>> +            if ( vex.w )
>> +                mask.qw[i] = 0;
>> +            else
>> +                mask.dw[i] = 0;
>> +        }
> 
> The incomplete case here is rather more complicated.  In the case that
> rc != OK and local events are pending, RF needs setting, although it is
> not clear if this is only applicable if an exception is pending, or
> between every element.

Isn't this a more general issue, e.g. also applicable to repeated
string insns? Right now we only ever clear RF. I'm not convinced
dealing with this belongs here.
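
Just to illustrate what would be involved (a hypothetical sketch, not
something this patch does - as said, right now we only ever clear RF):
setting RF on a partial completion would amount to something like

    if ( rc == X86EMUL_RETRY )
        /* Avoid re-triggering an instruction breakpoint on restart. */
        ctxt->regs->eflags |= X86_EFLAGS_RF;

in the element loop, with the open question remaining whether this is
architecturally required after every element or only when an exception
is pending.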

>> --- a/xen/arch/x86/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate.c
>> @@ -10,6 +10,7 @@
>>   */
>>  
>>  #include <xen/domain_page.h>
>> +#include <xen/event.h>
> 
> Spurious hunk?

No - this is for local_events_need_delivery() (still visible above).

Jan
