On 19-09-2024 08:52 am, Gustavo Romero wrote:
Hi Ganapatrao, On 9/12/24 06:16, Ganapatrao Kulkarni wrote:Extend the 'mte' property for the virt machine to cover KVM as well. For KVM, we don't allocate tag memory, but instead enable the capability. If MTE has been enabled, we need to disable migration, as we do not yet have a way to migrate the tags as well. Therefore, MTE will stay off with KVM unless requested explicitly. This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227 which broke TCG since it made the TCG -cpu max report the presence of MTE to the guest even if the board hadn't enabled MTE by wiring up the tag RAM. This meant that if the guest then tried to use MTE QEMU would segfault accessing the non-existent tag RAM. Signed-off-by: Cornelia Huck <coh...@redhat.com> Signed-off-by: Ganapatrao Kulkarni <gankulka...@os.amperecomputing.com> --- Changes since V1: Added code to enable MTE before reading register id_aa64pfr1 (unmasked MTE bits).This patch is boot tested on ARM64 with KVM and on X86 with TCG for mte=onand default case(i.e, no mte). hw/arm/virt.c | 72 ++++++++++++++++++++++++++------------------ target/arm/cpu.c | 7 +++-- target/arm/cpu.h | 2 ++ target/arm/kvm.c | 59 ++++++++++++++++++++++++++++++++++++ target/arm/kvm_arm.h | 19 ++++++++++++ 5 files changed, 126 insertions(+), 33 deletions(-)Overall the patch looks good. I just have a couple of questions.diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 7934b23651..a33af7d996 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2211,7 +2211,7 @@ static void machvirt_init(MachineState *machine) exit(1); } - if (vms->mte && (kvm_enabled() || hvf_enabled())) { + if (vms->mte && hvf_enabled()) { error_report("mach-virt: %s does not support providing " "MTE to the guest CPU", current_accel_name()); @@ -2281,39 +2281,51 @@ static void machvirt_init(MachineState *machine) } if (vms->mte) {- /* Create the memory region only once, but link to all cpus. */- if (!tag_sysmem) { - /* - * The property exists only if MemTag is supported. 
- * If it is, we must allocate the ram to back that up. - */ - if (!object_property_find(cpuobj, "tag-memory")) { - error_report("MTE requested, but not supported " - "by the guest CPU"); - exit(1); + if (tcg_enabled()) {+ /* Create the memory region only once, but link to all cpus. */+ if (!tag_sysmem) { + /* + * The property exists only if MemTag is supported.+ * If it is, we must allocate the ram to back that up.+ */ + if (!object_property_find(cpuobj, "tag-memory")) { + error_report("MTE requested, but not supported " + "by the guest CPU"); + exit(1); + } + + tag_sysmem = g_new(MemoryRegion, 1); + memory_region_init(tag_sysmem, OBJECT(machine), + "tag-memory", UINT64_MAX / 32); + + if (vms->secure) { + secure_tag_sysmem = g_new(MemoryRegion, 1);+ memory_region_init(secure_tag_sysmem, OBJECT(machine),+ "secure-tag-memory", + UINT64_MAX / 32); ++ /* As with ram, secure-tag takes precedence over tag. */ + memory_region_add_subregion_overlap(secure_tag_sysmem, + 0, tag_sysmem, -1);+ } } - tag_sysmem = g_new(MemoryRegion, 1); - memory_region_init(tag_sysmem, OBJECT(machine), - "tag-memory", UINT64_MAX / 32); - + object_property_set_link(cpuobj, "tag-memory",+ OBJECT(tag_sysmem), &error_abort);if (vms->secure) { - secure_tag_sysmem = g_new(MemoryRegion, 1);- memory_region_init(secure_tag_sysmem, OBJECT(machine), - "secure-tag-memory", UINT64_MAX / 32);-- /* As with ram, secure-tag takes precedence over tag. 
*/ - memory_region_add_subregion_overlap(secure_tag_sysmem, 0,- tag_sysmem, -1);+ object_property_set_link(cpuobj, "secure-tag-memory",+ OBJECT(secure_tag_sysmem), + &error_abort); } - } -- object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem),- &error_abort); - if (vms->secure) { - object_property_set_link(cpuobj, "secure-tag-memory", - OBJECT(secure_tag_sysmem), - &error_abort); + } else if (kvm_enabled()) { + if (!kvm_arm_mte_supported()) {+ error_report("MTE requested, but not supported by KVM");+ exit(1); + } + kvm_arm_enable_mte(cpuobj, &error_abort); + } else { + error_report("MTE requested, but not supported "); + exit(1); } } diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 19191c2391..a59417aac8 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c@@ -2390,11 +2390,12 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)#ifndef CONFIG_USER_ONLY /* - * If we do not have tag-memory provided by the machine, - * reduce MTE support to instructions enabled at EL0. + * If we do not have tag-memory provided by the TCG + * nor MTE at KVM enabled, reduce MTE support to + * instructions enabled at EL0.What controls if MTE insns. (stg, etc.) are enabled on EL0 and EL1 are the ATA and ATA0 bits in SCTRL register. Also, AA64PFR1 is a register just to report the features available on the CPU, so I wonder if that comment is indeed correct. @rth: what do you think?* This matches Cortex-A710 BROADCASTMTE input being LOW. */ - if (cpu->tag_memory == NULL) { + if (cpu->tag_memory == NULL && !cpu->kvm_mte) { cpu->isar.id_aa64pfr1 = FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1); }If MTE is off on TGC and on KVM, then we're actually setting (deposit) MTE field in AA64PFR1, meaning that MTE is implemented? or am I missing something?
Thanks for the comment. It looks like I made a mistake by mixing up the TCG and KVM cases. I have modified it as in the diff below, keeping the TCG if-block as it is and adding an if for the KVM case to clear/mask the MTE bits when MTE in KVM mode is not enabled on the user command line (i.e., no mte=on).
Does the diff below make sense? diff --git a/target/arm/cpu.c b/target/arm/cpu.c index a59417aac8..523996576d 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c@@ -2390,15 +2390,20 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
#ifndef CONFIG_USER_ONLY /* - * If we do not have tag-memory provided by the TCG - * nor MTE at KVM enabled, reduce MTE support to - * instructions enabled at EL0. + * If we do not have tag-memory provided by the TCG, + * reduce MTE support to instructions enabled at EL0. * This matches Cortex-A710 BROADCASTMTE input being LOW. */ - if (cpu->tag_memory == NULL && !cpu->kvm_mte) { + if (tcg_enabled() && cpu->tag_memory == NULL) { cpu->isar.id_aa64pfr1 = FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1); } + + /* Disable MTE, if it is not enabled by the user for KVM mode. + */ + if (kvm_enabled() && !cpu->kvm_mte) { + FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0); + } #endif }
diff --git a/target/arm/cpu.h b/target/arm/cpu.h index f065756c5c..8fc8b6398f 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -922,6 +922,8 @@ struct ArchCPU { /* CPU has memory protection unit */ bool has_mpu; + /* CPU has MTE enabled in KVM mode */ + bool kvm_mte; /* PMSAv7 MPU number of supported regions */ uint32_t pmsav7_dregion; /* PMSAv8 MPU number of supported hyp regions */ diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 849e2e21b3..29865609fb 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -39,6 +39,7 @@ #include "hw/acpi/acpi.h" #include "hw/acpi/ghes.h" #include "target/arm/gtimer.h" +#include "migration/blocker.h" const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_LAST_INFO@@ -119,6 +120,21 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,if (vmfd < 0) { goto err; } + + /* + * MTE bits of register id_aa64pfr1 are masked if MTE is + * not enabled and required to enable before VCPU + * is created. Hence enable MTE(if supported) before VCPU + * is created to read unmasked MTE bits. 
+ */ + if (kvm_arm_mte_supported()) { + KVMState kvm_state; + + kvm_state.fd = kvmfd; + kvm_state.vmfd = vmfd; + kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0); + } + cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0); if (cpufd < 0) { goto err; @@ -1793,6 +1809,11 @@ bool kvm_arm_sve_supported(void) return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE); } +bool kvm_arm_mte_supported(void) +{ + return kvm_check_extension(kvm_state, KVM_CAP_ARM_MTE); +} + QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1); uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)@@ -2417,3 +2438,41 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)} return 0; } + +void kvm_arm_enable_mte(Object *cpuobj, Error **errp) +{ + static bool tried_to_enable; + static bool succeeded_to_enable; + Error *mte_migration_blocker = NULL; + ARMCPU *cpu = ARM_CPU(cpuobj); + int ret; + + if (!tried_to_enable) { + /*+ * MTE on KVM is enabled on a per-VM basis (and retrying doesn't make+ * sense), and we only want a single migration blocker as well. + */ + tried_to_enable = true; + + ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0); + if (ret) {+ error_setg_errno(errp, -ret, "Failed to enable KVM_CAP_ARM_MTE");+ return; + } + + /* TODO: Add migration support with MTE enabled */ + error_setg(&mte_migration_blocker, + "Live migration disabled due to MTE enabled"); + if (migrate_add_blocker(&mte_migration_blocker, errp)) { + error_free(mte_migration_blocker); + return; + } + + succeeded_to_enable = true; + } + + if (succeeded_to_enable) { + cpu->kvm_mte = true; + } +} +nit: remove ending blank line here.
Sure, thanks. -- Thanks, Ganapat/GK