On Wed, Apr 10, 2019 at 07:04:38PM +0200, Cédric Le Goater wrote:
> These controls will be used by the H_INT_SET_QUEUE_CONFIG and
> H_INT_GET_QUEUE_CONFIG hcalls from QEMU to configure the underlying
> Event Queue in the XIVE IC. They will also be used to restore the
> configuration of the XIVE EQs and to capture the internal run-time
> state of the EQs. Both 'get' and 'set' rely on an OPAL call to access
> the EQ toggle bit and EQ index which are updated by the XIVE IC when
> event notifications are enqueued in the EQ.
> 
> The value of the guest physical address of the event queue is saved in
> the XIVE internal xive_q structure for later use. That is when
> migration needs to mark the EQ pages dirty to capture a consistent
> memory state of the VM.
> 
> It should be noted that H_INT_SET_QUEUE_CONFIG does not require the
> extra OPAL call setting the EQ toggle bit and EQ index to configure
> the EQ, but restoring the EQ state does.
> 
> Signed-off-by: Cédric Le Goater <c...@kaod.org>

Reviewed-by: David Gibson <da...@gibson.dropbear.id.au>

> ---
> 
>  Changes since v4 :
> 
>  - add check on EQ page alignment
>  - add requirement on KVM_XIVE_EQ_ALWAYS_NOTIFY
> 
>  arch/powerpc/include/asm/xive.h            |   2 +
>  arch/powerpc/include/uapi/asm/kvm.h        |  19 ++
>  arch/powerpc/kvm/book3s_xive.h             |   2 +
>  arch/powerpc/kvm/book3s_xive.c             |  15 +-
>  arch/powerpc/kvm/book3s_xive_native.c      | 249 +++++++++++++++++++++
>  Documentation/virtual/kvm/devices/xive.txt |  34 +++
>  6 files changed, 315 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
> index b579a943407b..c4e88abd3b67 100644
> --- a/arch/powerpc/include/asm/xive.h
> +++ b/arch/powerpc/include/asm/xive.h
> @@ -73,6 +73,8 @@ struct xive_q {
>       u32                     esc_irq;
>       atomic_t                count;
>       atomic_t                pending_count;
> +     u64                     guest_qaddr;
> +     u32                     guest_qshift;
>  };
>  
>  /* Global enable flags for the XIVE support */
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
> b/arch/powerpc/include/uapi/asm/kvm.h
> index e8161e21629b..85005400fd86 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -681,6 +681,7 @@ struct kvm_ppc_cpu_char {
>  #define KVM_DEV_XIVE_GRP_CTRL                1
>  #define KVM_DEV_XIVE_GRP_SOURCE              2       /* 64-bit source 
> identifier */
>  #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG       3       /* 64-bit source 
> identifier */
> +#define KVM_DEV_XIVE_GRP_EQ_CONFIG   4       /* 64-bit EQ identifier */
>  
>  /* Layout of 64-bit XIVE source attribute values */
>  #define KVM_XIVE_LEVEL_SENSITIVE     (1ULL << 0)
> @@ -696,4 +697,22 @@ struct kvm_ppc_cpu_char {
>  #define KVM_XIVE_SOURCE_EISN_SHIFT   33
>  #define KVM_XIVE_SOURCE_EISN_MASK    0xfffffffe00000000ULL
>  
> +/* Layout of 64-bit EQ identifier */
> +#define KVM_XIVE_EQ_PRIORITY_SHIFT   0
> +#define KVM_XIVE_EQ_PRIORITY_MASK    0x7
> +#define KVM_XIVE_EQ_SERVER_SHIFT     3
> +#define KVM_XIVE_EQ_SERVER_MASK              0xfffffff8ULL
> +
> +/* Layout of EQ configuration values (64 bytes) */
> +struct kvm_ppc_xive_eq {
> +     __u32 flags;
> +     __u32 qshift;
> +     __u64 qaddr;
> +     __u32 qtoggle;
> +     __u32 qindex;
> +     __u8  pad[40];
> +};
> +
> +#define KVM_XIVE_EQ_ALWAYS_NOTIFY    0x00000001
> +
>  #endif /* __LINUX_KVM_POWERPC_H */
> diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
> index ae26fe653d98..622f594d93e1 100644
> --- a/arch/powerpc/kvm/book3s_xive.h
> +++ b/arch/powerpc/kvm/book3s_xive.h
> @@ -272,6 +272,8 @@ struct kvmppc_xive_src_block 
> *kvmppc_xive_create_src_block(
>       struct kvmppc_xive *xive, int irq);
>  void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb);
>  int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
> +int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
> +                               bool single_escalation);
>  
>  #endif /* CONFIG_KVM_XICS */
>  #endif /* _KVM_PPC_BOOK3S_XICS_H */
> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
> index e09f3addffe5..c1b7aa7dbc28 100644
> --- a/arch/powerpc/kvm/book3s_xive.c
> +++ b/arch/powerpc/kvm/book3s_xive.c
> @@ -166,7 +166,8 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
>       return IRQ_HANDLED;
>  }
>  
> -static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
> +int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
> +                               bool single_escalation)
>  {
>       struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>       struct xive_q *q = &xc->queues[prio];
> @@ -185,7 +186,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, 
> u8 prio)
>               return -EIO;
>       }
>  
> -     if (xc->xive->single_escalation)
> +     if (single_escalation)
>               name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
>                                vcpu->kvm->arch.lpid, xc->server_num);
>       else
> @@ -217,7 +218,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, 
> u8 prio)
>        * interrupt, thus leaving it effectively masked after
>        * it fires once.
>        */
> -     if (xc->xive->single_escalation) {
> +     if (single_escalation) {
>               struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
>               struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
>  
> @@ -291,7 +292,8 @@ static int xive_check_provisioning(struct kvm *kvm, u8 
> prio)
>                       continue;
>               rc = xive_provision_queue(vcpu, prio);
>               if (rc == 0 && !xive->single_escalation)
> -                     xive_attach_escalation(vcpu, prio);
> +                     kvmppc_xive_attach_escalation(vcpu, prio,
> +                                                   xive->single_escalation);
>               if (rc)
>                       return rc;
>       }
> @@ -1214,7 +1216,8 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
>               if (xive->qmap & (1 << i)) {
>                       r = xive_provision_queue(vcpu, i);
>                       if (r == 0 && !xive->single_escalation)
> -                             xive_attach_escalation(vcpu, i);
> +                             kvmppc_xive_attach_escalation(
> +                                     vcpu, i, xive->single_escalation);
>                       if (r)
>                               goto bail;
>               } else {
> @@ -1229,7 +1232,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
>       }
>  
>       /* If not done above, attach priority 0 escalation */
> -     r = xive_attach_escalation(vcpu, 0);
> +     r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation);
>       if (r)
>               goto bail;
>  
> diff --git a/arch/powerpc/kvm/book3s_xive_native.c 
> b/arch/powerpc/kvm/book3s_xive_native.c
> index 492825a35958..3e7cdcacc932 100644
> --- a/arch/powerpc/kvm/book3s_xive_native.c
> +++ b/arch/powerpc/kvm/book3s_xive_native.c
> @@ -335,6 +335,243 @@ static int kvmppc_xive_native_set_source_config(struct 
> kvmppc_xive *xive,
>                                                      priority, masked, eisn);
>  }
>  
> +static int xive_native_validate_queue_size(u32 qshift)
> +{
> +     /*
> +      * We only support 64K pages for the moment. This is also
> +      * advertised in the DT property "ibm,xive-eq-sizes"
> +      */
> +     switch (qshift) {
> +     case 0: /* EQ reset */
> +     case 16:
> +             return 0;
> +     case 12:
> +     case 21:
> +     case 24:
> +     default:
> +             return -EINVAL;
> +     }
> +}
> +
> +static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
> +                                            long eq_idx, u64 addr)
> +{
> +     struct kvm *kvm = xive->kvm;
> +     struct kvm_vcpu *vcpu;
> +     struct kvmppc_xive_vcpu *xc;
> +     void __user *ubufp = (void __user *) addr;
> +     u32 server;
> +     u8 priority;
> +     struct kvm_ppc_xive_eq kvm_eq;
> +     int rc;
> +     __be32 *qaddr = 0;
> +     struct page *page;
> +     struct xive_q *q;
> +     gfn_t gfn;
> +     unsigned long page_size;
> +
> +     /*
> +      * Demangle priority/server tuple from the EQ identifier
> +      */
> +     priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
> +             KVM_XIVE_EQ_PRIORITY_SHIFT;
> +     server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
> +             KVM_XIVE_EQ_SERVER_SHIFT;
> +
> +     if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
> +             return -EFAULT;
> +
> +     vcpu = kvmppc_xive_find_server(kvm, server);
> +     if (!vcpu) {
> +             pr_err("Can't find server %d\n", server);
> +             return -ENOENT;
> +     }
> +     xc = vcpu->arch.xive_vcpu;
> +
> +     if (priority != xive_prio_from_guest(priority)) {
> +             pr_err("Trying to restore invalid queue %d for VCPU %d\n",
> +                    priority, server);
> +             return -EINVAL;
> +     }
> +     q = &xc->queues[priority];
> +
> +     pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d 
> idx:%d\n",
> +              __func__, server, priority, kvm_eq.flags,
> +              kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
> +
> +     /*
> +      * sPAPR specifies an "Unconditional Notify (n) flag" for the
> +      * H_INT_SET_QUEUE_CONFIG hcall which forces notification
> +      * without using the coalescing mechanisms provided by the
> +      * XIVE END ESBs. This is required on KVM as notification
> +      * using the END ESBs is not supported.
> +      */
> +     if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
> +             pr_err("invalid flags %d\n", kvm_eq.flags);
> +             return -EINVAL;
> +     }
> +
> +     rc = xive_native_validate_queue_size(kvm_eq.qshift);
> +     if (rc) {
> +             pr_err("invalid queue size %d\n", kvm_eq.qshift);
> +             return rc;
> +     }
> +
> +     /* reset queue and disable queueing */
> +     if (!kvm_eq.qshift) {
> +             q->guest_qaddr  = 0;
> +             q->guest_qshift = 0;
> +
> +             rc = xive_native_configure_queue(xc->vp_id, q, priority,
> +                                              NULL, 0, true);
> +             if (rc) {
> +                     pr_err("Failed to reset queue %d for VCPU %d: %d\n",
> +                            priority, xc->server_num, rc);
> +                     return rc;
> +             }
> +
> +             if (q->qpage) {
> +                     put_page(virt_to_page(q->qpage));
> +                     q->qpage = NULL;
> +             }
> +
> +             return 0;
> +     }
> +
> +     if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
> +             pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
> +                    1ull << kvm_eq.qshift);
> +             return -EINVAL;
> +     }
> +
> +     gfn = gpa_to_gfn(kvm_eq.qaddr);
> +     page = gfn_to_page(kvm, gfn);
> +     if (is_error_page(page)) {
> +             pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
> +             return -EINVAL;
> +     }
> +     page_size = kvm_host_page_size(kvm, gfn);
> +     if (1ull << kvm_eq.qshift > page_size) {
> +             pr_warn("Incompatible host page size %lx!\n", page_size);
> +             return -EINVAL;
> +     }
> +
> +     qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
> +
> +     /*
> +      * Back up the queue page guest address; it is needed to
> +      * mark the EQ page dirty for migration.
> +      */
> +     q->guest_qaddr  = kvm_eq.qaddr;
> +     q->guest_qshift = kvm_eq.qshift;
> +
> +      /*
> +       * Unconditional Notification is forced by default at the
> +       * OPAL level because the use of END ESBs is not supported by
> +       * Linux.
> +       */
> +     rc = xive_native_configure_queue(xc->vp_id, q, priority,
> +                                      (__be32 *) qaddr, kvm_eq.qshift, true);
> +     if (rc) {
> +             pr_err("Failed to configure queue %d for VCPU %d: %d\n",
> +                    priority, xc->server_num, rc);
> +             put_page(page);
> +             return rc;
> +     }
> +
> +     /*
> +      * Only restore the queue state when needed. When doing the
> +      * H_INT_SET_SOURCE_CONFIG hcall, it should not.
> +      */
> +     if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
> +             rc = xive_native_set_queue_state(xc->vp_id, priority,
> +                                              kvm_eq.qtoggle,
> +                                              kvm_eq.qindex);
> +             if (rc)
> +                     goto error;
> +     }
> +
> +     rc = kvmppc_xive_attach_escalation(vcpu, priority,
> +                                        xive->single_escalation);
> +error:
> +     if (rc)
> +             kvmppc_xive_native_cleanup_queue(vcpu, priority);
> +     return rc;
> +}
> +
> +static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
> +                                            long eq_idx, u64 addr)
> +{
> +     struct kvm *kvm = xive->kvm;
> +     struct kvm_vcpu *vcpu;
> +     struct kvmppc_xive_vcpu *xc;
> +     struct xive_q *q;
> +     void __user *ubufp = (u64 __user *) addr;
> +     u32 server;
> +     u8 priority;
> +     struct kvm_ppc_xive_eq kvm_eq;
> +     u64 qaddr;
> +     u64 qshift;
> +     u64 qeoi_page;
> +     u32 escalate_irq;
> +     u64 qflags;
> +     int rc;
> +
> +     /*
> +      * Demangle priority/server tuple from the EQ identifier
> +      */
> +     priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
> +             KVM_XIVE_EQ_PRIORITY_SHIFT;
> +     server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
> +             KVM_XIVE_EQ_SERVER_SHIFT;
> +
> +     vcpu = kvmppc_xive_find_server(kvm, server);
> +     if (!vcpu) {
> +             pr_err("Can't find server %d\n", server);
> +             return -ENOENT;
> +     }
> +     xc = vcpu->arch.xive_vcpu;
> +
> +     if (priority != xive_prio_from_guest(priority)) {
> +             pr_err("invalid priority for queue %d for VCPU %d\n",
> +                    priority, server);
> +             return -EINVAL;
> +     }
> +     q = &xc->queues[priority];
> +
> +     memset(&kvm_eq, 0, sizeof(kvm_eq));
> +
> +     if (!q->qpage)
> +             return 0;
> +
> +     rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
> +                                     &qeoi_page, &escalate_irq, &qflags);
> +     if (rc)
> +             return rc;
> +
> +     kvm_eq.flags = 0;
> +     if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
> +             kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
> +
> +     kvm_eq.qshift = q->guest_qshift;
> +     kvm_eq.qaddr  = q->guest_qaddr;
> +
> +     rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
> +                                      &kvm_eq.qindex);
> +     if (rc)
> +             return rc;
> +
> +     pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d 
> idx:%d\n",
> +              __func__, server, priority, kvm_eq.flags,
> +              kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
> +
> +     if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
> +             return -EFAULT;
> +
> +     return 0;
> +}
> +
>  static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
>                                      struct kvm_device_attr *attr)
>  {
> @@ -349,6 +586,9 @@ static int kvmppc_xive_native_set_attr(struct kvm_device 
> *dev,
>       case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
>               return kvmppc_xive_native_set_source_config(xive, attr->attr,
>                                                           attr->addr);
> +     case KVM_DEV_XIVE_GRP_EQ_CONFIG:
> +             return kvmppc_xive_native_set_queue_config(xive, attr->attr,
> +                                                        attr->addr);
>       }
>       return -ENXIO;
>  }
> @@ -356,6 +596,13 @@ static int kvmppc_xive_native_set_attr(struct kvm_device 
> *dev,
>  static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
>                                      struct kvm_device_attr *attr)
>  {
> +     struct kvmppc_xive *xive = dev->private;
> +
> +     switch (attr->group) {
> +     case KVM_DEV_XIVE_GRP_EQ_CONFIG:
> +             return kvmppc_xive_native_get_queue_config(xive, attr->attr,
> +                                                        attr->addr);
> +     }
>       return -ENXIO;
>  }
>  
> @@ -371,6 +618,8 @@ static int kvmppc_xive_native_has_attr(struct kvm_device 
> *dev,
>                   attr->attr < KVMPPC_XIVE_NR_IRQS)
>                       return 0;
>               break;
> +     case KVM_DEV_XIVE_GRP_EQ_CONFIG:
> +             return 0;
>       }
>       return -ENXIO;
>  }
> diff --git a/Documentation/virtual/kvm/devices/xive.txt 
> b/Documentation/virtual/kvm/devices/xive.txt
> index 33c64b2cdbe8..cc13bfd5cf53 100644
> --- a/Documentation/virtual/kvm/devices/xive.txt
> +++ b/Documentation/virtual/kvm/devices/xive.txt
> @@ -53,3 +53,37 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
>      -ENXIO:  CPU event queues not configured or configuration of the
>               underlying HW interrupt failed
>      -EBUSY:  No CPU available to serve interrupt
> +
> +  4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
> +  Configures an event queue of a CPU
> +  Attributes:
> +    EQ descriptor identifier (64-bit)
> +  The EQ descriptor identifier is a tuple (server, priority):
> +  bits:     | 63   ....  32 | 31 .. 3 |  2 .. 0
> +  values:   |    unused     |  server | priority
> +  The kvm_device_attr.addr points to :
> +    struct kvm_ppc_xive_eq {
> +     __u32 flags;
> +     __u32 qshift;
> +     __u64 qaddr;
> +     __u32 qtoggle;
> +     __u32 qindex;
> +     __u8  pad[40];
> +    };
> +  - flags: queue flags
> +    KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
> +     forces notification without using the coalescing mechanism
> +     provided by the XIVE END ESBs.
> +  - qshift: queue size (power of 2)
> +  - qaddr: real address of queue
> +  - qtoggle: current queue toggle bit
> +  - qindex: current queue index
> +  - pad: reserved for future use
> +  Errors:
> +    -ENOENT: Invalid CPU number
> +    -EINVAL: Invalid priority
> +    -EINVAL: Invalid flags
> +    -EINVAL: Invalid queue size
> +    -EINVAL: Invalid queue address
> +    -EFAULT: Invalid user pointer for attr->addr.
> +    -EIO:    Configuration of the underlying HW failed

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

Reply via email to