Re: [PATCH v3 5/8] iommufd: Associate fault object with iommufd_hw_pgtable

2024-03-01 Thread Zhangfei Gao
On Mon, 22 Jan 2024 at 15:46, Lu Baolu  wrote:
>
> When allocating a user iommufd_hw_pagetable, the user space is allowed to
> associate a fault object with the hw_pagetable by specifying the fault
> object ID in the page table allocation data and setting the
> IOMMU_HWPT_FAULT_ID_VALID flag bit.
>
> On a successful return of hwpt allocation, the user can retrieve and
> respond to page faults by reading and writing the file interface of the
> fault object.
>
> Once a fault object has been associated with a hwpt, the hwpt is
> iopf-capable, indicated by fault_capable set to true. Attaching,
> detaching, or replacing an iopf-capable hwpt to an RID or PASID will
> differ from those that are not iopf-capable. The implementation of these
> will be introduced in the next patch.
>
> Signed-off-by: Lu Baolu 
> ---
>  drivers/iommu/iommufd/iommufd_private.h | 11 
>  include/uapi/linux/iommufd.h|  6 +
>  drivers/iommu/iommufd/fault.c   | 14 ++
>  drivers/iommu/iommufd/hw_pagetable.c| 36 +++--
>  4 files changed, 59 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/iommu/iommufd/iommufd_private.h 
> b/drivers/iommu/iommufd/iommufd_private.h
> index 52d83e888bd0..2780bed0c6b1 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -293,6 +293,8 @@ int iommufd_check_iova_range(struct io_pagetable *iopt,
>  struct iommufd_hw_pagetable {
> struct iommufd_object obj;
> struct iommu_domain *domain;
> +   struct iommufd_fault *fault;
> +   bool fault_capable : 1;
>  };
>
>  struct iommufd_hwpt_paging {
> @@ -446,8 +448,17 @@ struct iommufd_fault {
> struct wait_queue_head wait_queue;
>  };
>
> +static inline struct iommufd_fault *
> +iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
> +{
> +   return container_of(iommufd_get_object(ucmd->ictx, id,
> +  IOMMUFD_OBJ_FAULT),
> +   struct iommufd_fault, obj);
> +}
> +
>  int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
>  void iommufd_fault_destroy(struct iommufd_object *obj);
> +int iommufd_fault_iopf_handler(struct iopf_group *group);
>
>  #ifdef CONFIG_IOMMUFD_TEST
>  int iommufd_test(struct iommufd_ucmd *ucmd);
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index c32d62b02306..7481cdd57027 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -357,10 +357,13 @@ struct iommu_vfio_ioas {
>   *the parent HWPT in a nesting configuration.
>   * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU 
> is
>   *   enforced on device attachment
> + * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
> + * valid.
>   */
>  enum iommufd_hwpt_alloc_flags {
> IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
> IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
> +   IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
>  };
>
>  /**
> @@ -411,6 +414,8 @@ enum iommu_hwpt_data_type {
>   * @__reserved: Must be 0
>   * @data_type: One of enum iommu_hwpt_data_type
>   * @data_len: Length of the type specific data
> + * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of
> + *IOMMU_HWPT_FAULT_ID_VALID is set.
>   * @data_uptr: User pointer to the type specific data
>   *
>   * Explicitly allocate a hardware page table object. This is the same object
> @@ -441,6 +446,7 @@ struct iommu_hwpt_alloc {
> __u32 __reserved;
> __u32 data_type;
> __u32 data_len;
> +   __u32 fault_id;
> __aligned_u64 data_uptr;
>  };
>  #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
> diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
> index 9844a85feeb2..e752d1c49dde 100644
> --- a/drivers/iommu/iommufd/fault.c
> +++ b/drivers/iommu/iommufd/fault.c
> @@ -253,3 +253,17 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
>
> return rc;
>  }
> +
> +int iommufd_fault_iopf_handler(struct iopf_group *group)
> +{
> +   struct iommufd_hw_pagetable *hwpt = group->cookie->domain->fault_data;
> +   struct iommufd_fault *fault = hwpt->fault;
> +
> +   mutex_lock(&fault->mutex);
> +   list_add_tail(&group->node, &fault->deliver);
> +   mutex_unlock(&fault->mutex);
> +
> +   wake_up_interruptible(&fault->wait_queue);
> +
> +   return 0;
> +}
> diff --git a/drivers/iommu/iommufd/hw_pagetable.c 
> b/drivers/iommu/iommufd/hw_pagetable.c
> index 3f3f1fa1a0a9..2703d5aea4f5 100644
> --- a/drivers/iommu/iommufd/hw_pagetable.c
> +++ b/drivers/iommu/iommufd/hw_pagetable.c
> @@ -8,6 +8,15 @@
>  #include "../iommu-priv.h"
>  #include "iommufd_private.h"
>
> +static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
> +{
> +   if (hwpt->domain)
> +   iommu_domain_

Re: [PATCH v3 5/8] iommufd: Associate fault object with iommufd_hw_pgtable

2024-03-06 Thread Zhangfei Gao
Hi, Baolu

On Sat, 2 Mar 2024 at 10:36, Zhangfei Gao  wrote:
>
> On Mon, 22 Jan 2024 at 15:46, Lu Baolu  wrote:
> >
> > When allocating a user iommufd_hw_pagetable, the user space is allowed to
> > associate a fault object with the hw_pagetable by specifying the fault
> > object ID in the page table allocation data and setting the
> > IOMMU_HWPT_FAULT_ID_VALID flag bit.
> >
> > On a successful return of hwpt allocation, the user can retrieve and
> > respond to page faults by reading and writing the file interface of the
> > fault object.
> >
> > Once a fault object has been associated with a hwpt, the hwpt is
> > iopf-capable, indicated by fault_capable set to true. Attaching,
> > detaching, or replacing an iopf-capable hwpt to an RID or PASID will
> > differ from those that are not iopf-capable. The implementation of these
> > will be introduced in the next patch.
> >
> > Signed-off-by: Lu Baolu 
> > ---
> >  drivers/iommu/iommufd/iommufd_private.h | 11 
> >  include/uapi/linux/iommufd.h|  6 +
> >  drivers/iommu/iommufd/fault.c   | 14 ++
> >  drivers/iommu/iommufd/hw_pagetable.c| 36 +++--
> >  4 files changed, 59 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/iommu/iommufd/iommufd_private.h 
> > b/drivers/iommu/iommufd/iommufd_private.h
> > index 52d83e888bd0..2780bed0c6b1 100644
> > --- a/drivers/iommu/iommufd/iommufd_private.h
> > +++ b/drivers/iommu/iommufd/iommufd_private.h
> > @@ -293,6 +293,8 @@ int iommufd_check_iova_range(struct io_pagetable *iopt,
> >  struct iommufd_hw_pagetable {
> > struct iommufd_object obj;
> > struct iommu_domain *domain;
> > +   struct iommufd_fault *fault;
> > +   bool fault_capable : 1;
> >  };
> >
> >  struct iommufd_hwpt_paging {
> > @@ -446,8 +448,17 @@ struct iommufd_fault {
> > struct wait_queue_head wait_queue;
> >  };
> >
> > +static inline struct iommufd_fault *
> > +iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
> > +{
> > +   return container_of(iommufd_get_object(ucmd->ictx, id,
> > +  IOMMUFD_OBJ_FAULT),
> > +   struct iommufd_fault, obj);
> > +}
> > +
> >  int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
> >  void iommufd_fault_destroy(struct iommufd_object *obj);
> > +int iommufd_fault_iopf_handler(struct iopf_group *group);
> >
> >  #ifdef CONFIG_IOMMUFD_TEST
> >  int iommufd_test(struct iommufd_ucmd *ucmd);
> > diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> > index c32d62b02306..7481cdd57027 100644
> > --- a/include/uapi/linux/iommufd.h
> > +++ b/include/uapi/linux/iommufd.h
> > @@ -357,10 +357,13 @@ struct iommu_vfio_ioas {
> >   *the parent HWPT in a nesting 
> > configuration.
> >   * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device 
> > IOMMU is
> >   *   enforced on device attachment
> > + * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data 
> > is
> > + * valid.
> >   */
> >  enum iommufd_hwpt_alloc_flags {
> > IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
> > IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
> > +   IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
> >  };
> >
> >  /**
> > @@ -411,6 +414,8 @@ enum iommu_hwpt_data_type {
> >   * @__reserved: Must be 0
> >   * @data_type: One of enum iommu_hwpt_data_type
> >   * @data_len: Length of the type specific data
> > + * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of
> > + *IOMMU_HWPT_FAULT_ID_VALID is set.
> >   * @data_uptr: User pointer to the type specific data
> >   *
> >   * Explicitly allocate a hardware page table object. This is the same 
> > object
> > @@ -441,6 +446,7 @@ struct iommu_hwpt_alloc {
> > __u32 __reserved;
> > __u32 data_type;
> > __u32 data_len;
> > +   __u32 fault_id;
> > __aligned_u64 data_uptr;
> >  };
> >  #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
> > diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
> > index 9844a85feeb2..e752d1c49dde 100644
> > --- a/drivers/iommu/iommufd/fault.c
> > +++ b/drivers/iommu/iommufd/fault.c
> > @@ -253,3 +253,17 @@ int iommufd_fault_alloc(struct iommu

Re: ext4 error when testing virtio-scsi & vhost-scsi

2016-07-30 Thread Zhangfei Gao
Hi, Michael

I have met ext4 error when using vhost_scsi on arm64 platform, and
suspect it is vhost_scsi issue.

Ext4 error when testing virtio_scsi & vhost_scsi


No issue:
1. virtio_scsi, ext4
2. vhost_scsi & virtio_scsi, ext2
3.  Instead of vhost, also tried loopback and no problem.
Using loopback, host can use the new block device, while vhost is used
by guest (qemu).
http://www.linux-iscsi.org/wiki/Tcm_loop
Test directly in host, not find ext4 error.



Have issue:
1. vhost_scsi & virtio_scsi, ext4
a. iblock
b, fileio, file located in /tmp (ram), no device based.

2, Have tried 4.7-rc2 and 4.5-rc1 on D02 board, both have issue.
Since I need kvm specific patch for D02, so it may not freely to switch
to older version.

3. Also test with ext4, disabling journal
mkfs.ext4 -O ^has_journal /dev/sda


Do you have any suggestion?

Thanks

On Tue, Jul 19, 2016 at 4:21 PM, Zhangfei Gao  wrote:
> On Tue, Jul 19, 2016 at 3:56 PM, Zhangfei Gao  wrote:
>> Dear Ted
>>
>> On Wed, Jul 13, 2016 at 12:43 AM, Theodore Ts'o  wrote:
>>> On Tue, Jul 12, 2016 at 03:14:38PM +0800, Zhangfei Gao wrote:
>>>> Some update:
>>>>
>>>> If test with ext2, no problem in iblock.
>>>> If test with ext4, ext4_mb_generate_buddy reported error in the
>>>> removing files after reboot.
>>>>
>>>>
>>>> root@(none)$ rm test
>>>> [   21.006549] EXT4-fs error (device sda): ext4_mb_generate_buddy:758: 
>>>> group 18
>>>> , block bitmap and bg descriptor inconsistent: 26464 vs 25600 free clusters
>>>> [   21.008249] JBD2: Spotted dirty metadata buffer (dev = sda, blocknr = 
>>>> 0). Th
>>>> ere's a risk of filesystem corruption in case of system crash.
>>>>
>>>> Any special notes of using ext4 in qemu?
>>>
>>> Ext4 has more runtime consistency checking than ext2.  So just because
>>> ext4 complains doesn't mean that there isn't a problem with the file
>>> system; it just means that ext4 is more likely to notice before you
>>> lose user data.
>>>
>>> So if you test with ext2, try running e2fsck afterwards, to make sure
>>> the file system is consistent.
>>>
>>> Given that I'm reguarly testing ext4 using kvm, and I haven't seen
>>> anything like this in a very long time, I suspect the problemb is with
>>> your SCSI code, and not with ext4.
>>>
>>
>> Do you know what's the possible reason of this error.
>>
>> Have tried 4.7-rc2, same issue exist.
>> It can be reproduced by fileio and iblock as backstore.
>> It is easier to happen in qemu like this process:
>> qemu-> mount-> dd xx -> umount -> mount -> rm xx, then the error may
>> happen, no need to reboot.
>>
>> ramdisk can not cause error just because it just malloc and memcpy,
>> while not going to blk layer.
>>
>> Also tried creating one file in /tmp, used as fileio, also can reproduce.
>> So no real device is based.
>>
>> like:
>> cd /tmp
>> dd if=/dev/zero of=test bs=1M count=1024; sync;
>> targetcli
>> #targetcli
>> (targetcli) /> cd backstores/fileio
>> (targetcli) /> create name=file_backend file_or_dev=/tmp/test size=1G
>> (targetcli) /> cd /vhost
>> (targetcli) /> create wwn=naa.60014052cc816bf4
>> (targetcli) /> cd naa.60014052cc816bf4/tpgt1/luns
>> (targetcli) /> create /backstores/fileio/file_backend
>> (targetcli) /> cd /
>> (targetcli) /> saveconfig
>> (targetcli) /> exit
>>
>> /work/qemu.git/aarch64-softmmu/qemu-system-aarch64 \
>> -enable-kvm -nographic -kernel Image \
>> -device vhost-scsi-pci,wwpn=naa.60014052cc816bf4 \
>> -m 512 -M virt -cpu host \
>> -append "earlyprintk console=ttyAMA0 mem=512M"
>>
>> in qemu:
>> mkfs.ext4 /dev/sda
>> mount /dev/sda /mnt/
>> sync; date; dd if=/dev/zero of=/mnt/test bs=1M count=100; sync; date;
>>
>> using dd test, then some error happen.
>> log like:
>> oot@(none)$ sync; date; dd if=/dev/zero of=test bs=1M count=100; sync;; date;
>> [ 1789.917963] sbc_parse_cdb cdb[0]=0x35
>> [ 1789.922000] fd_execute_sync_cache immed=0
>> Tue Jul 19 07:26:12 UTC 2016
>> [  200.712879] EXT4-fs error (device sda) [ 1790.191770] sbc_parse_cdb
>> cdb[0]=0x2a
>> in ext4_reserve_inode_write:5362[ 1790.198382]  fd_execute_rw
>> : Corrupt filesystem
>> [  200.729001] EXT4-fs error (device sda) [ 1790.207843] sbc_parse_cdb
>> cdb[0]=0x2a
>> in ext4_reserve_inode_write:5362[ 1790.214495]  fd_execute_rw
>>

Re: ext4 error when testing virtio-scsi & vhost-scsi

2016-07-30 Thread Zhangfei Gao
Hi, Jan

On Wed, Jul 27, 2016 at 11:56 PM, Jan Kara  wrote:
> Hi!
>
> On Wed 27-07-16 15:58:55, Zhangfei Gao wrote:
>> Hi, Michael
>>
>> I have met ext4 error when using vhost_scsi on arm64 platform, and
>> suspect it is vhost_scsi issue.
>>
>> Ext4 error when testing virtio_scsi & vhost_scsi
>>
>>
>> No issue:
>> 1. virtio_scsi, ext4
>> 2. vhost_scsi & virtio_scsi, ext2
>> 3.  Instead of vhost, also tried loopback and no problem.
>> Using loopback, host can use the new block device, while vhost is used
>> by guest (qemu).
>> http://www.linux-iscsi.org/wiki/Tcm_loop
>> Test directly in host, not find ext4 error.
>>
>>
>>
>> Have issue:
>> 1. vhost_scsi & virtio_scsi, ext4
>> a. iblock
>> b, fileio, file located in /tmp (ram), no device based.
>>
>> 2, Have tried 4.7-rc2 and 4.5-rc1 on D02 board, both have issue.
>> Since I need kvm specific patch for D02, so it may not freely to switch
>> to older version.
>>
>> 3. Also test with ext4, disabling journal
>> mkfs.ext4 -O ^has_journal /dev/sda
>>
>>
>> Do you have any suggestion?
>
> So can you mount the filesystem with errors=remount-ro to avoid clobbering
> the fs after the problem happens? And then run e2fsck on the problematic
> filesystem and send the output here?
>

Tested twice, log pasted.
Both using fileio, located in host ramfs /tmp
Before e2fsck, umount /dev/sda

1.
root@(none)$ mount -o errors=remount-ro /dev/sda /mnt
[   22.812053] EXT4-fs (sda): mounted filesystem with ordered data
mode. Opts: errors=remount-ro
$ rm /mnt/test
[  108.388905] EXT4-fs error (device sda) in
ext4_reserve_inode_write:5362: Corrupt filesystem
[  108.406930] Aborting journal on device sda-8.
[  108.414120] EXT4-fs (sda): Remounting filesystem read-only
[  108.414847] EXT4-fs error (device sda) in ext4_dirty_inode:5487: IO failure
[  108.423571] EXT4-fs error (device sda) in ext4_free_blocks:4904:
Journal has aborted
[  108.431919] EXT4-fs error (device sda) in
ext4_reserve_inode_write:5362: Corrupt filesystem
[  108.440269] EXT4-fs error (device sda) in
ext4_reserve_inode_write:5362: Corrupt filesystem
[  108.448568] EXT4-fs error (device sda) in
ext4_ext_remove_space:3058: IO failure
[  108.456917] EXT4-fs error (device sda) in ext4_ext_truncate:4657:
Corrupt filesystem
[  108.465267] EXT4-fs error (device sda) in
ext4_reserve_inode_write:5362: Corrupt filesystem
[  108.473567] EXT4-fs error (device sda) in ext4_truncate:4150: IO failure
[  108.481917] EXT4-fs error (device sda) in
ext4_reserve_inode_write:5362: Corrupt filesystem
root@(none)$ e2fsck /dev/sda
e2fsck 1.42.9 (28-Dec-2013)
/dev/sda is mounted.
e2fsck: Cannot continue, aborting.


root@(none)$ umount /mnt
[  260.756250] EXT4-fs error (device sda): ext4_put_super:837:
Couldn't clean up the journal
root@(none)$ umount /mnt   e2fsck /dev/sda
e2fsck 1.42.9 (28-Dec-2013)
ext2fs_open2: Bad magic number in super-block
e2fsck: Superblock invalid, trying backup blocks...
Superblock needs_recovery flag is clear, but journal has data.
Recovery flag not set in backup superblock, so running journal anyway.
/dev/sda: recovering journal
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 4: Checking reference counts
Pass 5: Checking group summary information
Free blocks count wrong for group #1 (32703, counted=8127).
Fix? yes
Free blocks count wrong for group #2 (32768, counted=31744).
Fix? yes
Free blocks count wrong (249509, counted=223909).
Fix? yes
Free inodes count wrong for group #0 (8181, counted=8180).
Fix? yes
Free inodes count wrong (65525, counted=65524).
Fix? yes

/dev/sda: * FILE SYSTEM WAS MODIFIED *
/dev/sda: 12/65536 files (8.3% non-contiguous), 38235/262144 blocks
root@(none)$

2.

 root@(none)$ rm /mnt/test
[   71.021484] EXT4-fs error (device sda) in
ext4_reserve_inode_write:5362: Corrupt filesystem
[   71.044959] Aborting journal on device sda-8.
[   71.052152] EXT4-fs (sda): Remounting filesystem read-only
[   71.052833] EXT4-fs error (device sda) in ext4_dirty_inode:5487: IO failure
[   71.061600] EXT4-fs error (device sda) in ext4_free_blocks:4904:
Journal has aborted
[   71.069948] EXT4-fs error (device sda) in
ext4_reserve_inode_write:5362: Corrupt filesystem
[   71.078296] EXT4-fs error (device sda) in
ext4_reserve_inode_write:5362: Corrupt filesystem
[   71.086597] EXT4-fs error (device sda) in
ext4_ext_remove_space:3058: IO failure
[   71.094946] EXT4-fs error (device sda) in ext4_ext_truncate:4657:
Corrupt filesystem
[   71.103296] EXT4-fs error (device sda) in
ext4_reserve_inode_write:5362: Corrupt filesystem
[   71.111595] EXT4-fs error (device sda) in ext4_truncate:4150: IO failure
[   71.119946] EXT4-fs error (device sda) in
ext4_reserve_inode_write:5362: Corrup

Re: ext4 error when testing virtio-scsi & vhost-scsi

2016-07-31 Thread Zhangfei Gao
Hi, Jan

On Thu, Jul 28, 2016 at 9:29 AM, Zhangfei Gao  wrote:
> Hi, Jan
>
> On Wed, Jul 27, 2016 at 11:56 PM, Jan Kara  wrote:
>> Hi!
>>
>> On Wed 27-07-16 15:58:55, Zhangfei Gao wrote:
>>> Hi, Michael
>>>
>>> I have met ext4 error when using vhost_scsi on arm64 platform, and
>>> suspect it is vhost_scsi issue.
>>>
>>> Ext4 error when testing virtio_scsi & vhost_scsi
>>>
>>>
>>> No issue:
>>> 1. virtio_scsi, ext4
>>> 2. vhost_scsi & virtio_scsi, ext2
>>> 3.  Instead of vhost, also tried loopback and no problem.
>>> Using loopback, host can use the new block device, while vhost is used
>>> by guest (qemu).
>>> http://www.linux-iscsi.org/wiki/Tcm_loop
>>> Test directly in host, not find ext4 error.
>>>
>>>
>>>
>>> Have issue:
>>> 1. vhost_scsi & virtio_scsi, ext4
>>> a. iblock
>>> b, fileio, file located in /tmp (ram), no device based.
>>>
>>> 2, Have tried 4.7-rc2 and 4.5-rc1 on D02 board, both have issue.
>>> Since I need kvm specific patch for D02, so it may not freely to switch
>>> to older version.
>>>
>>> 3. Also test with ext4, disabling journal
>>> mkfs.ext4 -O ^has_journal /dev/sda
>>>
>>>
>>> Do you have any suggestion?
>>
>> So can you mount the filesystem with errors=remount-ro to avoid clobbering
>> the fs after the problem happens? And then run e2fsck on the problematic
>> filesystem and send the output here?
>>
>
> Tested twice, log pasted.
> Both using fileio, located in host ramfs /tmp
> Before e2fsck, umount /dev/sda
>
> 1.
> root@(none)$ mount -o errors=remount-ro /dev/sda /mnt
> [   22.812053] EXT4-fs (sda): mounted filesystem with ordered data
> mode. Opts: errors=remount-ro
> $ rm /mnt/test
> [  108.388905] EXT4-fs error (device sda) in
> ext4_reserve_inode_write:5362: Corrupt filesystem
> [  108.406930] Aborting journal on device sda-8.
> [  108.414120] EXT4-fs (sda): Remounting filesystem read-only
> [  108.414847] EXT4-fs error (device sda) in ext4_dirty_inode:5487: IO failure
> [  108.423571] EXT4-fs error (device sda) in ext4_free_blocks:4904:
> Journal has aborted
> [  108.431919] EXT4-fs error (device sda) in
> ext4_reserve_inode_write:5362: Corrupt filesystem
> [  108.440269] EXT4-fs error (device sda) in
> ext4_reserve_inode_write:5362: Corrupt filesystem
> [  108.448568] EXT4-fs error (device sda) in
> ext4_ext_remove_space:3058: IO failure
> [  108.456917] EXT4-fs error (device sda) in ext4_ext_truncate:4657:
> Corrupt filesystem
> [  108.465267] EXT4-fs error (device sda) in
> ext4_reserve_inode_write:5362: Corrupt filesystem
> [  108.473567] EXT4-fs error (device sda) in ext4_truncate:4150: IO failure
> [  108.481917] EXT4-fs error (device sda) in
> ext4_reserve_inode_write:5362: Corrupt filesystem
> root@(none)$ e2fsck /dev/sda
> e2fsck 1.42.9 (28-Dec-2013)
> /dev/sda is mounted.
> e2fsck: Cannot continue, aborting.
>
>
> root@(none)$ umount /mnt
> [  260.756250] EXT4-fs error (device sda): ext4_put_super:837:
> Couldn't clean up the journal
> root@(none)$ umount /mnt   e2fsck /dev/sda
> e2fsck 1.42.9 (28-Dec-2013)
> ext2fs_open2: Bad magic number in super-block
> e2fsck: Superblock invalid, trying backup blocks...
> Superblock needs_recovery flag is clear, but journal has data.
> Recovery flag not set in backup superblock, so running journal anyway.
> /dev/sda: recovering journal
> Pass 1: Checking inodes, blocks, and sizes
> Pass 2: Checking directory structure
> Pass 3: Checking directory connectivity
> Pass 4: Checking reference counts
> Pass 5: Checking group summary information
> Free blocks count wrong for group #1 (32703, counted=8127).
> Fix? yes
> Free blocks count wrong for group #2 (32768, counted=31744).
> Fix? yes
> Free blocks count wrong (249509, counted=223909).
> Fix? yes
> Free inodes count wrong for group #0 (8181, counted=8180).
> Fix? yes
> Free inodes count wrong (65525, counted=65524).
> Fix? yes
>
> /dev/sda: * FILE SYSTEM WAS MODIFIED *
> /dev/sda: 12/65536 files (8.3% non-contiguous), 38235/262144 blocks
> root@(none)$
>
> 2.
>
>  root@(none)$ rm /mnt/test
> [   71.021484] EXT4-fs error (device sda) in
> ext4_reserve_inode_write:5362: Corrupt filesystem
> [   71.044959] Aborting journal on device sda-8.
> [   71.052152] EXT4-fs (sda): Remounting filesystem read-only
> [   71.052833] EXT4-fs error (device sda) in ext4_dirty_inode:5487: IO failure
> [   71.061600] EXT4-fs error (device sda) in ext4_free_blocks:4904:
> Journal has aborted
>

Re: [PATCH v8 07/10] iommufd: Fault-capable hwpt attach/detach/replace

2024-10-14 Thread Zhangfei Gao
Hi, Baolu

On Tue, 2 Jul 2024 at 14:39, Lu Baolu  wrote:
>
> Add iopf-capable hw page table attach/detach/replace helpers. The pointer
> to iommufd_device is stored in the domain attachment handle, so that it
> can be echo'ed back in the iopf_group.
>
> The iopf-capable hw page tables can only be attached to devices that
> support the IOMMU_DEV_FEAT_IOPF feature. On the first attachment of an
> iopf-capable hw_pagetable to the device, the IOPF feature is enabled on
> the device. Similarly, after the last iopf-capable hwpt is detached from
> the device, the IOPF feature is disabled on the device.
>
> The current implementation allows a replacement between iopf-capable and
> non-iopf-capable hw page tables. This matches the nested translation use
> case, where a parent domain is attached by default and can then be
> replaced with a nested user domain with iopf support.
>
> Signed-off-by: Lu Baolu 
> Reviewed-by: Kevin Tian 
> ---
>  drivers/iommu/iommufd/iommufd_private.h |  41 +
>  drivers/iommu/iommufd/device.c  |   7 +-
>  drivers/iommu/iommufd/fault.c   | 190 
>  3 files changed, 235 insertions(+), 3 deletions(-)
>

> diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
> index 68ff94671d48..4934ae572638 100644
> --- a/drivers/iommu/iommufd/fault.c
> +++ b/drivers/iommu/iommufd/fault.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -15,6 +16,195 @@
>  #include "../iommu-priv.h"
>  #include "iommufd_private.h"
>
> +static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
> +{
> +   struct device *dev = idev->dev;
> +   int ret;
> +
> +   /*
> +* Once we turn on PCI/PRI support for VF, the response failure code
> +* should not be forwarded to the hardware due to PRI being a shared
> +* resource between PF and VFs. There is no coordination for this
> +* shared capability. This waits for a vPRI reset to recover.
> +*/
> +   if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn)
> +   return -EINVAL;

I am using the SMMUv3 stall feature, and need to forward this to hardware,
And now I am hacking to comment this check.
Any suggestions?

> +
> +   mutex_lock(&idev->iopf_lock);
> +   /* Device iopf has already been on. */
> +   if (++idev->iopf_enabled > 1) {
> +   mutex_unlock(&idev->iopf_lock);
> +   return 0;
> +   }
> +
> +   ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
> +   if (ret)
> +   --idev->iopf_enabled;
> +   mutex_unlock(&idev->iopf_lock);

Also iommu_dev_enable_feature(idev->dev, IOMMU_DEV_FEAT_SVA); is required
In thinking how to add it properly.

Thanks



Re: [PATCH v8 07/10] iommufd: Fault-capable hwpt attach/detach/replace

2024-10-16 Thread Zhangfei Gao
On Wed, 16 Oct 2024 at 23:25, Jason Gunthorpe  wrote:
>
> On Wed, Oct 16, 2024 at 09:58:36AM +0800, Zhangfei Gao wrote:
> > On Tue, 15 Oct 2024 at 20:54, Jason Gunthorpe  wrote:
> > >
> > > On Tue, Oct 15, 2024 at 11:19:33AM +0800, Zhangfei Gao wrote:
> > > > > +static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
> > > > > +{
> > > > > +   struct device *dev = idev->dev;
> > > > > +   int ret;
> > > > > +
> > > > > +   /*
> > > > > +* Once we turn on PCI/PRI support for VF, the response 
> > > > > failure code
> > > > > +* should not be forwarded to the hardware due to PRI being a 
> > > > > shared
> > > > > +* resource between PF and VFs. There is no coordination for 
> > > > > this
> > > > > +* shared capability. This waits for a vPRI reset to recover.
> > > > > +*/
> > > > > +   if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn)
> > > > > +   return -EINVAL;
> > > >
> > > > I am using the SMMUv3 stall feature, and need to forward this to 
> > > > hardware,
> > > > And now I am hacking to comment this check.
> > > > Any suggestions?
> > >
> > > Are you using PCI SRIOV and stall together?
> >
> > Only use smmuv3 stall feature.
>
> Then isn't to_pci_dev(dev)->is_virtfn == false?
>
> That should only be true with SRIOV

Do you mean
if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn == false)
return -EINVAL;

This is fine

>
> > > FEAT_SVA needs to be deleted, not added too.
> > >
> > > smmu-v3 needs some more fixing to move that
> > > arm_smmu_master_enable_sva() logic into domain attachment.
> >
> > Will think about this, Thanks Jason
>
> Can you test it if a patch is made?

Yes, sure.

Thanks



Re: [PATCH v8 07/10] iommufd: Fault-capable hwpt attach/detach/replace

2024-10-17 Thread Zhangfei Gao
On Thu, 17 Oct 2024 at 20:05, Jason Gunthorpe  wrote:
>
> On Thu, Oct 17, 2024 at 09:44:18AM +0800, Zhangfei Gao wrote:
> > On Wed, 16 Oct 2024 at 23:25, Jason Gunthorpe  wrote:
> > >
> > > On Wed, Oct 16, 2024 at 09:58:36AM +0800, Zhangfei Gao wrote:
> > > > On Tue, 15 Oct 2024 at 20:54, Jason Gunthorpe  wrote:
> > > > >
> > > > > On Tue, Oct 15, 2024 at 11:19:33AM +0800, Zhangfei Gao wrote:
> > > > > > > +static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
> > > > > > > +{
> > > > > > > +   struct device *dev = idev->dev;
> > > > > > > +   int ret;
> > > > > > > +
> > > > > > > +   /*
> > > > > > > +* Once we turn on PCI/PRI support for VF, the response 
> > > > > > > failure code
> > > > > > > +* should not be forwarded to the hardware due to PRI 
> > > > > > > being a shared
> > > > > > > +* resource between PF and VFs. There is no coordination 
> > > > > > > for this
> > > > > > > +* shared capability. This waits for a vPRI reset to 
> > > > > > > recover.
> > > > > > > +*/
> > > > > > > +   if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn)
> > > > > > > +   return -EINVAL;
> > > > > >
> > > > > > I am using the SMMUv3 stall feature, and need to forward this to 
> > > > > > hardware,
> > > > > > And now I am hacking to comment this check.
> > > > > > Any suggestions?
> > > > >
> > > > > Are you using PCI SRIOV and stall together?
> > > >
> > > > Only use smmuv3 stall feature.
Sorry, this is not correct

> > >
> > > Then isn't to_pci_dev(dev)->is_virtfn == false?
> > >
> > > That should only be true with SRIOV
> >
> > Do you mean
> > if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn == false)
> > return -EINVAL;
> >
> > This is fine
>
> No, I mean on your test system you are not using SRIOV so all your PCI
> devices will have is_virtfn == false and the above if shouldn't be a
> problem. is_virtfn indicates the PCI device is a SRIOV VF.
>
> Your explanation for your problem doesn't really make sense, or there
> is something wrong someplace else to get a bogus is_virtfn..
>
> If you are doing SRIOV with stall, then that is understandable.

Yes, you are right
 I am using SRIOV vf and stall feature, so is_virtfn == true

Our ACC devices are fake pci endpoint devices which supports stall,
And they also supports sriov

So I have to ignore the limitation.

Thanks



Re: [PATCH v8 07/10] iommufd: Fault-capable hwpt attach/detach/replace

2024-10-17 Thread Zhangfei Gao
Hi, Baolu

On Fri, 18 Oct 2024 at 09:58, Baolu Lu  wrote:
>
> On 2024/10/17 21:08, Jason Gunthorpe wrote:
> > On Thu, Oct 17, 2024 at 08:35:24PM +0800, Zhangfei Gao wrote:
> >
> >> Yes, you are right
> >>   I am using SRIOV vf and stall feature, so is_virtfn == true
> >>
> >> Our ACC devices are fake pci endpoint devices which supports stall,
> >> And they also supports sriov
> >>
> >> So I have to ignore the limitation.
> > I see, so that is more complicated.
> >
> > Lu, what do you think about also checking if the PCI function has PRI
> > ? If not PRI assume the fault is special and doesn't follow PRI rules?
> >
> > Or maybe we can have the iommu driver tag the event as a PRI/not-PRI
> > fault?
>
> This limitation applies to PRI on PCI/SRIOV VFs because the PRI might be
> a shared resource and current iommu subsystem is not ready to support
> enabling/disabling PRI on a VF without any impact on others.
>
> In my understanding, it's fine to remove this limitation from the use
> case of non-PRI on SRIOV VFs. Perhaps something like below?
>
#include 
> if (dev_is_pci(dev)) {
> struct pci_dev *pdev = to_pci_dev(dev);
> if (pdev->is_virtfn && pci_pri_supported(pdev))
> return -EINVAL;
> }

Yes, this works on our platform.

Thanks Baolu.



Re: [PATCH v8 07/10] iommufd: Fault-capable hwpt attach/detach/replace

2024-10-23 Thread Zhangfei Gao
On Tue, 22 Oct 2024 at 22:53, Jason Gunthorpe  wrote:
>
> On Tue, Oct 22, 2024 at 10:30:10PM +0800, Zhangfei Gao wrote:
> > On Fri, 18 Oct 2024 at 21:53, Jason Gunthorpe  wrote:
> > >
> > > On Wed, Oct 16, 2024 at 12:25:03PM -0300, Jason Gunthorpe wrote:
> > > > > > smmu-v3 needs some more fixing to move that
> > > > > > arm_smmu_master_enable_sva() logic into domain attachment.
> > > > >
> > > > > Will think about this, Thanks Jason
> > > >
> > > > Can you test it if a patch is made?
> > >
> > > Here it is:
> > >
> > > https://github.com/jgunthorpe/linux/commits/smmuv3_nesting/
> > >
> > > fa1528253d2210 iommu: Remove IOMMU_DEV_FEAT_SVA
> > > 5675560a272cf5 iommu/vt-d: Check if SVA is supported when attaching the 
> > > SVA domain
> > > 94bc2b9525b508 iommu/arm-smmu-v3: Put iopf enablement in the domain 
> > > attach path
> > >
> > > Let me know..
> >
> > With these patches, do we still need ops->user_pasid_table?
>
> It makes no change - you need user_pasid_table to make
> IOMMU_DOMAIN_NESTED work.
>
> If you aren't using IOMMU_DOMAIN_NESTED you shouldn't need it.

OK, I misunderstood.

Then, with user_pasid_table=1, user page fault is OK both with and
without these patches.

>
> > if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
> > attach_handle = iommu_attach_handle_get(dev->iommu_group,
> > fault->prm.pasid, 0);
> >
> > // is attach_handle expected effect value here?
> > if (IS_ERR(attach_handle)) {
> > const struct iommu_ops *ops = dev_iommu_ops(dev);
> >
> > if (!ops->user_pasid_table)
> > return NULL;
> > /*
> >  * The iommu driver for this device supports user-
> >  * managed PASID table. Therefore page faults for
> >  * any PASID should go through the NESTING domain
> >  * attached to the device RID.
> >  */
> > attach_handle = iommu_attach_handle_get(
> > dev->iommu_group, IOMMU_NO_PASID,
> > IOMMU_DOMAIN_NESTED);
> >
> > Now I still need set ops->user_pasid_table, since attach_handle can not
> > return from the first iommu_attach_handle_get with fault->prm.pasid = 1,
> > but the second iommu_attach_handle_get with  IOMMU_NO_PASID,
> > suppose it is not expected?
>
> The second handle_get will always fail unless you are using
> IOMMU_DOMAIN_NESTED in userspace with iommufd.
>
> What testing are you doing exactly?

I am testing vsva based on Nico's iommufd_viommu_p2-v3 branch,
which requires IOMMU_DOMAIN_NESTED in user space with iommufd.

About the three patches
1. Tested host sva, OK
2. Simply tested vsva on guests, OK, will do more tests.

Thanks



Re: [PATCH v8 07/10] iommufd: Fault-capable hwpt attach/detach/replace

2024-10-22 Thread Zhangfei Gao
On Fri, 18 Oct 2024 at 21:53, Jason Gunthorpe  wrote:
>
> On Wed, Oct 16, 2024 at 12:25:03PM -0300, Jason Gunthorpe wrote:
> > > > smmu-v3 needs some more fixing to move that
> > > > arm_smmu_master_enable_sva() logic into domain attachment.
> > >
> > > Will think about this, Thanks Jason
> >
> > Can you test it if a patch is made?
>
> Here it is:
>
> https://github.com/jgunthorpe/linux/commits/smmuv3_nesting/
>
> fa1528253d2210 iommu: Remove IOMMU_DEV_FEAT_SVA
> 5675560a272cf5 iommu/vt-d: Check if SVA is supported when attaching the SVA 
> domain
> 94bc2b9525b508 iommu/arm-smmu-v3: Put iopf enablement in the domain attach 
> path
>
> Let me know..

With these patches, do we still need ops->user_pasid_table?

if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
attach_handle = iommu_attach_handle_get(dev->iommu_group,
fault->prm.pasid, 0);

// is attach_handle expected effect value here?
if (IS_ERR(attach_handle)) {
const struct iommu_ops *ops = dev_iommu_ops(dev);

if (!ops->user_pasid_table)
return NULL;
/*
 * The iommu driver for this device supports user-
 * managed PASID table. Therefore page faults for
 * any PASID should go through the NESTING domain
 * attached to the device RID.
 */
attach_handle = iommu_attach_handle_get(
dev->iommu_group, IOMMU_NO_PASID,
IOMMU_DOMAIN_NESTED);

Now I still need to set ops->user_pasid_table, since attach_handle cannot
be returned from the first iommu_attach_handle_get with fault->prm.pasid = 1,
but only from the second iommu_attach_handle_get with IOMMU_NO_PASID.
I suppose this is not expected?

Thanks



Re: [PATCH v8 07/10] iommufd: Fault-capable hwpt attach/detach/replace

2024-10-28 Thread Zhangfei Gao
On Sun, 27 Oct 2024 at 22:26, Baolu Lu  wrote:

>
> Can you please make this change a formal patch by yourself? As I don't
> have hardware in hand, I'm not confident to accurately describe the
> requirement or verify the new version during the upstream process.
>

OK, how about this one

Subject: [PATCH] iommufd: modify iommufd_fault_iopf_enable limitation

iommufd_fault_iopf_enable has a limitation for PRI on PCI/SRIOV VFs,
because PRI might be a shared resource and the current iommu
subsystem is not ready to support enabling/disabling PRI on a VF
without any impact on others.

However, we have devices that appear as PCI but are actually on the
AMBA bus. These fake PCI devices have PASID capability and support
stall as well as SRIOV, so remove the limitation for these devices.

Signed-off-by: Zhangfei Gao 
Signed-off-by: Lu Baolu 
---
 drivers/iommu/iommufd/fault.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
index bca956d496bd..8b3e34250dae 100644
--- a/drivers/iommu/iommufd/fault.c
+++ b/drivers/iommu/iommufd/fault.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 

@@ -27,8 +28,12 @@ static int iommufd_fault_iopf_enable(struct
iommufd_device *idev)
 * resource between PF and VFs. There is no coordination for this
 * shared capability. This waits for a vPRI reset to recover.
 */
-   if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn)
-   return -EINVAL;
+   if (dev_is_pci(dev)) {
+   struct pci_dev *pdev = to_pci_dev(dev);
+
+   if (pdev->is_virtfn && pci_pri_supported(pdev))
+   return -EINVAL;
+   }

mutex_lock(&idev->iopf_lock);
/* Device iopf has already been on. */
--

2.25.1



Re: [PATCH v8 07/10] iommufd: Fault-capable hwpt attach/detach/replace

2024-10-27 Thread Zhangfei Gao
Hi, Baolu

On Fri, 18 Oct 2024 at 10:45, Zhangfei Gao  wrote:
>
> Hi, Baolu
>
> On Fri, 18 Oct 2024 at 09:58, Baolu Lu  wrote:
> >
> > On 2024/10/17 21:08, Jason Gunthorpe wrote:
> > > On Thu, Oct 17, 2024 at 08:35:24PM +0800, Zhangfei Gao wrote:
> > >
> > >> Yes, you are right
> > >>   I am using SRIOV vf and stall feature, so is_virtfn == true
> > >>
> > >> Our ACC devices are fake pci endpoint devices which supports stall,
> > >> And they also supports sriov
> > >>
> > >> So I have to ignore the limitation.
> > > I see, so that is more complicated.
> > >
> > > Lu, what do you think about also checking if the PCI function has PRI
> > > ? If not PRI assume the fault is special and doesn't follow PRI rules?
> > >
> > > Or maybe we can have the iommu driver tag the event as a PRI/not-PRI
> > > fault?
> >
> > This limitation applies to PRI on PCI/SRIOV VFs because the PRI might be
> > a shared resource and current iommu subsystem is not ready to support
> > enabling/disabling PRI on a VF without any impact on others.
> >
> > In my understanding, it's fine to remove this limitation from the use
> > case of non-PRI on SRIOV VFs. Perhaps something like below?
> >
> #include 
> > if (dev_is_pci(dev)) {
> >     struct pci_dev *pdev = to_pci_dev(dev);
> > if (pdev->is_virtfn && pci_pri_supported(pdev))
> > return -EINVAL;
> > }
>
> Yes, this works on our platform.

Will you send this patch?

Tested-by: Zhangfei Gao 

Thanks



Re: [PATCH v8 07/10] iommufd: Fault-capable hwpt attach/detach/replace

2024-10-15 Thread Zhangfei Gao
On Tue, 15 Oct 2024 at 20:54, Jason Gunthorpe  wrote:
>
> On Tue, Oct 15, 2024 at 11:19:33AM +0800, Zhangfei Gao wrote:
> > > +static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
> > > +{
> > > +   struct device *dev = idev->dev;
> > > +   int ret;
> > > +
> > > +   /*
> > > +* Once we turn on PCI/PRI support for VF, the response failure 
> > > code
> > > +* should not be forwarded to the hardware due to PRI being a 
> > > shared
> > > +* resource between PF and VFs. There is no coordination for this
> > > +* shared capability. This waits for a vPRI reset to recover.
> > > +*/
> > > +   if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn)
> > > +   return -EINVAL;
> >
> > I am using the SMMUv3 stall feature, and need to forward this to hardware,
> > And now I am hacking to comment this check.
> > Any suggestions?
>
> Are you using PCI SRIOV and stall together?

No, I only use the SMMUv3 stall feature.

>
> > > +   mutex_lock(&idev->iopf_lock);
> > > +   /* Device iopf has already been on. */
> > > +   if (++idev->iopf_enabled > 1) {
> > > +   mutex_unlock(&idev->iopf_lock);
> > > +   return 0;
> > > +   }
> > > +
> > > +   ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
> > > +   if (ret)
> > > +   --idev->iopf_enabled;
> > > +   mutex_unlock(&idev->iopf_lock);
> >
> > Also iommu_dev_enable_feature(idev->dev, IOMMU_DEV_FEAT_SVA); is required
> > In thinking how to add it properly.
>
> FEAT_SVA needs to be deleted, not added too.
>
> smmu-v3 needs some more fixing to move that
> arm_smmu_master_enable_sva() logic into domain attachment.

Will think about this, Thanks Jason

>
> Jason