commit:     319dec94ad287c417c860cff2ff3e704e7e78f95
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sat Feb 1 23:08:10 2025 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sat Feb 1 23:08:10 2025 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=319dec94
Linux patch 6.1.128 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org> 0000_README | 4 + 1127_linux-6.1.128.patch | 2126 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2130 insertions(+) diff --git a/0000_README b/0000_README index 1c0a58fe..72037a74 100644 --- a/0000_README +++ b/0000_README @@ -555,6 +555,10 @@ Patch: 1126_linux-6.1.127.patch From: https://www.kernel.org Desc: Linux 6.1.127 +Patch: 1127_linux-6.1.128.patch +From: https://www.kernel.org +Desc: Linux 6.1.128 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1127_linux-6.1.128.patch b/1127_linux-6.1.128.patch new file mode 100644 index 00000000..c8c1343c --- /dev/null +++ b/1127_linux-6.1.128.patch @@ -0,0 +1,2126 @@ +diff --git a/Makefile b/Makefile +index 9c703cff00bb73..1fc69d1f5b8c49 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 6 + PATCHLEVEL = 1 +-SUBLEVEL = 127 ++SUBLEVEL = 128 + EXTRAVERSION = + NAME = Curry Ramen + +diff --git a/block/ioctl.c b/block/ioctl.c +index c7390d8c9fc737..552da0ccbec09e 100644 +--- a/block/ioctl.c ++++ b/block/ioctl.c +@@ -115,7 +115,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, + return -EINVAL; + + filemap_invalidate_lock(inode->i_mapping); +- err = truncate_bdev_range(bdev, mode, start, start + len - 1); ++ err = truncate_bdev_range(bdev, mode, start, end - 1); + if (err) + goto fail; + err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); +@@ -127,7 +127,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, + static int blk_ioctl_secure_erase(struct block_device *bdev, fmode_t mode, + void __user *argp) + { +- uint64_t start, len; ++ uint64_t start, len, end; + uint64_t range[2]; + int err; + +@@ -142,11 +142,12 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, fmode_t mode, + len = range[1]; + if ((start & 511) || (len & 511)) + return -EINVAL; +- if (start + len > bdev_nr_bytes(bdev)) ++ if (check_add_overflow(start, len, &end) || ++ end > bdev_nr_bytes(bdev)) + return -EINVAL; + + filemap_invalidate_lock(bdev->bd_inode->i_mapping); +- err = truncate_bdev_range(bdev, mode, start, start + len - 1); ++ err = truncate_bdev_range(bdev, mode, start, end - 1); + if (!err) + err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, + GFP_KERNEL); +diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c +index 15b37a4163d318..f0e314abcafc53 100644 +--- a/drivers/base/regmap/regmap.c ++++ b/drivers/base/regmap/regmap.c +@@ -652,6 +652,17 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, + } + EXPORT_SYMBOL_GPL(regmap_attach_dev); + ++static int dev_get_regmap_match(struct device *dev, void *res, void *data); ++ ++static int regmap_detach_dev(struct device *dev, struct regmap *map) ++{ ++ if (!dev) ++ return 0; ++ ++ return devres_release(dev, dev_get_regmap_release, ++ dev_get_regmap_match, (void *)map->name); ++} ++ + static enum regmap_endian regmap_get_reg_endian(const struct regmap_bus *bus, + const struct regmap_config *config) + { +@@ -1536,6 +1547,7 @@ void regmap_exit(struct regmap *map) + { + struct regmap_async *async; + ++ regmap_detach_dev(map->dev, map); + regcache_exit(map); + regmap_debugfs_exit(map); + regmap_range_exit(map); +diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +index 3f32e9c3fbaf4c..8d7b2eee8c7c39 
100644 +--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c ++++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +@@ -65,7 +65,8 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, + + bool should_use_dmub_lock(struct dc_link *link) + { +- if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) ++ if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 || ++ link->psr_settings.psr_version == DC_PSR_VERSION_1) + return true; + return false; + } +diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c +index 76806039691a2c..b2d59a16869728 100644 +--- a/drivers/gpu/drm/v3d/v3d_irq.c ++++ b/drivers/gpu/drm/v3d/v3d_irq.c +@@ -102,8 +102,10 @@ v3d_irq(int irq, void *arg) + to_v3d_fence(v3d->bin_job->base.irq_fence); + + trace_v3d_bcl_irq(&v3d->drm, fence->seqno); +- dma_fence_signal(&fence->base); ++ + v3d->bin_job = NULL; ++ dma_fence_signal(&fence->base); ++ + status = IRQ_HANDLED; + } + +@@ -112,8 +114,10 @@ v3d_irq(int irq, void *arg) + to_v3d_fence(v3d->render_job->base.irq_fence); + + trace_v3d_rcl_irq(&v3d->drm, fence->seqno); +- dma_fence_signal(&fence->base); ++ + v3d->render_job = NULL; ++ dma_fence_signal(&fence->base); ++ + status = IRQ_HANDLED; + } + +@@ -122,8 +126,10 @@ v3d_irq(int irq, void *arg) + to_v3d_fence(v3d->csd_job->base.irq_fence); + + trace_v3d_csd_irq(&v3d->drm, fence->seqno); +- dma_fence_signal(&fence->base); ++ + v3d->csd_job = NULL; ++ dma_fence_signal(&fence->base); ++ + status = IRQ_HANDLED; + } + +@@ -159,8 +165,10 @@ v3d_hub_irq(int irq, void *arg) + to_v3d_fence(v3d->tfu_job->base.irq_fence); + + trace_v3d_tfu_irq(&v3d->drm, fence->seqno); +- dma_fence_signal(&fence->base); ++ + v3d->tfu_job = NULL; ++ dma_fence_signal(&fence->base); ++ + status = IRQ_HANDLED; + } + +diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h +index 790966e5b6ec45..d8c5e24e7d44c5 100644 +--- a/drivers/hid/hid-ids.h ++++ b/drivers/hid/hid-ids.h +@@ -507,7 +507,6 @@ + #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100 + + #define I2C_VENDOR_ID_GOODIX 0x27c6 +-#define I2C_DEVICE_ID_GOODIX_01E0 0x01e0 + #define I2C_DEVICE_ID_GOODIX_01E8 0x01e8 + #define I2C_DEVICE_ID_GOODIX_01E9 0x01e9 + #define I2C_DEVICE_ID_GOODIX_01F0 0x01f0 +diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c +index bf9cad71125923..e62104e1a6038b 100644 +--- a/drivers/hid/hid-multitouch.c ++++ b/drivers/hid/hid-multitouch.c +@@ -1447,8 +1447,7 @@ static __u8 *mt_report_fixup(struct hid_device *hdev, __u8 *rdesc, + { + if (hdev->vendor == I2C_VENDOR_ID_GOODIX && + (hdev->product == I2C_DEVICE_ID_GOODIX_01E8 || +- hdev->product == I2C_DEVICE_ID_GOODIX_01E9 || +- hdev->product == I2C_DEVICE_ID_GOODIX_01E0)) { ++ hdev->product == I2C_DEVICE_ID_GOODIX_01E9)) { + if (rdesc[607] == 0x15) { + rdesc[607] = 0x25; + dev_info( +@@ -2073,10 +2072,7 @@ static const struct hid_device_id mt_devices[] = { + I2C_DEVICE_ID_GOODIX_01E8) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, +- I2C_DEVICE_ID_GOODIX_01E9) }, +- { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, +- HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, +- I2C_DEVICE_ID_GOODIX_01E0) }, ++ I2C_DEVICE_ID_GOODIX_01E8) }, + + /* GoodTouch panels */ + { .driver_data = MT_CLS_NSMU, +diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c +index 13c36f51b93533..872381221e754f 100644 +--- a/drivers/input/joystick/xpad.c ++++ b/drivers/input/joystick/xpad.c +@@ -145,6 +145,7 @@ static const 
struct xpad_device { + { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 }, + { 0x045e, 0x028f, "Microsoft X-Box 360 pad v2", 0, XTYPE_XBOX360 }, + { 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, ++ { 0x045e, 0x02a9, "Xbox 360 Wireless Receiver (Unofficial)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, + { 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE }, + { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE }, + { 0x045e, 0x02e3, "Microsoft X-Box One Elite pad", MAP_PADDLES, XTYPE_XBOXONE }, +@@ -366,6 +367,7 @@ static const struct xpad_device { + { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, + { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, + { 0x31e3, 0x1220, "Wooting Two HE", 0, XTYPE_XBOX360 }, ++ { 0x31e3, 0x1230, "Wooting Two HE (ARM)", 0, XTYPE_XBOX360 }, + { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 }, + { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 }, + { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, +diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c +index 661d6c8b059bf5..b3a856333d4e03 100644 +--- a/drivers/input/keyboard/atkbd.c ++++ b/drivers/input/keyboard/atkbd.c +@@ -89,7 +89,7 @@ static const unsigned short atkbd_set2_keycode[ATKBD_KEYMAP_SIZE] = { + 0, 46, 45, 32, 18, 5, 4, 95, 0, 57, 47, 33, 20, 19, 6,183, + 0, 49, 48, 35, 34, 21, 7,184, 0, 0, 50, 36, 22, 8, 9,185, + 0, 51, 37, 23, 24, 11, 10, 0, 0, 52, 53, 38, 39, 25, 12, 0, +- 0, 89, 40, 0, 26, 13, 0, 0, 58, 54, 28, 27, 0, 43, 0, 85, ++ 0, 89, 40, 0, 26, 13, 0,193, 58, 54, 28, 27, 0, 43, 0, 85, + 0, 86, 91, 90, 92, 0, 14, 94, 0, 79,124, 75, 71,121, 0, 0, + 82, 83, 80, 76, 77, 72, 1, 69, 87, 78, 81, 74, 55, 73, 70, 99, + +diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c +index 21d49791f85527..83c7417611fa52 100644 +--- a/drivers/irqchip/irq-sunxi-nmi.c ++++ b/drivers/irqchip/irq-sunxi-nmi.c +@@ -187,7 +187,8 @@ static int __init sunxi_sc_nmi_irq_init(struct device_node *node, + gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; + gc->chip_types[0].chip.irq_eoi = irq_gc_ack_set_bit; + gc->chip_types[0].chip.irq_set_type = sunxi_sc_nmi_set_type; +- gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED; ++ gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED | ++ IRQCHIP_SKIP_SET_WAKE; + gc->chip_types[0].regs.ack = reg_offs->pend; + gc->chip_types[0].regs.mask = reg_offs->enable; + gc->chip_types[0].regs.type = reg_offs->ctrl; +diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c +index 687c906a9d727c..4b1f006c105b89 100644 +--- a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c ++++ b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c +@@ -2,7 +2,7 @@ + /****************************************************************************** + * + * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. 
+- * Copyright (C) 2019 - 2020, 2022 Intel Corporation ++ * Copyright (C) 2019 - 2020, 2022 - 2023 Intel Corporation + *****************************************************************************/ + #include <linux/kernel.h> + #include <linux/skbuff.h> +@@ -125,7 +125,7 @@ static int iwl_hwrate_to_plcp_idx(u32 rate_n_flags) + return idx; + } + +- return -1; ++ return IWL_RATE_INVALID; + } + + static void rs_rate_scale_perform(struct iwl_priv *priv, +@@ -3146,7 +3146,10 @@ static ssize_t rs_sta_dbgfs_scale_table_read(struct file *file, + for (i = 0; i < LINK_QUAL_MAX_RETRY_NUM; i++) { + index = iwl_hwrate_to_plcp_idx( + le32_to_cpu(lq_sta->lq.rs_table[i].rate_n_flags)); +- if (is_legacy(tbl->lq_type)) { ++ if (index == IWL_RATE_INVALID) { ++ desc += sprintf(buff + desc, " rate[%d] 0x%X invalid rate\n", ++ i, le32_to_cpu(lq_sta->lq.rs_table[i].rate_n_flags)); ++ } else if (is_legacy(tbl->lq_type)) { + desc += sprintf(buff+desc, " rate[%d] 0x%X %smbps\n", + i, le32_to_cpu(lq_sta->lq.rs_table[i].rate_n_flags), + iwl_rate_mcs[index].mbps); +diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +index 2be6801d48aca1..c5c24c6ffde3a4 100644 +--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +@@ -1072,10 +1072,13 @@ static void rs_get_lower_rate_down_column(struct iwl_lq_sta *lq_sta, + + rate->bw = RATE_MCS_CHAN_WIDTH_20; + +- WARN_ON_ONCE(rate->index < IWL_RATE_MCS_0_INDEX || +- rate->index > IWL_RATE_MCS_9_INDEX); ++ if (WARN_ON_ONCE(rate->index < IWL_RATE_MCS_0_INDEX)) ++ rate->index = rs_ht_to_legacy[IWL_RATE_MCS_0_INDEX]; ++ else if (WARN_ON_ONCE(rate->index > IWL_RATE_MCS_9_INDEX)) ++ rate->index = rs_ht_to_legacy[IWL_RATE_MCS_9_INDEX]; ++ else ++ rate->index = rs_ht_to_legacy[rate->index]; + +- rate->index = rs_ht_to_legacy[rate->index]; + rate->ldpc = false; + } else { + /* Downgrade to SISO with same MCS if in MIMO */ +diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c +index 49dbcd67579aa5..687487ea4fd3b6 100644 +--- a/drivers/scsi/scsi_transport_iscsi.c ++++ b/drivers/scsi/scsi_transport_iscsi.c +@@ -4102,7 +4102,7 @@ iscsi_if_rx(struct sk_buff *skb) + } + do { + /* +- * special case for GET_STATS: ++ * special case for GET_STATS, GET_CHAP and GET_HOST_STATS: + * on success - sending reply and stats from + * inside of if_recv_msg(), + * on error - fall through. +@@ -4111,6 +4111,8 @@ iscsi_if_rx(struct sk_buff *skb) + break; + if (ev->type == ISCSI_UEVENT_GET_CHAP && !err) + break; ++ if (ev->type == ISCSI_UEVENT_GET_HOST_STATS && !err) ++ break; + err = iscsi_if_send_reply(portid, nlh->nlmsg_type, + ev, sizeof(*ev)); + if (err == -EAGAIN && --retries < 0) { +diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c +index 0685cbe7f0ebae..d47adab00f04a5 100644 +--- a/drivers/scsi/storvsc_drv.c ++++ b/drivers/scsi/storvsc_drv.c +@@ -171,6 +171,12 @@ do { \ + dev_warn(&(dev)->device, fmt, ##__VA_ARGS__); \ + } while (0) + ++#define storvsc_log_ratelimited(dev, level, fmt, ...) \ ++do { \ ++ if (do_logging(level)) \ ++ dev_warn_ratelimited(&(dev)->device, fmt, ##__VA_ARGS__); \ ++} while (0) ++ + struct vmscsi_request { + u16 length; + u8 srb_status; +@@ -1168,7 +1174,7 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device, + int loglevel = (stor_pkt->vm_srb.cdb[0] == TEST_UNIT_READY) ? 
+ STORVSC_LOGGING_WARN : STORVSC_LOGGING_ERROR; + +- storvsc_log(device, loglevel, ++ storvsc_log_ratelimited(device, loglevel, + "tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x hv 0x%x\n", + scsi_cmd_to_rq(request->cmd)->tag, + stor_pkt->vm_srb.cdb[0], +diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c +index 35f1b8a0b506fc..8802dd53fbea9f 100644 +--- a/drivers/usb/gadget/function/u_serial.c ++++ b/drivers/usb/gadget/function/u_serial.c +@@ -1393,10 +1393,6 @@ void gserial_disconnect(struct gserial *gser) + /* REVISIT as above: how best to track this? */ + port->port_line_coding = gser->port_line_coding; + +- /* disable endpoints, aborting down any active I/O */ +- usb_ep_disable(gser->out); +- usb_ep_disable(gser->in); +- + port->port_usb = NULL; + gser->ioport = NULL; + if (port->port.count > 0) { +@@ -1408,6 +1404,10 @@ void gserial_disconnect(struct gserial *gser) + spin_unlock(&port->port_lock); + spin_unlock_irqrestore(&serial_port_lock, flags); + ++ /* disable endpoints, aborting down any active I/O */ ++ usb_ep_disable(gser->out); ++ usb_ep_disable(gser->in); ++ + /* finally, free any unused/unusable I/O buffers */ + spin_lock_irqsave(&port->port_lock, flags); + if (port->port.count == 0) +diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c +index 6fca40ace83a01..995313f4687ad9 100644 +--- a/drivers/usb/serial/quatech2.c ++++ b/drivers/usb/serial/quatech2.c +@@ -511,7 +511,7 @@ static void qt2_process_read_urb(struct urb *urb) + + newport = *(ch + 3); + +- if (newport > serial->num_ports) { ++ if (newport >= serial->num_ports) { + dev_err(&port->dev, + "%s - port change to invalid port: %i\n", + __func__, newport); +diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c +index 1a0a238ffa3543..af432b5b11ef39 100644 +--- a/drivers/vfio/platform/vfio_platform_common.c ++++ b/drivers/vfio/platform/vfio_platform_common.c +@@ -391,6 +391,11 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region *reg, + { + unsigned int done = 0; + ++ if (off >= reg->size) ++ return -EINVAL; ++ ++ count = min_t(size_t, count, reg->size - off); ++ + if (!reg->ioaddr) { + reg->ioaddr = + ioremap(reg->addr, reg->size); +@@ -470,6 +475,11 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region *reg, + { + unsigned int done = 0; + ++ if (off >= reg->size) ++ return -EINVAL; ++ ++ count = min_t(size_t, count, reg->size - off); ++ + if (!reg->ioaddr) { + reg->ioaddr = + ioremap(reg->addr, reg->size); +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 0b2591c071665c..53f1deb049ec12 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -5264,6 +5264,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) + INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ + mutex_init(&sbi->s_orphan_lock); + ++ spin_lock_init(&sbi->s_bdev_wb_lock); ++ + ext4_fast_commit_init(sb); + + sb->s_root = NULL; +@@ -5514,7 +5516,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) + * Save the original bdev mapping's wb_err value which could be + * used to detect the metadata async write error. 
+ */ +- spin_lock_init(&sbi->s_bdev_wb_lock); + errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err, + &sbi->s_bdev_wb_err); + sb->s_bdev->bd_super = sb; +diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c +index c367f1678d5dc1..8b564c68587100 100644 +--- a/fs/gfs2/file.c ++++ b/fs/gfs2/file.c +@@ -250,6 +250,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask) + error = filemap_fdatawait(inode->i_mapping); + if (error) + goto out; ++ truncate_inode_pages(inode->i_mapping, 0); + if (new_flags & GFS2_DIF_JDATA) + gfs2_ordered_del_inode(ip); + } +diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c +index 62935d61192aef..4b9cd9893ac61b 100644 +--- a/fs/smb/client/smb2ops.c ++++ b/fs/smb/client/smb2ops.c +@@ -4488,7 +4488,7 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) + */ + static int + crypt_message(struct TCP_Server_Info *server, int num_rqst, +- struct smb_rqst *rqst, int enc) ++ struct smb_rqst *rqst, int enc, struct crypto_aead *tfm) + { + struct smb2_transform_hdr *tr_hdr = + (struct smb2_transform_hdr *)rqst[0].rq_iov[0].iov_base; +@@ -4499,8 +4499,6 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, + u8 key[SMB3_ENC_DEC_KEY_SIZE]; + struct aead_request *req; + u8 *iv; +- DECLARE_CRYPTO_WAIT(wait); +- struct crypto_aead *tfm; + unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize); + void *creq; + +@@ -4511,14 +4509,6 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, + return rc; + } + +- rc = smb3_crypto_aead_allocate(server); +- if (rc) { +- cifs_server_dbg(VFS, "%s: crypto alloc failed\n", __func__); +- return rc; +- } +- +- tfm = enc ? server->secmech.enc : server->secmech.dec; +- + if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || + (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) + rc = crypto_aead_setkey(tfm, key, SMB3_GCM256_CRYPTKEY_SIZE); +@@ -4557,11 +4547,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, + aead_request_set_crypt(req, sg, sg, crypt_len, iv); + aead_request_set_ad(req, assoc_data_len); + +- aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, +- crypto_req_done, &wait); +- +- rc = crypto_wait_req(enc ? crypto_aead_encrypt(req) +- : crypto_aead_decrypt(req), &wait); ++ rc = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req); + + if (!rc && enc) + memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE); +@@ -4650,7 +4636,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, + /* fill the 1st iov with a transform header */ + fill_transform_hdr(tr_hdr, orig_len, old_rq, server->cipher_type); + +- rc = crypt_message(server, num_rqst, new_rq, 1); ++ rc = crypt_message(server, num_rqst, new_rq, 1, server->secmech.enc); + cifs_dbg(FYI, "Encrypt message returned %d\n", rc); + if (rc) + goto err_free; +@@ -4676,8 +4662,9 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, + unsigned int npages, unsigned int page_data_size, + bool is_offloaded) + { +- struct kvec iov[2]; ++ struct crypto_aead *tfm; + struct smb_rqst rqst = {NULL}; ++ struct kvec iov[2]; + int rc; + + iov[0].iov_base = buf; +@@ -4692,9 +4679,31 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, + rqst.rq_pagesz = PAGE_SIZE; + rqst.rq_tailsz = (page_data_size % PAGE_SIZE) ? 
: PAGE_SIZE; + +- rc = crypt_message(server, 1, &rqst, 0); ++ if (is_offloaded) { ++ if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) || ++ (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) ++ tfm = crypto_alloc_aead("gcm(aes)", 0, 0); ++ else ++ tfm = crypto_alloc_aead("ccm(aes)", 0, 0); ++ if (IS_ERR(tfm)) { ++ rc = PTR_ERR(tfm); ++ cifs_server_dbg(VFS, "%s: Failed alloc decrypt TFM, rc=%d\n", __func__, rc); ++ ++ return rc; ++ } ++ } else { ++ if (unlikely(!server->secmech.dec)) ++ return -EIO; ++ ++ tfm = server->secmech.dec; ++ } ++ ++ rc = crypt_message(server, 1, &rqst, 0, tfm); + cifs_dbg(FYI, "Decrypt message returned %d\n", rc); + ++ if (is_offloaded) ++ crypto_free_aead(tfm); ++ + if (rc) + return rc; + +diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c +index 9975711236b26c..bfe7b03307d45e 100644 +--- a/fs/smb/client/smb2pdu.c ++++ b/fs/smb/client/smb2pdu.c +@@ -1070,7 +1070,9 @@ SMB2_negotiate(const unsigned int xid, + * SMB3.0 supports only 1 cipher and doesn't have a encryption neg context + * Set the cipher type manually. + */ +- if (server->dialect == SMB30_PROT_ID && (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) ++ if ((server->dialect == SMB30_PROT_ID || ++ server->dialect == SMB302_PROT_ID) && ++ (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) + server->cipher_type = SMB2_ENCRYPTION_AES128_CCM; + + security_blob = smb2_get_data_area_len(&blob_offset, &blob_length, +@@ -1105,6 +1107,12 @@ SMB2_negotiate(const unsigned int xid, + else + cifs_server_dbg(VFS, "Missing expected negotiate contexts\n"); + } ++ ++ if (server->cipher_type && !rc) { ++ rc = smb3_crypto_aead_allocate(server); ++ if (rc) ++ cifs_server_dbg(VFS, "%s: crypto alloc failed, rc=%d\n", __func__, rc); ++ } + neg_exit: + free_rsp_buf(resp_buftype, rsp); + return rc; +diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c +index 8bb024b06b9563..74d039bdc9f707 100644 +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -2273,16 +2273,37 @@ xfs_alloc_min_freelist( + + ASSERT(mp->m_alloc_maxlevels > 0); + ++ /* ++ * For a btree shorter than the maximum height, the worst case is that ++ * every level gets split and a new level is added, then while inserting ++ * another entry to refill the AGFL, every level under the old root gets ++ * split again. This is: ++ * ++ * (full height split reservation) + (AGFL refill split height) ++ * = (current height + 1) + (current height - 1) ++ * = (new height) + (new height - 2) ++ * = 2 * new height - 2 ++ * ++ * For a btree of maximum height, the worst case is that every level ++ * under the root gets split, then while inserting another entry to ++ * refill the AGFL, every level under the root gets split again. 
This is ++ * also: ++ * ++ * 2 * (current height - 1) ++ * = 2 * (new height - 1) ++ * = 2 * new height - 2 ++ */ ++ + /* space needed by-bno freespace btree */ + min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1, +- mp->m_alloc_maxlevels); ++ mp->m_alloc_maxlevels) * 2 - 2; + /* space needed by-size freespace btree */ + min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1, +- mp->m_alloc_maxlevels); ++ mp->m_alloc_maxlevels) * 2 - 2; + /* space needed reverse mapping used space btree */ + if (xfs_has_rmapbt(mp)) + min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1, +- mp->m_rmap_maxlevels); ++ mp->m_rmap_maxlevels) * 2 - 2; + + return min_free; + } +diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c +index 9dc33cdc2ab9ca..27d3121e6da9e8 100644 +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -4807,7 +4807,7 @@ xfs_bmap_del_extent_delay( + ASSERT(got_endoff >= del_endoff); + + if (isrt) { +- uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); ++ uint64_t rtexts = del->br_blockcount; + + do_div(rtexts, mp->m_sb.sb_rextsize); + xfs_mod_frextents(mp, rtexts); +@@ -5037,33 +5037,20 @@ xfs_bmap_del_extent_real( + + flags = XFS_ILOG_CORE; + if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { +- xfs_filblks_t len; +- xfs_extlen_t mod; +- +- len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize, +- &mod); +- ASSERT(mod == 0); +- + if (!(bflags & XFS_BMAPI_REMAP)) { +- xfs_fsblock_t bno; +- +- bno = div_u64_rem(del->br_startblock, +- mp->m_sb.sb_rextsize, &mod); +- ASSERT(mod == 0); +- +- error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); ++ error = xfs_rtfree_blocks(tp, del->br_startblock, ++ del->br_blockcount); + if (error) + goto done; + } + + do_fx = 0; +- nblks = len * mp->m_sb.sb_rextsize; + qfield = XFS_TRANS_DQ_RTBCOUNT; + } else { + do_fx = 1; +- nblks = del->br_blockcount; + qfield = XFS_TRANS_DQ_BCOUNT; + } ++ nblks = del->br_blockcount; + + del_endblock = del->br_startblock + del->br_blockcount; + if (cur) { +diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c +index 5a321b78339863..92470ed3fcbd59 100644 +--- a/fs/xfs/libxfs/xfs_defer.c ++++ b/fs/xfs/libxfs/xfs_defer.c +@@ -245,21 +245,18 @@ xfs_defer_create_intents( + return ret; + } + +-/* Abort all the intents that were committed. */ + STATIC void +-xfs_defer_trans_abort( +- struct xfs_trans *tp, +- struct list_head *dop_pending) ++xfs_defer_pending_abort( ++ struct xfs_mount *mp, ++ struct list_head *dop_list) + { + struct xfs_defer_pending *dfp; + const struct xfs_defer_op_type *ops; + +- trace_xfs_defer_trans_abort(tp, _RET_IP_); +- + /* Abort intent items that don't have a done item. */ +- list_for_each_entry(dfp, dop_pending, dfp_list) { ++ list_for_each_entry(dfp, dop_list, dfp_list) { + ops = defer_op_types[dfp->dfp_type]; +- trace_xfs_defer_pending_abort(tp->t_mountp, dfp); ++ trace_xfs_defer_pending_abort(mp, dfp); + if (dfp->dfp_intent && !dfp->dfp_done) { + ops->abort_intent(dfp->dfp_intent); + dfp->dfp_intent = NULL; +@@ -267,6 +264,16 @@ xfs_defer_trans_abort( + } + } + ++/* Abort all the intents that were committed. */ ++STATIC void ++xfs_defer_trans_abort( ++ struct xfs_trans *tp, ++ struct list_head *dop_pending) ++{ ++ trace_xfs_defer_trans_abort(tp, _RET_IP_); ++ xfs_defer_pending_abort(tp->t_mountp, dop_pending); ++} ++ + /* + * Capture resources that the caller said not to release ("held") when the + * transaction commits. Caller is responsible for zero-initializing @dres. 
+@@ -754,12 +761,13 @@ xfs_defer_ops_capture( + + /* Release all resources that we used to capture deferred ops. */ + void +-xfs_defer_ops_capture_free( ++xfs_defer_ops_capture_abort( + struct xfs_mount *mp, + struct xfs_defer_capture *dfc) + { + unsigned short i; + ++ xfs_defer_pending_abort(mp, &dfc->dfc_dfops); + xfs_defer_cancel_list(mp, &dfc->dfc_dfops); + + for (i = 0; i < dfc->dfc_held.dr_bufs; i++) +@@ -800,7 +808,7 @@ xfs_defer_ops_capture_and_commit( + /* Commit the transaction and add the capture structure to the list. */ + error = xfs_trans_commit(tp); + if (error) { +- xfs_defer_ops_capture_free(mp, dfc); ++ xfs_defer_ops_capture_abort(mp, dfc); + return error; + } + +diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h +index 114a3a4930a3c4..8788ad5f6a731f 100644 +--- a/fs/xfs/libxfs/xfs_defer.h ++++ b/fs/xfs/libxfs/xfs_defer.h +@@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp, + struct list_head *capture_list); + void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp, + struct xfs_defer_resources *dres); +-void xfs_defer_ops_capture_free(struct xfs_mount *mp, ++void xfs_defer_ops_capture_abort(struct xfs_mount *mp, + struct xfs_defer_capture *d); + void xfs_defer_resources_rele(struct xfs_defer_resources *dres); + +diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c +index 758aacd8166bfd..601b05ca5fc2f7 100644 +--- a/fs/xfs/libxfs/xfs_inode_buf.c ++++ b/fs/xfs/libxfs/xfs_inode_buf.c +@@ -507,6 +507,9 @@ xfs_dinode_verify( + if (mode && nextents + naextents > nblocks) + return __this_address; + ++ if (nextents + naextents == 0 && nblocks != 0) ++ return __this_address; ++ + if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) + return __this_address; + +diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c +index fa180ab66b73a7..655108a4cd05d7 100644 +--- a/fs/xfs/libxfs/xfs_rtbitmap.c ++++ b/fs/xfs/libxfs/xfs_rtbitmap.c +@@ -1005,6 +1005,39 @@ xfs_rtfree_extent( + return 0; + } + ++/* ++ * Free some blocks in the realtime subvolume. rtbno and rtlen are in units of ++ * rt blocks, not rt extents; must be aligned to the rt extent size; and rtlen ++ * cannot exceed XFS_MAX_BMBT_EXTLEN. ++ */ ++int ++xfs_rtfree_blocks( ++ struct xfs_trans *tp, ++ xfs_fsblock_t rtbno, ++ xfs_filblks_t rtlen) ++{ ++ struct xfs_mount *mp = tp->t_mountp; ++ xfs_rtblock_t bno; ++ xfs_filblks_t len; ++ xfs_extlen_t mod; ++ ++ ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN); ++ ++ len = div_u64_rem(rtlen, mp->m_sb.sb_rextsize, &mod); ++ if (mod) { ++ ASSERT(mod == 0); ++ return -EIO; ++ } ++ ++ bno = div_u64_rem(rtbno, mp->m_sb.sb_rextsize, &mod); ++ if (mod) { ++ ASSERT(mod == 0); ++ return -EIO; ++ } ++ ++ return xfs_rtfree_extent(tp, bno, len); ++} ++ + /* Find all the free records within a given range. 
*/ + int + xfs_rtalloc_query_range( +diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h +index a5e14740ec9ac3..19134b23c10be3 100644 +--- a/fs/xfs/libxfs/xfs_sb.h ++++ b/fs/xfs/libxfs/xfs_sb.h +@@ -25,7 +25,7 @@ extern uint64_t xfs_sb_version_to_features(struct xfs_sb *sbp); + + extern int xfs_update_secondary_sbs(struct xfs_mount *mp); + +-#define XFS_FS_GEOM_MAX_STRUCT_VER (4) ++#define XFS_FS_GEOM_MAX_STRUCT_VER (5) + extern void xfs_fs_geometry(struct xfs_mount *mp, struct xfs_fsop_geom *geo, + int struct_version); + extern int xfs_sb_read_secondary(struct xfs_mount *mp, +diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c +index ce8e17ab543466..468bb61a5e46df 100644 +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -780,12 +780,10 @@ xfs_alloc_file_space( + { + xfs_mount_t *mp = ip->i_mount; + xfs_off_t count; +- xfs_filblks_t allocated_fsb; + xfs_filblks_t allocatesize_fsb; + xfs_extlen_t extsz, temp; + xfs_fileoff_t startoffset_fsb; + xfs_fileoff_t endoffset_fsb; +- int nimaps; + int rt; + xfs_trans_t *tp; + xfs_bmbt_irec_t imaps[1], *imapp; +@@ -808,7 +806,6 @@ xfs_alloc_file_space( + + count = len; + imapp = &imaps[0]; +- nimaps = 1; + startoffset_fsb = XFS_B_TO_FSBT(mp, offset); + endoffset_fsb = XFS_B_TO_FSB(mp, offset + count); + allocatesize_fsb = endoffset_fsb - startoffset_fsb; +@@ -819,6 +816,7 @@ xfs_alloc_file_space( + while (allocatesize_fsb && !error) { + xfs_fileoff_t s, e; + unsigned int dblocks, rblocks, resblks; ++ int nimaps = 1; + + /* + * Determine space reservations for data/realtime. +@@ -884,15 +882,19 @@ xfs_alloc_file_space( + if (error) + break; + +- allocated_fsb = imapp->br_blockcount; +- +- if (nimaps == 0) { +- error = -ENOSPC; +- break; ++ /* ++ * If the allocator cannot find a single free extent large ++ * enough to cover the start block of the requested range, ++ * xfs_bmapi_write will return 0 but leave *nimaps set to 0. ++ * ++ * In that case we simply need to keep looping with the same ++ * startoffset_fsb so that one of the following allocations ++ * will eventually reach the requested range. ++ */ ++ if (nimaps) { ++ startoffset_fsb += imapp->br_blockcount; ++ allocatesize_fsb -= imapp->br_blockcount; + } +- +- startoffset_fsb += allocated_fsb; +- allocatesize_fsb -= allocated_fsb; + } + + return error; +diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c +index 7f071757f27857..a8b2f3b278ea5e 100644 +--- a/fs/xfs/xfs_dquot.c ++++ b/fs/xfs/xfs_dquot.c +@@ -562,7 +562,8 @@ xfs_dquot_from_disk( + struct xfs_dquot *dqp, + struct xfs_buf *bp) + { +- struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset; ++ struct xfs_dqblk *dqb = xfs_buf_offset(bp, dqp->q_bufoffset); ++ struct xfs_disk_dquot *ddqp = &dqb->dd_diskdq; + + /* + * Ensure that we got the type and ID we were looking for. +@@ -1250,7 +1251,7 @@ xfs_qm_dqflush( + } + + /* Flush the incore dquot to the ondisk buffer. 
*/ +- dqblk = bp->b_addr + dqp->q_bufoffset; ++ dqblk = xfs_buf_offset(bp, dqp->q_bufoffset); + xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp); + + /* +diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c +index 8966ba842395bf..2c2720ce692382 100644 +--- a/fs/xfs/xfs_dquot_item_recover.c ++++ b/fs/xfs/xfs_dquot_item_recover.c +@@ -19,6 +19,7 @@ + #include "xfs_log.h" + #include "xfs_log_priv.h" + #include "xfs_log_recover.h" ++#include "xfs_error.h" + + STATIC void + xlog_recover_dquot_ra_pass2( +@@ -65,6 +66,7 @@ xlog_recover_dquot_commit_pass2( + { + struct xfs_mount *mp = log->l_mp; + struct xfs_buf *bp; ++ struct xfs_dqblk *dqb; + struct xfs_disk_dquot *ddq, *recddq; + struct xfs_dq_logformat *dq_f; + xfs_failaddr_t fa; +@@ -130,14 +132,14 @@ xlog_recover_dquot_commit_pass2( + return error; + + ASSERT(bp); +- ddq = xfs_buf_offset(bp, dq_f->qlf_boffset); ++ dqb = xfs_buf_offset(bp, dq_f->qlf_boffset); ++ ddq = &dqb->dd_diskdq; + + /* + * If the dquot has an LSN in it, recover the dquot only if it's less + * than the lsn of the transaction we are replaying. + */ + if (xfs_has_crc(mp)) { +- struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq; + xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn); + + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { +@@ -147,10 +149,23 @@ xlog_recover_dquot_commit_pass2( + + memcpy(ddq, recddq, item->ri_buf[1].i_len); + if (xfs_has_crc(mp)) { +- xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), ++ xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF); + } + ++ /* Validate the recovered dquot. */ ++ fa = xfs_dqblk_verify(log->l_mp, dqb, dq_f->qlf_id); ++ if (fa) { ++ XFS_CORRUPTION_ERROR("Bad dquot after recovery", ++ XFS_ERRLEVEL_LOW, mp, dqb, ++ sizeof(struct xfs_dqblk)); ++ xfs_alert(mp, ++ "Metadata corruption detected at %pS, dquot 0x%x", ++ fa, dq_f->qlf_id); ++ error = -EFSCORRUPTED; ++ goto out_release; ++ } ++ + ASSERT(dq_f->qlf_size == 2); + ASSERT(bp->b_mount == mp); + bp->b_flags |= _XBF_LOGRECOVERY; +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c +index 8de40cf63a5bfd..821cb86a83bd57 100644 +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -214,6 +214,43 @@ xfs_ilock_iocb( + return 0; + } + ++static int ++xfs_ilock_iocb_for_write( ++ struct kiocb *iocb, ++ unsigned int *lock_mode) ++{ ++ ssize_t ret; ++ struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp)); ++ ++ ret = xfs_ilock_iocb(iocb, *lock_mode); ++ if (ret) ++ return ret; ++ ++ if (*lock_mode == XFS_IOLOCK_EXCL) ++ return 0; ++ if (!xfs_iflags_test(ip, XFS_IREMAPPING)) ++ return 0; ++ ++ xfs_iunlock(ip, *lock_mode); ++ *lock_mode = XFS_IOLOCK_EXCL; ++ return xfs_ilock_iocb(iocb, *lock_mode); ++} ++ ++static unsigned int ++xfs_ilock_for_write_fault( ++ struct xfs_inode *ip) ++{ ++ /* get a shared lock if no remapping in progress */ ++ xfs_ilock(ip, XFS_MMAPLOCK_SHARED); ++ if (!xfs_iflags_test(ip, XFS_IREMAPPING)) ++ return XFS_MMAPLOCK_SHARED; ++ ++ /* wait for remapping to complete */ ++ xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); ++ xfs_ilock(ip, XFS_MMAPLOCK_EXCL); ++ return XFS_MMAPLOCK_EXCL; ++} ++ + STATIC ssize_t + xfs_file_dio_read( + struct kiocb *iocb, +@@ -523,7 +560,7 @@ xfs_file_dio_write_aligned( + unsigned int iolock = XFS_IOLOCK_SHARED; + ssize_t ret; + +- ret = xfs_ilock_iocb(iocb, iolock); ++ ret = xfs_ilock_iocb_for_write(iocb, &iolock); + if (ret) + return ret; + ret = xfs_file_write_checks(iocb, from, &iolock); +@@ -590,7 +627,7 @@ xfs_file_dio_write_unaligned( + flags = IOMAP_DIO_FORCE_WAIT; + } + +- ret = 
xfs_ilock_iocb(iocb, iolock); ++ ret = xfs_ilock_iocb_for_write(iocb, &iolock); + if (ret) + return ret; + +@@ -1158,7 +1195,7 @@ xfs_file_remap_range( + if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out)) + xfs_log_force_inode(dest); + out_unlock: +- xfs_iunlock2_io_mmap(src, dest); ++ xfs_iunlock2_remapping(src, dest); + if (ret) + trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); + /* +@@ -1313,6 +1350,7 @@ __xfs_filemap_fault( + struct inode *inode = file_inode(vmf->vma->vm_file); + struct xfs_inode *ip = XFS_I(inode); + vm_fault_t ret; ++ unsigned int lock_mode = 0; + + trace_xfs_filemap_fault(ip, pe_size, write_fault); + +@@ -1321,25 +1359,24 @@ __xfs_filemap_fault( + file_update_time(vmf->vma->vm_file); + } + ++ if (IS_DAX(inode) || write_fault) ++ lock_mode = xfs_ilock_for_write_fault(XFS_I(inode)); ++ + if (IS_DAX(inode)) { + pfn_t pfn; + +- xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + ret = xfs_dax_fault(vmf, pe_size, write_fault, &pfn); + if (ret & VM_FAULT_NEEDDSYNC) + ret = dax_finish_sync_fault(vmf, pe_size, pfn); +- xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ++ } else if (write_fault) { ++ ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops); + } else { +- if (write_fault) { +- xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); +- ret = iomap_page_mkwrite(vmf, +- &xfs_page_mkwrite_iomap_ops); +- xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); +- } else { +- ret = filemap_fault(vmf); +- } ++ ret = filemap_fault(vmf); + } + ++ if (lock_mode) ++ xfs_iunlock(XFS_I(inode), lock_mode); ++ + if (write_fault) + sb_end_pagefault(inode->i_sb); + return ret; +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index 9090852692274f..dc84c75be85285 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -919,6 +919,13 @@ xfs_droplink( + xfs_trans_t *tp, + xfs_inode_t *ip) + { ++ if (VFS_I(ip)->i_nlink == 0) { ++ xfs_alert(ip->i_mount, ++ "%s: Attempt to drop inode (%llu) with nlink zero.", ++ __func__, ip->i_ino); ++ return -EFSCORRUPTED; ++ } ++ + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + + drop_nlink(VFS_I(ip)); +@@ -3637,6 +3644,23 @@ xfs_iunlock2_io_mmap( + inode_unlock(VFS_I(ip1)); + } + ++/* Drop the MMAPLOCK and the IOLOCK after a remap completes. */ ++void ++xfs_iunlock2_remapping( ++ struct xfs_inode *ip1, ++ struct xfs_inode *ip2) ++{ ++ xfs_iflags_clear(ip1, XFS_IREMAPPING); ++ ++ if (ip1 != ip2) ++ xfs_iunlock(ip1, XFS_MMAPLOCK_SHARED); ++ xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); ++ ++ if (ip1 != ip2) ++ inode_unlock_shared(VFS_I(ip1)); ++ inode_unlock(VFS_I(ip2)); ++} ++ + /* + * Reload the incore inode list for this inode. Caller should ensure that + * the link count cannot change, either by taking ILOCK_SHARED or otherwise +diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h +index 85395ad2859c08..c177c92f3aa578 100644 +--- a/fs/xfs/xfs_inode.h ++++ b/fs/xfs/xfs_inode.h +@@ -347,6 +347,14 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) + /* Quotacheck is running but inode has not been added to quota counts. */ + #define XFS_IQUOTAUNCHECKED (1 << 14) + ++/* ++ * Remap in progress. Callers that wish to update file data while ++ * holding a shared IOLOCK or MMAPLOCK must drop the lock and retake ++ * the lock in exclusive mode. Relocking the file will block until ++ * IREMAPPING is cleared. ++ */ ++#define XFS_IREMAPPING (1U << 15) ++ + /* All inode state flags related to inode reclaim. 
*/ + #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \ + XFS_IRECLAIM | \ +@@ -561,6 +569,14 @@ extern void xfs_setup_inode(struct xfs_inode *ip); + extern void xfs_setup_iops(struct xfs_inode *ip); + extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init); + ++static inline void xfs_update_stable_writes(struct xfs_inode *ip) ++{ ++ if (bdev_stable_writes(xfs_inode_buftarg(ip)->bt_bdev)) ++ mapping_set_stable_writes(VFS_I(ip)->i_mapping); ++ else ++ mapping_clear_stable_writes(VFS_I(ip)->i_mapping); ++} ++ + /* + * When setting up a newly allocated inode, we need to call + * xfs_finish_inode_setup() once the inode is fully instantiated at +@@ -595,6 +611,7 @@ void xfs_end_io(struct work_struct *work); + + int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); + void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); ++void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2); + + static inline bool + xfs_inode_unlinked_incomplete( +diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c +index e6609067ef2610..144198a6b2702c 100644 +--- a/fs/xfs/xfs_inode_item_recover.c ++++ b/fs/xfs/xfs_inode_item_recover.c +@@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2( + struct xfs_log_dinode *ldip; + uint isize; + int need_free = 0; ++ xfs_failaddr_t fa; + + if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { + in_f = item->ri_buf[0].i_addr; +@@ -530,8 +531,19 @@ xlog_recover_inode_commit_pass2( + (dip->di_mode != 0)) + error = xfs_recover_inode_owner_change(mp, dip, in_f, + buffer_list); +- /* re-generate the checksum. */ ++ /* re-generate the checksum and validate the recovered inode. */ + xfs_dinode_calc_crc(log->l_mp, dip); ++ fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip); ++ if (fa) { ++ XFS_CORRUPTION_ERROR( ++ "Bad dinode after recovery", ++ XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip)); ++ xfs_alert(mp, ++ "Metadata corruption detected at %pS, inode 0x%llx", ++ fa, in_f->ilf_ino); ++ error = -EFSCORRUPTED; ++ goto out_release; ++ } + + ASSERT(bp->b_mount == mp); + bp->b_flags |= _XBF_LOGRECOVERY; +diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c +index 85fbb3b71d1c67..c7cb496dc34588 100644 +--- a/fs/xfs/xfs_ioctl.c ++++ b/fs/xfs/xfs_ioctl.c +@@ -1120,23 +1120,25 @@ xfs_ioctl_setattr_xflags( + struct fileattr *fa) + { + struct xfs_mount *mp = ip->i_mount; ++ bool rtflag = (fa->fsx_xflags & FS_XFLAG_REALTIME); + uint64_t i_flags2; + +- /* Can't change realtime flag if any extents are allocated. */ +- if ((ip->i_df.if_nextents || ip->i_delayed_blks) && +- XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME)) +- return -EINVAL; ++ if (rtflag != XFS_IS_REALTIME_INODE(ip)) { ++ /* Can't change realtime flag if any extents are allocated. */ ++ if (ip->i_df.if_nextents || ip->i_delayed_blks) ++ return -EINVAL; ++ } + +- /* If realtime flag is set then must have realtime device */ +- if (fa->fsx_xflags & FS_XFLAG_REALTIME) { ++ if (rtflag) { ++ /* If realtime flag is set then must have realtime device */ + if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || + (ip->i_extsize % mp->m_sb.sb_rextsize)) + return -EINVAL; +- } + +- /* Clear reflink if we are actually able to set the rt flag. */ +- if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip)) +- ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; ++ /* Clear reflink if we are actually able to set the rt flag. 
*/ ++ if (xfs_is_reflink_inode(ip)) ++ ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; ++ } + + /* Don't allow us to set DAX mode for a reflinked file for now. */ + if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip)) +@@ -1151,6 +1153,14 @@ xfs_ioctl_setattr_xflags( + ip->i_diflags2 = i_flags2; + + xfs_diflags_to_iflags(ip, false); ++ ++ /* ++ * Make the stable writes flag match that of the device the inode ++ * resides on when flipping the RT flag. ++ */ ++ if (rtflag != XFS_IS_REALTIME_INODE(ip) && S_ISREG(VFS_I(ip)->i_mode)) ++ xfs_update_stable_writes(ip); ++ + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + XFS_STATS_INC(mp, xs_ig_attrchg); +diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c +index 2e10e1c66ad60a..6fbdc0a19e54cb 100644 +--- a/fs/xfs/xfs_iops.c ++++ b/fs/xfs/xfs_iops.c +@@ -1291,6 +1291,13 @@ xfs_setup_inode( + gfp_mask = mapping_gfp_mask(inode->i_mapping); + mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); + ++ /* ++ * For real-time inodes update the stable write flags to that of the RT ++ * device instead of the data device. ++ */ ++ if (S_ISREG(inode->i_mode) && XFS_IS_REALTIME_INODE(ip)) ++ xfs_update_stable_writes(ip); ++ + /* + * If there is no attribute fork no ACL can exist on this inode, + * and it can't have any file capabilities attached to it either. +diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c +index 59c982297503c8..ce6b303484cf19 100644 +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -1891,9 +1891,7 @@ xlog_write_iclog( + * the buffer manually, the code needs to be kept in sync + * with the I/O completion path. + */ +- xlog_state_done_syncing(iclog); +- up(&iclog->ic_sema); +- return; ++ goto sync; + } + + /* +@@ -1923,20 +1921,17 @@ xlog_write_iclog( + * avoid shutdown re-entering this path and erroring out again. 
+ */ + if (log->l_targ != log->l_mp->m_ddev_targp && +- blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) { +- xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); +- return; +- } ++ blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) ++ goto shutdown; + } + if (iclog->ic_flags & XLOG_ICL_NEED_FUA) + iclog->ic_bio.bi_opf |= REQ_FUA; + + iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); + +- if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) { +- xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); +- return; +- } ++ if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) ++ goto shutdown; ++ + if (is_vmalloc_addr(iclog->ic_data)) + flush_kernel_vmap_range(iclog->ic_data, count); + +@@ -1957,6 +1952,12 @@ xlog_write_iclog( + } + + submit_bio(&iclog->ic_bio); ++ return; ++shutdown: ++ xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); ++sync: ++ xlog_state_done_syncing(iclog); ++ up(&iclog->ic_sema); + } + + /* +diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c +index 006a376c34b255..e009bb23d8a289 100644 +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -2514,7 +2514,7 @@ xlog_abort_defer_ops( + + list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { + list_del_init(&dfc->dfc_list); +- xfs_defer_ops_capture_free(mp, dfc); ++ xfs_defer_ops_capture_abort(mp, dfc); + } + } + +diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c +index fe46bce8cae638..cbdc23217a42a7 100644 +--- a/fs/xfs/xfs_reflink.c ++++ b/fs/xfs/xfs_reflink.c +@@ -783,6 +783,7 @@ xfs_reflink_end_cow_extent( + } + } + del = got; ++ xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb); + + /* Grab the corresponding mapping in the data fork. */ + nmaps = 1; +@@ -1539,6 +1540,10 @@ xfs_reflink_remap_prep( + if (ret) + goto out_unlock; + ++ xfs_iflags_set(src, XFS_IREMAPPING); ++ if (inode_in != inode_out) ++ xfs_ilock_demote(src, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); ++ + return 0; + out_unlock: + xfs_iunlock2_io_mmap(src, dest); +diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c +index 292d5e54a92cc7..0bfbbc1dd0dab1 100644 +--- a/fs/xfs/xfs_rtalloc.c ++++ b/fs/xfs/xfs_rtalloc.c +@@ -211,6 +211,23 @@ xfs_rtallocate_range( + return error; + } + ++/* ++ * Make sure we don't run off the end of the rt volume. Be careful that ++ * adjusting maxlen downwards doesn't cause us to fail the alignment checks. ++ */ ++static inline xfs_extlen_t ++xfs_rtallocate_clamp_len( ++ struct xfs_mount *mp, ++ xfs_rtblock_t startrtx, ++ xfs_extlen_t rtxlen, ++ xfs_extlen_t prod) ++{ ++ xfs_extlen_t ret; ++ ++ ret = min(mp->m_sb.sb_rextents, startrtx + rtxlen) - startrtx; ++ return rounddown(ret, prod); ++} ++ + /* + * Attempt to allocate an extent minlen<=len<=maxlen starting from + * bitmap block bbno. If we don't get maxlen then use prod to trim +@@ -248,7 +265,7 @@ xfs_rtallocate_extent_block( + i <= end; + i++) { + /* Make sure we don't scan off the end of the rt volume. */ +- maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i; ++ maxlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod); + + /* + * See if there's a free extent of maxlen starting at i. +@@ -355,7 +372,8 @@ xfs_rtallocate_extent_exact( + int isfree; /* extent is free */ + xfs_rtblock_t next; /* next block to try (dummy) */ + +- ASSERT(minlen % prod == 0 && maxlen % prod == 0); ++ ASSERT(minlen % prod == 0); ++ ASSERT(maxlen % prod == 0); + /* + * Check if the range in question (for maxlen) is free. 
+ */ +@@ -438,7 +456,9 @@ xfs_rtallocate_extent_near( + xfs_rtblock_t n; /* next block to try */ + xfs_rtblock_t r; /* result block */ + +- ASSERT(minlen % prod == 0 && maxlen % prod == 0); ++ ASSERT(minlen % prod == 0); ++ ASSERT(maxlen % prod == 0); ++ + /* + * If the block number given is off the end, silently set it to + * the last block. +@@ -447,7 +467,7 @@ xfs_rtallocate_extent_near( + bno = mp->m_sb.sb_rextents - 1; + + /* Make sure we don't run off the end of the rt volume. */ +- maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno; ++ maxlen = xfs_rtallocate_clamp_len(mp, bno, maxlen, prod); + if (maxlen < minlen) { + *rtblock = NULLRTBLOCK; + return 0; +@@ -638,7 +658,8 @@ xfs_rtallocate_extent_size( + xfs_rtblock_t r; /* result block number */ + xfs_suminfo_t sum; /* summary information for extents */ + +- ASSERT(minlen % prod == 0 && maxlen % prod == 0); ++ ASSERT(minlen % prod == 0); ++ ASSERT(maxlen % prod == 0); + ASSERT(maxlen != 0); + + /* +@@ -954,7 +975,7 @@ xfs_growfs_rt( + return -EINVAL; + + /* Unsupported realtime features. */ +- if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp)) ++ if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp)) + return -EOPNOTSUPP; + + nrblocks = in->newblocks; +diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h +index 62c7ad79cbb618..65c284e9d33e93 100644 +--- a/fs/xfs/xfs_rtalloc.h ++++ b/fs/xfs/xfs_rtalloc.h +@@ -58,6 +58,10 @@ xfs_rtfree_extent( + xfs_rtblock_t bno, /* starting block number to free */ + xfs_extlen_t len); /* length of extent freed */ + ++/* Same as above, but in units of rt blocks. */ ++int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, ++ xfs_filblks_t rtlen); ++ + /* + * Initialize realtime fields in the mount structure. + */ +@@ -137,16 +141,17 @@ int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, + bool *is_free); + int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp); + #else +-# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) +-# define xfs_rtfree_extent(t,b,l) (ENOSYS) +-# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) +-# define xfs_growfs_rt(mp,in) (ENOSYS) +-# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS) +-# define xfs_rtalloc_query_all(m,t,f,p) (ENOSYS) +-# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS) +-# define xfs_verify_rtbno(m, r) (false) +-# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (ENOSYS) +-# define xfs_rtalloc_reinit_frextents(m) (0) ++# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (-ENOSYS) ++# define xfs_rtfree_extent(t,b,l) (-ENOSYS) ++# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) ++# define xfs_rtpick_extent(m,t,l,rb) (-ENOSYS) ++# define xfs_growfs_rt(mp,in) (-ENOSYS) ++# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS) ++# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) ++# define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) ++# define xfs_verify_rtbno(m, r) (false) ++# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) ++# define xfs_rtalloc_reinit_frextents(m) (0) + static inline int /* error */ + xfs_rtmount_init( + xfs_mount_t *mp) /* file system mount structure */ +@@ -157,7 +162,7 @@ xfs_rtmount_init( + xfs_warn(mp, "Not built with CONFIG_XFS_RT"); + return -ENOSYS; + } +-# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) ++# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 
0 : (-ENOSYS)) + # define xfs_rtunmount_inodes(m) + #endif /* CONFIG_XFS_RT */ + +diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h +index d31d76be498259..91ff537c62462e 100644 +--- a/include/linux/seccomp.h ++++ b/include/linux/seccomp.h +@@ -69,10 +69,10 @@ struct seccomp_data; + + #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER + static inline int secure_computing(void) { return 0; } +-static inline int __secure_computing(const struct seccomp_data *sd) { return 0; } + #else + static inline void secure_computing_strict(int this_syscall) { return; } + #endif ++static inline int __secure_computing(const struct seccomp_data *sd) { return 0; } + + static inline long prctl_get_seccomp(void) + { +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 480752fc3eb6e6..64502323be5e55 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -597,8 +597,10 @@ static inline void __io_cq_unlock_post_flush(struct io_ring_ctx *ctx) + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + io_commit_cqring_flush(ctx); +- if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) ++ if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)) { ++ smp_mb(); + __io_cqring_wake(ctx); ++ } + } + + void io_cq_unlock_post(struct io_ring_ctx *ctx) +diff --git a/kernel/softirq.c b/kernel/softirq.c +index a47396161843ab..6665f5cd60cb0f 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -294,17 +294,24 @@ static inline void invoke_softirq(void) + wakeup_softirqd(); + } + ++#define SCHED_SOFTIRQ_MASK BIT(SCHED_SOFTIRQ) ++ + /* + * flush_smp_call_function_queue() can raise a soft interrupt in a function +- * call. On RT kernels this is undesired and the only known functionality +- * in the block layer which does this is disabled on RT. If soft interrupts +- * get raised which haven't been raised before the flush, warn so it can be ++ * call. On RT kernels this is undesired and the only known functionalities ++ * are in the block layer which is disabled on RT, and in the scheduler for ++ * idle load balancing. If soft interrupts get raised which haven't been ++ * raised before the flush, warn if it is not a SCHED_SOFTIRQ so it can be + * investigated. 
+ */ + void do_softirq_post_smp_call_flush(unsigned int was_pending) + { +- if (WARN_ON_ONCE(was_pending != local_softirq_pending())) ++ unsigned int is_pending = local_softirq_pending(); ++ ++ if (unlikely(was_pending != is_pending)) { ++ WARN_ON_ONCE(was_pending != (is_pending & ~SCHED_SOFTIRQ_MASK)); + invoke_softirq(); ++ } + } + + #else /* CONFIG_PREEMPT_RT */ +diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c +index 67cabc40f1dca0..73d7372afb4317 100644 +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -218,7 +218,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, + struct ip_tunnel *t = NULL; + struct hlist_head *head = ip_bucket(itn, parms); + +- hlist_for_each_entry_rcu(t, head, hash_node) { ++ hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) { + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && + link == READ_ONCE(t->parms.link) && +diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c +index 0b45ef8b7ee218..b6a7cbd6bee0db 100644 +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -1180,8 +1180,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, + while (sibling) { + if (sibling->fib6_metric == rt->fib6_metric && + rt6_qualify_for_ecmp(sibling)) { +- list_add_tail(&rt->fib6_siblings, +- &sibling->fib6_siblings); ++ list_add_tail_rcu(&rt->fib6_siblings, ++ &sibling->fib6_siblings); + break; + } + sibling = rcu_dereference_protected(sibling->fib6_next, +@@ -1242,7 +1242,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, + fib6_siblings) + sibling->fib6_nsiblings--; + rt->fib6_nsiblings = 0; +- list_del_init(&rt->fib6_siblings); ++ list_del_rcu(&rt->fib6_siblings); + rt6_multipath_rebalance(next_sibling); + return err; + } +@@ -1955,7 +1955,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, + &rt->fib6_siblings, fib6_siblings) + sibling->fib6_nsiblings--; + rt->fib6_nsiblings = 0; +- list_del_init(&rt->fib6_siblings); ++ list_del_rcu(&rt->fib6_siblings); + rt6_multipath_rebalance(next_sibling); + } + +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 5ae3ff6ffb7e92..f3268bac9f198d 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -420,8 +420,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, + struct flowi6 *fl6, int oif, bool have_oif_match, + const struct sk_buff *skb, int strict) + { +- struct fib6_info *sibling, *next_sibling; + struct fib6_info *match = res->f6i; ++ struct fib6_info *sibling; + + if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) + goto out; +@@ -447,8 +447,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, + if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) + goto out; + +- list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, +- fib6_siblings) { ++ list_for_each_entry_rcu(sibling, &match->fib6_siblings, ++ fib6_siblings) { + const struct fib6_nh *nh = sibling->fib6_nh; + int nh_upper_bound; + +@@ -5189,14 +5189,18 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, + * nexthop. 
Since sibling routes are always added at the end of + * the list, find the first sibling of the last route appended + */ ++ rcu_read_lock(); ++ + if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { +- rt = list_first_entry(&rt_last->fib6_siblings, +- struct fib6_info, +- fib6_siblings); ++ rt = list_first_or_null_rcu(&rt_last->fib6_siblings, ++ struct fib6_info, ++ fib6_siblings); + } + + if (rt) + inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); ++ ++ rcu_read_unlock(); + } + + static bool ip6_route_mpath_should_notify(const struct fib6_info *rt) +@@ -5541,17 +5545,21 @@ static size_t rt6_nlmsg_size(struct fib6_info *f6i) + nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size, + &nexthop_len); + } else { +- struct fib6_info *sibling, *next_sibling; + struct fib6_nh *nh = f6i->fib6_nh; ++ struct fib6_info *sibling; + + nexthop_len = 0; + if (f6i->fib6_nsiblings) { + rt6_nh_nlmsg_size(nh, &nexthop_len); + +- list_for_each_entry_safe(sibling, next_sibling, +- &f6i->fib6_siblings, fib6_siblings) { ++ rcu_read_lock(); ++ ++ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, ++ fib6_siblings) { + rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); + } ++ ++ rcu_read_unlock(); + } + nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); + } +@@ -5715,7 +5723,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, + lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) + goto nla_put_failure; + } else if (rt->fib6_nsiblings) { +- struct fib6_info *sibling, *next_sibling; ++ struct fib6_info *sibling; + struct nlattr *mp; + + mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); +@@ -5727,14 +5735,21 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, + 0) < 0) + goto nla_put_failure; + +- list_for_each_entry_safe(sibling, next_sibling, +- &rt->fib6_siblings, fib6_siblings) { ++ rcu_read_lock(); ++ ++ list_for_each_entry_rcu(sibling, &rt->fib6_siblings, ++ fib6_siblings) { + if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, + sibling->fib6_nh->fib_nh_weight, +- AF_INET6, 0) < 0) ++ AF_INET6, 0) < 0) { ++ rcu_read_unlock(); ++ + goto nla_put_failure; ++ } + } + ++ rcu_read_unlock(); ++ + nla_nest_end(skb, mp); + } else if (rt->nh) { + if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id)) +@@ -6171,7 +6186,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, + err = -ENOBUFS; + seq = info->nlh ? 
info->nlh->nlmsg_seq : 0; + +- skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); ++ skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC); + if (!skb) + goto errout; + +@@ -6184,7 +6199,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, + goto errout; + } + rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, +- info->nlh, gfp_any()); ++ info->nlh, GFP_ATOMIC); + return; + errout: + if (err < 0) +diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c +index b10efeaf0629d2..9fd70462b41d5a 100644 +--- a/net/sched/sch_ets.c ++++ b/net/sched/sch_ets.c +@@ -91,6 +91,8 @@ ets_class_from_arg(struct Qdisc *sch, unsigned long arg) + { + struct ets_sched *q = qdisc_priv(sch); + ++ if (arg == 0 || arg > q->nbands) ++ return NULL; + return &q->classes[arg - 1]; + } + +diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig +index 0904827e2f3db7..627e9fc92f0b8a 100644 +--- a/sound/soc/codecs/Kconfig ++++ b/sound/soc/codecs/Kconfig +@@ -1982,6 +1982,7 @@ config SND_SOC_WM8993 + + config SND_SOC_WM8994 + tristate ++ depends on MFD_WM8994 + + config SND_SOC_WM8995 + tristate +diff --git a/sound/soc/samsung/Kconfig b/sound/soc/samsung/Kconfig +index 2a61e620cd3b0c..a529852144f988 100644 +--- a/sound/soc/samsung/Kconfig ++++ b/sound/soc/samsung/Kconfig +@@ -220,8 +220,9 @@ config SND_SOC_SAMSUNG_TM2_WM5110 + + config SND_SOC_SAMSUNG_ARIES_WM8994 + tristate "SoC I2S Audio support for WM8994 on Aries" +- depends on SND_SOC_SAMSUNG && MFD_WM8994 && IIO && EXTCON ++ depends on SND_SOC_SAMSUNG && I2C && IIO && EXTCON + select SND_SOC_BT_SCO ++ select MFD_WM8994 + select SND_SOC_WM8994 + select SND_SAMSUNG_I2S + help +@@ -233,8 +234,9 @@ config SND_SOC_SAMSUNG_ARIES_WM8994 + + config SND_SOC_SAMSUNG_MIDAS_WM1811 + tristate "SoC I2S Audio support for Midas boards" +- depends on SND_SOC_SAMSUNG ++ depends on SND_SOC_SAMSUNG && I2C + select SND_SAMSUNG_I2S ++ select MFD_WM8994 + select SND_SOC_WM8994 + help + Say Y if you want to add support for SoC audio on the Midas boards. 
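The net/ipv6 hunks earlier in this patch form one coherent conversion: the fib6 sibling list becomes an RCU-protected list. Writers switch to list_add_tail_rcu()/list_del_rcu(), readers drop list_for_each_entry_safe() (which only protects the iterator against its own deletions, not against concurrent ones) in favor of list_for_each_entry_rcu() under rcu_read_lock(), and the notification path moves from gfp_any() to GFP_ATOMIC because sleeping is not allowed inside the new read-side critical sections. The old list_del_init() re-initializes the unlinked node to point at itself, so a lockless reader standing on that node can spin forever; that is the soft lockup the selftest added below exercises. A minimal sketch of the reader/writer split, using hypothetical demo_* names rather than the kernel's fib6 types:

#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct item {
	int weight;
	struct list_head node;	/* linked on demo_list */
	struct rcu_head rcu;	/* for deferred free */
};

static LIST_HEAD(demo_list);
static DEFINE_SPINLOCK(demo_lock);	/* serializes writers only */

/* Writer: publish a new entry. The RCU list op orders its stores so a
 * concurrent reader sees either the old or the new list, never a
 * half-linked node.
 */
static void demo_add(struct item *it)
{
	spin_lock(&demo_lock);
	list_add_tail_rcu(&it->node, &demo_list);
	spin_unlock(&demo_lock);
}

/* Writer: unlink an entry. Unlike list_del_init(), list_del_rcu()
 * leaves ->next intact, so a reader already on this node can still
 * walk off of it; the memory is reclaimed only after every current
 * reader has finished.
 */
static void demo_del(struct item *it)
{
	spin_lock(&demo_lock);
	list_del_rcu(&it->node);
	spin_unlock(&demo_lock);
	kfree_rcu(it, rcu);
}

/* Reader: lockless traversal. Sleeping (and therefore GFP_KERNEL
 * allocation) is forbidden between rcu_read_lock() and
 * rcu_read_unlock(), which is why the notification paths above use
 * GFP_ATOMIC.
 */
static int demo_total_weight(void)
{
	struct item *it;
	int sum = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(it, &demo_list, node)
		sum += it->weight;
	rcu_read_unlock();

	return sum;
}

The ip_tunnel hunk applies the same discipline on the read side: the extra lockdep_rtnl_is_held() argument to hlist_for_each_entry_rcu() tells the RCU list-debugging checks that holding RTNL, not only rcu_read_lock(), makes that traversal legal.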
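The sch_ets hunk above is an unrelated but similarly small hardening: the class arguments reaching ets_class_from_arg() are 1-based, so a value of 0 would index classes[-1] after the "arg - 1" adjustment, and anything above q->nbands would run past the configured bands. The same guard reduced to a self-contained sketch (the demo_* names are illustrative, not from the kernel):

#include <stddef.h>

#define DEMO_MAX_BANDS 16

struct demo_class {
	int quantum;
};

struct demo_sched {
	unsigned int nbands;	/* how many bands are configured */
	struct demo_class classes[DEMO_MAX_BANDS];
};

/* Map a 1-based class argument to its slot, rejecting both 0 and
 * anything beyond the configured band count, exactly the two cases
 * the hunk above filters out before the "arg - 1" array access.
 */
static struct demo_class *demo_class_from_arg(struct demo_sched *q,
					      unsigned long arg)
{
	if (arg == 0 || arg > q->nbands)
		return NULL;
	return &q->classes[arg - 1];
}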
+diff --git a/sound/soc/samsung/midas_wm1811.c b/sound/soc/samsung/midas_wm1811.c +index 6931b9a45b3e5e..126098fdcf1b4e 100644 +--- a/sound/soc/samsung/midas_wm1811.c ++++ b/sound/soc/samsung/midas_wm1811.c +@@ -38,6 +38,17 @@ struct midas_priv { + struct snd_soc_jack headset_jack; + }; + ++static struct snd_soc_jack_pin headset_jack_pins[] = { ++ { ++ .pin = "Headphone", ++ .mask = SND_JACK_HEADPHONE, ++ }, ++ { ++ .pin = "Headset Mic", ++ .mask = SND_JACK_MICROPHONE, ++ }, ++}; ++ + static int midas_start_fll1(struct snd_soc_pcm_runtime *rtd, unsigned int rate) + { + struct snd_soc_card *card = rtd->card; +@@ -261,6 +272,7 @@ static const struct snd_soc_dapm_widget midas_dapm_widgets[] = { + SND_SOC_DAPM_LINE("HDMI", NULL), + SND_SOC_DAPM_LINE("FM In", midas_fm_set), + ++ SND_SOC_DAPM_HP("Headphone", NULL), + SND_SOC_DAPM_MIC("Headset Mic", NULL), + SND_SOC_DAPM_MIC("Main Mic", midas_mic_bias), + SND_SOC_DAPM_MIC("Sub Mic", midas_submic_bias), +@@ -305,11 +317,13 @@ static int midas_late_probe(struct snd_soc_card *card) + return ret; + } + +- ret = snd_soc_card_jack_new(card, "Headset", +- SND_JACK_HEADSET | SND_JACK_MECHANICAL | +- SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | +- SND_JACK_BTN_3 | SND_JACK_BTN_4 | SND_JACK_BTN_5, +- &priv->headset_jack); ++ ret = snd_soc_card_jack_new_pins(card, "Headset", ++ SND_JACK_HEADSET | SND_JACK_MECHANICAL | ++ SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | ++ SND_JACK_BTN_3 | SND_JACK_BTN_4 | SND_JACK_BTN_5, ++ &priv->headset_jack, ++ headset_jack_pins, ++ ARRAY_SIZE(headset_jack_pins)); + if (ret) + return ret; + +diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c +index 6525b02af1b0ff..219fa6ff14bd81 100644 +--- a/sound/usb/quirks.c ++++ b/sound/usb/quirks.c +@@ -2137,6 +2137,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { + QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x0c45, 0x6340, /* Sonix HD USB Camera */ + QUIRK_FLAG_GET_SAMPLE_RATE), ++ DEVICE_FLG(0x0d8c, 0x0014, /* USB Audio Device */ ++ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ + QUIRK_FLAG_FIXED_RATE), + DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ +diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile +index 48d1a68be1d525..080860a8826b1c 100644 +--- a/tools/testing/selftests/net/Makefile ++++ b/tools/testing/selftests/net/Makefile +@@ -72,6 +72,7 @@ TEST_GEN_PROGS += sk_bind_sendto_listen + TEST_GEN_PROGS += sk_connect_zero_addr + TEST_PROGS += test_ingress_egress_chaining.sh + TEST_GEN_FILES += nat6to4.o ++TEST_PROGS += ipv6_route_update_soft_lockup.sh + + TEST_FILES := settings + +diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh +new file mode 100644 +index 00000000000000..a6b2b1f9c641c9 +--- /dev/null ++++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh +@@ -0,0 +1,262 @@ ++#!/bin/bash ++# SPDX-License-Identifier: GPL-2.0 ++# ++# Testing for potential kernel soft lockup during IPv6 routing table ++# refresh under heavy outgoing IPv6 traffic. If a kernel soft lockup ++# occurs, a kernel panic will be triggered to prevent associated issues. 
++#
++#
++# Test Environment Layout
++#
++# ┌----------------┐                                           ┌----------------┐
++# |    SOURCE_NS   |                                           |     SINK_NS    |
++# |    NAMESPACE   |                                           |    NAMESPACE   |
++# |(iperf3 clients)|                                           |(iperf3 servers)|
++# |                |                                           |                |
++# |                |                                           |                |
++# |    ┌-----------|                 nexthops                  |---------┐      |
++# |    |veth_source|<----------------------------------------->|veth_sink|<┐    |
++# |    └-----------|2001:0DB8:1::0:1/96   2001:0DB8:1::1:1/96  |---------┘ |    |
++# |                |                     ^  2001:0DB8:1::1:2/96|           |    |
++# |                |                     .            .        |    fwd    |    |
++# |  ┌---------┐   |                     .            .        |           |    |
++# |  |  IPv6   |   |                     .            .        |     V     |    |
++# |  | routing |   |                     . 2001:0DB8:1::1:80/96|  ┌-----┐  |    |
++# |  |  table  |   |                     .                     |  | lo  |  |    |
++# |  | nexthop |   |                     .                     └--┴-----┴--┘
++# |  | update  |   |                     ............................> 2001:0DB8:2::1:1/128
++# |  └---------┘   |
++# └----------------┘
++#
++# The test script sets up two network namespaces, source_ns and sink_ns,
++# connected via a veth link. Within source_ns, it continuously updates the
++# IPv6 routing table by flushing and inserting IPV6_NEXTHOP_ADDR_COUNT nexthop
++# IPs destined for SINK_LOOPBACK_IP_ADDR in sink_ns. This refresh occurs at a
++# rate of 1/ROUTING_TABLE_REFRESH_PERIOD times per second for TEST_DURATION
++# seconds.
++#
++# Simultaneously, multiple iperf3 clients within source_ns generate heavy
++# outgoing IPv6 traffic. Each client is assigned a unique port number starting
++# at 5001 and incrementing sequentially. Each client targets a unique iperf3
++# server in sink_ns, bound to the SINK_LOOPBACK_IFACE interface and listening
++# on the same port number.
++#
++# The number of iperf3 servers and clients is set to half of the total
++# available cores on each machine.
++#
++# NOTE: We have tested this script on machines with various CPU specifications,
++# ranging from lower to higher performance as listed below. The test script
++# effectively triggered a kernel soft lockup on machines running an unpatched
++# kernel in under a minute:
++#
++# - 1x Intel Xeon E-2278G 8-Core Processor @ 3.40GHz
++# - 1x Intel Xeon E-2378G Processor 8-Core @ 2.80GHz
++# - 1x AMD EPYC 7401P 24-Core Processor @ 2.00GHz
++# - 1x AMD EPYC 7402P 24-Core Processor @ 2.80GHz
++# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz
++# - 1x Ampere Altra Q80-30 80-Core Processor @ 3.00GHz
++# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz
++# - 2x Intel Xeon Silver 4214 24-Core Processor @ 2.20GHz
++# - 1x AMD EPYC 7502P 32-Core @ 2.50GHz
++# - 1x Intel Xeon Gold 6314U 32-Core Processor @ 2.30GHz
++# - 2x Intel Xeon Gold 6338 32-Core Processor @ 2.00GHz
++#
++# On less performant machines, you may need to increase the TEST_DURATION
++# parameter to enhance the likelihood of encountering a race condition leading
++# to a kernel soft lockup and avoid a false negative result.
++#
++# NOTE: The test may not produce the expected result in virtualized
++# environments (e.g., qemu) due to differences in timing and CPU handling,
++# which can affect the conditions needed to trigger a soft lockup.
++
++source lib.sh
++source net_helper.sh
++
++TEST_DURATION=300
++ROUTING_TABLE_REFRESH_PERIOD=0.01
++
++IPERF3_BITRATE="300m"
++
++
++IPV6_NEXTHOP_ADDR_COUNT="128"
++IPV6_NEXTHOP_ADDR_MASK="96"
++IPV6_NEXTHOP_PREFIX="2001:0DB8:1"
++
++
++SOURCE_TEST_IFACE="veth_source"
++SOURCE_TEST_IP_ADDR="2001:0DB8:1::0:1/96"
++
++SINK_TEST_IFACE="veth_sink"
++# ${SINK_TEST_IFACE} is populated with the following range of IPv6 addresses:
++# 2001:0DB8:1::1:1 to 2001:0DB8:1::1:${IPV6_NEXTHOP_ADDR_COUNT}
++SINK_LOOPBACK_IFACE="lo"
++SINK_LOOPBACK_IP_MASK="128"
++SINK_LOOPBACK_IP_ADDR="2001:0DB8:2::1:1"
++
++nexthop_ip_list=""
++termination_signal=""
++kernel_softlockup_panic_prev_val=""
++
++terminate_ns_processes_by_pattern() {
++	local ns=$1
++	local pattern=$2
++
++	for pid in $(ip netns pids ${ns}); do
++		[ -e /proc/$pid/cmdline ] && grep -qe "${pattern}" /proc/$pid/cmdline && kill -9 $pid
++	done
++}
++
++cleanup() {
++	echo "info: cleaning up namespaces and terminating all processes within them..."
++
++
++	# Terminate iperf3 instances running in the source_ns. To avoid race
++	# conditions, first iterate over the PIDs and terminate those
++	# associated with the bash shells running the
++	# `while true; do iperf3 -c ...; done` loops. In a second iteration,
++	# terminate the individual `iperf3 -c ...` instances.
++	terminate_ns_processes_by_pattern ${source_ns} while
++	terminate_ns_processes_by_pattern ${source_ns} iperf3
++
++	# Repeat the same process for sink_ns
++	terminate_ns_processes_by_pattern ${sink_ns} while
++	terminate_ns_processes_by_pattern ${sink_ns} iperf3
++
++	# Check if any iperf3 instances are still running. This could happen
++	# if a core has entered an infinite loop and the timeout for detecting
++	# the soft lockup has not expired, but either the test interval has
++	# already elapsed or the test was terminated manually (e.g., with ^C)
++	for pid in $(ip netns pids ${source_ns}); do
++		if [ -e /proc/$pid/cmdline ] && grep -qe 'iperf3' /proc/$pid/cmdline; then
++			echo "FAIL: unable to terminate some iperf3 instances. Soft lockup is underway. A kernel panic is on the way!"
++			exit ${ksft_fail}
++		fi
++	done
++
++	if [ "$termination_signal" == "SIGINT" ]; then
++		echo "SKIP: Termination due to ^C (SIGINT)"
++	elif [ "$termination_signal" == "SIGALRM" ]; then
++		echo "PASS: No kernel soft lockup occurred during this ${TEST_DURATION}-second test"
++	fi
++
++	cleanup_ns ${source_ns} ${sink_ns}
++
++	sysctl -qw kernel.softlockup_panic=${kernel_softlockup_panic_prev_val}
++}
++
++setup_prepare() {
++	setup_ns source_ns sink_ns
++
++	ip -n ${source_ns} link add name ${SOURCE_TEST_IFACE} type veth peer name ${SINK_TEST_IFACE} netns ${sink_ns}
++
++	# Setting up the Source namespace
++	ip -n ${source_ns} addr add ${SOURCE_TEST_IP_ADDR} dev ${SOURCE_TEST_IFACE}
++	ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} qlen 10000
++	ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} up
++	ip netns exec ${source_ns} sysctl -qw net.ipv6.fib_multipath_hash_policy=1
++
++	# Setting up the Sink namespace
++	ip -n ${sink_ns} addr add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} dev ${SINK_LOOPBACK_IFACE}
++	ip -n ${sink_ns} link set dev ${SINK_LOOPBACK_IFACE} up
++	ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_LOOPBACK_IFACE}.forwarding=1
++
++	ip -n ${sink_ns} link set ${SINK_TEST_IFACE} up
++	ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_TEST_IFACE}.forwarding=1
++
++
++	# Populate nexthop IPv6 addresses on the test interface in the sink_ns
++	echo "info: populating ${IPV6_NEXTHOP_ADDR_COUNT} IPv6 addresses on the ${SINK_TEST_IFACE} interface ..."
++	for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do
++		ip -n ${sink_ns} addr add ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" "${IP}")/${IPV6_NEXTHOP_ADDR_MASK} dev ${SINK_TEST_IFACE};
++	done
++
++	# Preparing list of nexthops
++	for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do
++		nexthop_ip_list=$nexthop_ip_list" nexthop via ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" $IP) dev ${SOURCE_TEST_IFACE} weight 1"
++	done
++}
++
++
++test_soft_lockup_during_routing_table_refresh() {
++	# Start num_of_iperf_servers iperf3 servers in the sink_ns namespace,
++	# each listening on ports starting at 5001 and incrementing
++	# sequentially. Since iperf3 instances may terminate unexpectedly, a
++	# while loop is used to automatically restart them in such cases.
++	echo "info: starting ${num_of_iperf_servers} iperf3 servers in the sink_ns namespace ..."
++	for i in $(seq 1 ${num_of_iperf_servers}); do
++		cmd="iperf3 --bind ${SINK_LOOPBACK_IP_ADDR} -s -p $(printf '5%03d' ${i}) --rcv-timeout 200 &>/dev/null"
++		ip netns exec ${sink_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null
++	done
++
++	# Wait for the iperf3 servers to be ready
++	for i in $(seq ${num_of_iperf_servers}); do
++		port=$(printf '5%03d' ${i});
++		wait_local_port_listen ${sink_ns} ${port} tcp
++	done
++
++	# Continuously refresh the routing table in the background within
++	# the source_ns namespace
++	ip netns exec ${source_ns} bash -c "
++		while \$(ip netns list | grep -q ${source_ns}); do
++			ip -6 route add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} ${nexthop_ip_list};
++			sleep ${ROUTING_TABLE_REFRESH_PERIOD};
++			ip -6 route delete ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK};
++		done &"
++
++	# Start num_of_iperf_servers iperf3 clients in the source_ns namespace,
++	# each sending TCP traffic on sequential ports starting at 5001.
++	# Since iperf3 instances may terminate unexpectedly (e.g., if the route
++	# to the server is deleted in the background during a route refresh), a
++	# while loop is used to automatically restart them in such cases.
++	echo "info: starting ${num_of_iperf_servers} iperf3 clients in the source_ns namespace ..."
++	for i in $(seq 1 ${num_of_iperf_servers}); do
++		cmd="iperf3 -c ${SINK_LOOPBACK_IP_ADDR} -p $(printf '5%03d' ${i}) --length 64 --bitrate ${IPERF3_BITRATE} -t 0 --connect-timeout 150 &>/dev/null"
++		ip netns exec ${source_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null
++	done
++
++	echo "info: IPv6 routing table is being updated at the rate of $(echo "1/${ROUTING_TABLE_REFRESH_PERIOD}" | bc)/s for ${TEST_DURATION} seconds ..."
++	echo "info: A kernel soft lockup, if detected, results in a kernel panic!"
++
++	wait
++}
++
++# Make sure 'iperf3' is installed, skip the test otherwise
++if [ ! -x "$(command -v "iperf3")" ]; then
++	echo "SKIP: 'iperf3' is not installed. Skipping the test."
++	exit ${ksft_skip}
++fi
++
++# Use half of the available cores for the iperf3 server/client pairs
++num_of_iperf_servers=$(( $(nproc)/2 ))
++
++# Check if we are running on a multi-core machine, skip the test otherwise
++if [ "${num_of_iperf_servers}" -eq 0 ]; then
++	echo "SKIP: This test is not valid on a single core machine!"
++	exit ${ksft_skip}
++fi
++
++# Since the kernel soft lockup we're testing causes at least one core to enter
++# an infinite loop, destabilizing the host and likely affecting subsequent
++# tests, we trigger a kernel panic instead of reporting a failure and
++# continuing.
++kernel_softlockup_panic_prev_val=$(sysctl -n kernel.softlockup_panic)
++sysctl -qw kernel.softlockup_panic=1
++
++handle_sigint() {
++	termination_signal="SIGINT"
++	cleanup
++	exit ${ksft_skip}
++}
++
++handle_sigalrm() {
++	termination_signal="SIGALRM"
++	cleanup
++	exit ${ksft_pass}
++}
++
++trap handle_sigint SIGINT
++trap handle_sigalrm SIGALRM
++
++(sleep ${TEST_DURATION} && kill -s SIGALRM $$)&
++
++setup_prepare
++test_soft_lockup_during_routing_table_refresh
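A closing note on the selftest's control flow: the script bounds its own runtime with a detached subshell, (sleep ${TEST_DURATION} && kill -s SIGALRM $$)&, and the two trap handlers translate SIGALRM into PASS and SIGINT into SKIP after running cleanup. For readers who want the shape of that watchdog outside bash, a rough C equivalent (illustrative only, not part of the patch; alarm(2) stands in for the sleeping subshell):

#include <signal.h>
#include <stdlib.h>
#include <unistd.h>

static volatile sig_atomic_t got_signal;

static void on_signal(int signo)
{
	got_signal = signo;	/* async-signal-safe: just record it */
}

int main(void)
{
	struct sigaction sa = { .sa_handler = on_signal };

	sigemptyset(&sa.sa_mask);
	sigaction(SIGALRM, &sa, NULL);	/* test duration elapsed: pass */
	sigaction(SIGINT, &sa, NULL);	/* interrupted by the user: skip */

	alarm(300);	/* mirrors TEST_DURATION=300 */

	/* stand-in for the traffic and route-refresh loops */
	while (!got_signal)
		pause();

	/* cleanup would run here, as in the script's trap handlers */
	return got_signal == SIGALRM ? EXIT_SUCCESS : EXIT_FAILURE;
}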