commit: 077ba01d4280501028651068c5276ca67f0a827f Author: Mike Pagano <mpagano <AT> gentoo <DOT> org> AuthorDate: Sun Jul 28 16:25:12 2019 +0000 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org> CommitDate: Sun Jul 28 16:25:12 2019 +0000 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=077ba01d
Linux patch 5.1.21 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org> 0000_README | 4 + 1020_linux-5.1.21.patch | 2779 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2783 insertions(+) diff --git a/0000_README b/0000_README index 46f6f44..c48024b 100644 --- a/0000_README +++ b/0000_README @@ -123,6 +123,10 @@ Patch: 1019_linux-5.1.20.patch From: https://www.kernel.org Desc: Linux 5.1.20 +Patch: 1020_linux-5.1.21.patch +From: https://www.kernel.org +Desc: Linux 5.1.21 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1020_linux-5.1.21.patch b/1020_linux-5.1.21.patch new file mode 100644 index 0000000..bb1da07 --- /dev/null +++ b/1020_linux-5.1.21.patch @@ -0,0 +1,2779 @@ +diff --git a/Makefile b/Makefile +index ef6daeb823b9..254b5d831328 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 1 +-SUBLEVEL = 20 ++SUBLEVEL = 21 + EXTRAVERSION = + NAME = Shy Crocodile + +diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c +index 6718efb400f4..e6e86fdfd4a7 100644 +--- a/arch/mips/jz4740/board-qi_lb60.c ++++ b/arch/mips/jz4740/board-qi_lb60.c +@@ -469,27 +469,27 @@ static unsigned long pin_cfg_bias_disable[] = { + static struct pinctrl_map pin_map[] __initdata = { + /* NAND pin configuration */ + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-nand", +- "10010000.jz4740-pinctrl", "nand", "nand-cs1"), ++ "10010000.pin-controller", "nand-cs1", "nand"), + + /* fbdev pin configuration */ + PIN_MAP_MUX_GROUP("jz4740-fb", PINCTRL_STATE_DEFAULT, +- "10010000.jz4740-pinctrl", "lcd", "lcd-8bit"), ++ "10010000.pin-controller", "lcd-8bit", "lcd"), + PIN_MAP_MUX_GROUP("jz4740-fb", PINCTRL_STATE_SLEEP, +- "10010000.jz4740-pinctrl", "lcd", "lcd-no-pins"), ++ "10010000.pin-controller", "lcd-no-pins", "lcd"), + + /* MMC pin configuration */ + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-mmc.0", +- 
"10010000.jz4740-pinctrl", "mmc", "mmc-1bit"), ++ "10010000.pin-controller", "mmc-1bit", "mmc"), + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-mmc.0", +- "10010000.jz4740-pinctrl", "mmc", "mmc-4bit"), ++ "10010000.pin-controller", "mmc-4bit", "mmc"), + PIN_MAP_CONFIGS_PIN_DEFAULT("jz4740-mmc.0", +- "10010000.jz4740-pinctrl", "PD0", pin_cfg_bias_disable), ++ "10010000.pin-controller", "PD0", pin_cfg_bias_disable), + PIN_MAP_CONFIGS_PIN_DEFAULT("jz4740-mmc.0", +- "10010000.jz4740-pinctrl", "PD2", pin_cfg_bias_disable), ++ "10010000.pin-controller", "PD2", pin_cfg_bias_disable), + + /* PWM pin configuration */ + PIN_MAP_MUX_GROUP_DEFAULT("jz4740-pwm", +- "10010000.jz4740-pinctrl", "pwm4", "pwm4"), ++ "10010000.pin-controller", "pwm4", "pwm4"), + }; + + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index c79abe7ca093..564729a4a25c 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -609,15 +609,16 @@ struct kvm_vcpu_arch { + + /* + * QEMU userspace and the guest each have their own FPU state. +- * In vcpu_run, we switch between the user, maintained in the +- * task_struct struct, and guest FPU contexts. While running a VCPU, +- * the VCPU thread will have the guest FPU context. ++ * In vcpu_run, we switch between the user and guest FPU contexts. ++ * While running a VCPU, the VCPU thread will have the guest FPU ++ * context. + * + * Note that while the PKRU state lives inside the fpu registers, + * it is switched out separately at VMENTER and VMEXIT time. The + * "guest_fpu" state here contains the guest FPU context, with the + * host PRKU bits. 
+ */ ++ struct fpu user_fpu; + struct fpu *guest_fpu; + + u64 xcr0; +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index f83e79a4d0b2..5ce6bd1eb43d 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -184,6 +184,7 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) + { + vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); + vmcs_write64(VMCS_LINK_POINTER, -1ull); ++ vmx->nested.need_vmcs12_sync = false; + } + + static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) +@@ -211,6 +212,8 @@ static void free_nested(struct kvm_vcpu *vcpu) + if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) + return; + ++ kvm_clear_request(KVM_REQ_GET_VMCS12_PAGES, vcpu); ++ + vmx->nested.vmxon = false; + vmx->nested.smm.vmxon = false; + free_vpid(vmx->nested.vpid02); +@@ -1328,6 +1331,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) + u64 field_value; + struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; + ++ if (WARN_ON(!shadow_vmcs)) ++ return; ++ + preempt_disable(); + + vmcs_load(shadow_vmcs); +@@ -1366,6 +1372,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) + u64 field_value = 0; + struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; + ++ if (WARN_ON(!shadow_vmcs)) ++ return; ++ + vmcs_load(shadow_vmcs); + + for (q = 0; q < ARRAY_SIZE(fields); q++) { +@@ -4336,7 +4345,6 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu) + /* copy to memory all shadowed fields in case + they were modified */ + copy_shadow_to_vmcs12(vmx); +- vmx->nested.need_vmcs12_sync = false; + vmx_disable_shadow_vmcs(vmx); + } + vmx->nested.posted_intr_nv = -1; +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 37028ea85d4c..aede8fa2ea9a 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -8172,7 +8172,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) + static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) + { + preempt_disable(); +- 
copy_fpregs_to_fpstate(&current->thread.fpu); ++ copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); + /* PKRU is separately restored in kvm_x86_ops->run. */ + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, + ~XFEATURE_MASK_PKRU); +@@ -8185,7 +8185,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) + { + preempt_disable(); + copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); +- copy_kernel_to_fpregs(&current->thread.fpu.state); ++ copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); + preempt_enable(); + ++vcpu->stat.fpu_reload; + trace_kvm_fpu(0); +diff --git a/block/blk-zoned.c b/block/blk-zoned.c +index 6ea455b62cb4..6eff3c4712f4 100644 +--- a/block/blk-zoned.c ++++ b/block/blk-zoned.c +@@ -13,6 +13,9 @@ + #include <linux/rbtree.h> + #include <linux/blkdev.h> + #include <linux/blk-mq.h> ++#include <linux/mm.h> ++#include <linux/vmalloc.h> ++#include <linux/sched/mm.h> + + #include "blk.h" + +@@ -372,22 +375,25 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node, + * Allocate an array of struct blk_zone to get nr_zones zone information. + * The allocated array may be smaller than nr_zones. + */ +-static struct blk_zone *blk_alloc_zones(int node, unsigned int *nr_zones) ++static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones) + { +- size_t size = *nr_zones * sizeof(struct blk_zone); +- struct page *page; +- int order; +- +- for (order = get_order(size); order >= 0; order--) { +- page = alloc_pages_node(node, GFP_NOIO | __GFP_ZERO, order); +- if (page) { +- *nr_zones = min_t(unsigned int, *nr_zones, +- (PAGE_SIZE << order) / sizeof(struct blk_zone)); +- return page_address(page); +- } ++ struct blk_zone *zones; ++ size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES); ++ ++ /* ++ * GFP_KERNEL here is meaningless as the caller task context has ++ * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones() ++ * with memalloc_noio_save(). 
++ */ ++ zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL); ++ if (!zones) { ++ *nr_zones = 0; ++ return NULL; + } + +- return NULL; ++ *nr_zones = nrz; ++ ++ return zones; + } + + void blk_queue_free_zone_bitmaps(struct request_queue *q) +@@ -414,6 +420,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk) + unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; + unsigned int i, rep_nr_zones = 0, z = 0, nrz; + struct blk_zone *zones = NULL; ++ unsigned int noio_flag; + sector_t sector = 0; + int ret = 0; + +@@ -426,6 +433,12 @@ int blk_revalidate_disk_zones(struct gendisk *disk) + return 0; + } + ++ /* ++ * Ensure that all memory allocations in this context are done as ++ * if GFP_NOIO was specified. ++ */ ++ noio_flag = memalloc_noio_save(); ++ + if (!blk_queue_is_zoned(q) || !nr_zones) { + nr_zones = 0; + goto update; +@@ -442,7 +455,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk) + + /* Get zone information and initialize seq_zones_bitmap */ + rep_nr_zones = nr_zones; +- zones = blk_alloc_zones(q->node, &rep_nr_zones); ++ zones = blk_alloc_zones(&rep_nr_zones); + if (!zones) + goto out; + +@@ -479,8 +492,9 @@ update: + blk_mq_unfreeze_queue(q); + + out: +- free_pages((unsigned long)zones, +- get_order(rep_nr_zones * sizeof(struct blk_zone))); ++ memalloc_noio_restore(noio_flag); ++ ++ kvfree(zones); + kfree(seq_zones_wlock); + kfree(seq_zones_bitmap); + +diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c +index 7c858020d14b..efd5d09d56ad 100644 +--- a/drivers/dma-buf/dma-buf.c ++++ b/drivers/dma-buf/dma-buf.c +@@ -1068,6 +1068,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) + fence->ops->get_driver_name(fence), + fence->ops->get_timeline_name(fence), + dma_fence_is_signaled(fence) ? 
"" : "un"); ++ dma_fence_put(fence); + } + rcu_read_unlock(); + +diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c +index c1618335ca99..03004a218ec1 100644 +--- a/drivers/dma-buf/reservation.c ++++ b/drivers/dma-buf/reservation.c +@@ -357,6 +357,10 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj, + GFP_NOWAIT | __GFP_NOWARN); + if (!nshared) { + rcu_read_unlock(); ++ ++ dma_fence_put(fence_excl); ++ fence_excl = NULL; ++ + nshared = krealloc(shared, sz, GFP_KERNEL); + if (nshared) { + shared = nshared; +diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c +index 188b8e5c8e67..be34c672cf25 100644 +--- a/drivers/gpio/gpio-davinci.c ++++ b/drivers/gpio/gpio-davinci.c +@@ -242,8 +242,9 @@ static int davinci_gpio_probe(struct platform_device *pdev) + for (i = 0; i < nirq; i++) { + chips->irqs[i] = platform_get_irq(pdev, i); + if (chips->irqs[i] < 0) { +- dev_info(dev, "IRQ not populated, err = %d\n", +- chips->irqs[i]); ++ if (chips->irqs[i] != -EPROBE_DEFER) ++ dev_info(dev, "IRQ not populated, err = %d\n", ++ chips->irqs[i]); + return chips->irqs[i]; + } + } +diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c +index 6a3ec575a404..6779853f166b 100644 +--- a/drivers/gpio/gpiolib-of.c ++++ b/drivers/gpio/gpiolib-of.c +@@ -155,6 +155,7 @@ static void of_gpio_flags_quirks(struct device_node *np, + of_node_full_name(child)); + *flags |= OF_GPIO_ACTIVE_LOW; + } ++ of_node_put(child); + break; + } + } +diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c +index 433a14b9f731..253a1bbe37e8 100644 +--- a/drivers/net/caif/caif_hsi.c ++++ b/drivers/net/caif/caif_hsi.c +@@ -1455,7 +1455,7 @@ static void __exit cfhsi_exit_module(void) + rtnl_lock(); + list_for_each_safe(list_node, n, &cfhsi_list) { + cfhsi = list_entry(list_node, struct cfhsi, list); +- unregister_netdev(cfhsi->ndev); ++ unregister_netdevice(cfhsi->ndev); + } + rtnl_unlock(); + } +diff --git 
a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c +index ae750ab9a4d7..5f81d9a3a2a6 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -4910,6 +4910,8 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) + err = PTR_ERR(chip->reset); + goto out; + } ++ if (chip->reset) ++ usleep_range(1000, 2000); + + err = mv88e6xxx_detect(chip); + if (err) +diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +index 78a01880931c..9f07b85091f3 100644 +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +@@ -285,6 +285,9 @@ int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata) + hw_cons = le16_to_cpu(*txdata->tx_cons_sb); + sw_cons = txdata->tx_pkt_cons; + ++ /* Ensure subsequent loads occur after hw_cons */ ++ smp_rmb(); ++ + while (sw_cons != hw_cons) { + u16 pkt_cons; + +diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c +index 983245c0867c..2b79ef17e846 100644 +--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c ++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c +@@ -3086,39 +3086,42 @@ static void bcmgenet_timeout(struct net_device *dev) + netif_tx_wake_all_queues(dev); + } + +-#define MAX_MC_COUNT 16 ++#define MAX_MDF_FILTER 17 + + static inline void bcmgenet_set_mdf_addr(struct bcmgenet_priv *priv, + unsigned char *addr, +- int *i, +- int *mc) ++ int *i) + { +- u32 reg; +- + bcmgenet_umac_writel(priv, addr[0] << 8 | addr[1], + UMAC_MDF_ADDR + (*i * 4)); + bcmgenet_umac_writel(priv, addr[2] << 24 | addr[3] << 16 | + addr[4] << 8 | addr[5], + UMAC_MDF_ADDR + ((*i + 1) * 4)); +- reg = bcmgenet_umac_readl(priv, UMAC_MDF_CTRL); +- reg |= (1 << (MAX_MC_COUNT - *mc)); +- bcmgenet_umac_writel(priv, reg, UMAC_MDF_CTRL); + *i += 2; +- (*mc)++; + } + + static void bcmgenet_set_rx_mode(struct net_device *dev) + { + struct bcmgenet_priv 
*priv = netdev_priv(dev); + struct netdev_hw_addr *ha; +- int i, mc; ++ int i, nfilter; + u32 reg; + + netif_dbg(priv, hw, dev, "%s: %08X\n", __func__, dev->flags); + +- /* Promiscuous mode */ ++ /* Number of filters needed */ ++ nfilter = netdev_uc_count(dev) + netdev_mc_count(dev) + 2; ++ ++ /* ++ * Turn on promicuous mode for three scenarios ++ * 1. IFF_PROMISC flag is set ++ * 2. IFF_ALLMULTI flag is set ++ * 3. The number of filters needed exceeds the number filters ++ * supported by the hardware. ++ */ + reg = bcmgenet_umac_readl(priv, UMAC_CMD); +- if (dev->flags & IFF_PROMISC) { ++ if ((dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) || ++ (nfilter > MAX_MDF_FILTER)) { + reg |= CMD_PROMISC; + bcmgenet_umac_writel(priv, reg, UMAC_CMD); + bcmgenet_umac_writel(priv, 0, UMAC_MDF_CTRL); +@@ -3128,32 +3131,24 @@ static void bcmgenet_set_rx_mode(struct net_device *dev) + bcmgenet_umac_writel(priv, reg, UMAC_CMD); + } + +- /* UniMac doesn't support ALLMULTI */ +- if (dev->flags & IFF_ALLMULTI) { +- netdev_warn(dev, "ALLMULTI is not supported\n"); +- return; +- } +- + /* update MDF filter */ + i = 0; +- mc = 0; + /* Broadcast */ +- bcmgenet_set_mdf_addr(priv, dev->broadcast, &i, &mc); ++ bcmgenet_set_mdf_addr(priv, dev->broadcast, &i); + /* my own address.*/ +- bcmgenet_set_mdf_addr(priv, dev->dev_addr, &i, &mc); +- /* Unicast list*/ +- if (netdev_uc_count(dev) > (MAX_MC_COUNT - mc)) +- return; ++ bcmgenet_set_mdf_addr(priv, dev->dev_addr, &i); + +- if (!netdev_uc_empty(dev)) +- netdev_for_each_uc_addr(ha, dev) +- bcmgenet_set_mdf_addr(priv, ha->addr, &i, &mc); +- /* Multicast */ +- if (netdev_mc_empty(dev) || netdev_mc_count(dev) >= (MAX_MC_COUNT - mc)) +- return; ++ /* Unicast */ ++ netdev_for_each_uc_addr(ha, dev) ++ bcmgenet_set_mdf_addr(priv, ha->addr, &i); + ++ /* Multicast */ + netdev_for_each_mc_addr(ha, dev) +- bcmgenet_set_mdf_addr(priv, ha->addr, &i, &mc); ++ bcmgenet_set_mdf_addr(priv, ha->addr, &i); ++ ++ /* Enable filters */ ++ reg = GENMASK(MAX_MDF_FILTER 
- 1, MAX_MDF_FILTER - nfilter); ++ bcmgenet_umac_writel(priv, reg, UMAC_MDF_CTRL); + } + + /* Set the hardware MAC address. */ +diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c +index 8b3495ee2b6e..d097530af78a 100644 +--- a/drivers/net/ethernet/marvell/sky2.c ++++ b/drivers/net/ethernet/marvell/sky2.c +@@ -4933,6 +4933,13 @@ static const struct dmi_system_id msi_blacklist[] = { + DMI_MATCH(DMI_PRODUCT_NAME, "P-79"), + }, + }, ++ { ++ .ident = "ASUS P6T", ++ .matches = { ++ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), ++ DMI_MATCH(DMI_BOARD_NAME, "P6T"), ++ }, ++ }, + {} + }; + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h +index a80031b2cfaf..9a1a21a8ae45 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h +@@ -294,6 +294,7 @@ enum { + MLX5E_RQ_STATE_ENABLED, + MLX5E_RQ_STATE_AM, + MLX5E_RQ_STATE_NO_CSUM_COMPLETE, ++ MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ + }; + + struct mlx5e_cq { +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +index 476dd97f7f2f..f3d98748b211 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +@@ -142,22 +142,20 @@ static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq) + { + struct mlx5_eq_comp *eq = sq->cq.mcq.eq; + u32 eqe_count; +- int ret; + + netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eq->core.eqn, eq->core.cons_index, eq->core.irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); +- ret = eqe_count ? 
false : true; + if (!eqe_count) { + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); +- return ret; ++ return -EIO; + } + + netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n", + eqe_count, eq->core.eqn); + sq->channel->stats->eq_rearm++; +- return ret; ++ return 0; + } + + int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq) +@@ -264,13 +262,13 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, + + err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); + if (err) +- break; ++ goto unlock; + + err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn, + state, + netif_xmit_stopped(sq->txq)); + if (err) +- break; ++ goto unlock; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index 6a8dc73855c9..2793e4036953 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -948,6 +948,9 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, + if (err) + goto err_destroy_rq; + ++ if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full)) ++ __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state); ++ + if (params->rx_dim_enabled) + __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +index c3b3002ff62f..e8a3656d631d 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -829,8 +829,14 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, + if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) + goto csum_unnecessary; + ++ stats->csum_complete++; + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); ++ ++ if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state)) ++ return; /* CQE csum covers all received bytes */ ++ ++ 
/* csum might need some fixups ...*/ + if (network_depth > ETH_HLEN) + /* CQE csum is calculated from the IP header and does + * not cover VLAN headers (if present). This will add +@@ -841,7 +847,6 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, + skb->csum); + + mlx5e_skb_padding_csum(skb, network_depth, proto, stats); +- stats->csum_complete++; + return; + } + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +index 4eac42555c7d..5d0783e55f42 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +@@ -698,7 +698,9 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num, + + prof->init(mdev, netdev, prof, ipriv); + +- mlx5e_attach_netdev(epriv); ++ err = mlx5e_attach_netdev(epriv); ++ if (err) ++ goto detach; + netif_carrier_off(netdev); + + /* set rdma_netdev func pointers */ +@@ -714,6 +716,11 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num, + + return 0; + ++detach: ++ prof->cleanup(epriv); ++ if (ipriv->sub_interface) ++ return err; ++ mlx5e_destroy_mdev_resources(mdev); + destroy_ht: + mlx5i_pkey_qpn_ht_cleanup(netdev); + return err; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c +index 40f4a19b1ce1..5e2cea26f335 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c +@@ -100,27 +100,12 @@ static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy, + */ + if (entropy_flags.gre_calc_supported && + reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) { +- /* Other applications may change the global FW entropy +- * calculations settings. Check that the current entropy value +- * is the negative of the updated value. 
+- */ +- if (entropy_flags.force_enabled && +- enable == entropy_flags.gre_calc_enabled) { +- mlx5_core_warn(tun_entropy->mdev, +- "Unexpected GRE entropy calc setting - expected %d", +- !entropy_flags.gre_calc_enabled); +- return -EOPNOTSUPP; +- } +- err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, enable, +- entropy_flags.force_supported); ++ if (!entropy_flags.force_supported) ++ return 0; ++ err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, ++ enable, !enable); + if (err) + return err; +- /* if we turn on the entropy we don't need to force it anymore */ +- if (entropy_flags.force_supported && enable) { +- err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, 1, 0); +- if (err) +- return err; +- } + } else if (entropy_flags.calc_supported) { + /* Other applications may change the global FW entropy + * calculations settings. Check that the current entropy value +diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c +index 6d176be51a6b..309400fbf69d 100644 +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -5241,6 +5241,143 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp) + /* disable aspm and clock request before access ephy */ + rtl_hw_aspm_clkreq_enable(tp, false); + rtl_ephy_init(tp, e_info_8411_2, ARRAY_SIZE(e_info_8411_2)); ++ ++ /* The following Realtek-provided magic fixes an issue with the RX unit ++ * getting confused after the PHY having been powered-down. 
++ */ ++ r8168_mac_ocp_write(tp, 0xFC28, 0x0000); ++ r8168_mac_ocp_write(tp, 0xFC2A, 0x0000); ++ r8168_mac_ocp_write(tp, 0xFC2C, 0x0000); ++ r8168_mac_ocp_write(tp, 0xFC2E, 0x0000); ++ r8168_mac_ocp_write(tp, 0xFC30, 0x0000); ++ r8168_mac_ocp_write(tp, 0xFC32, 0x0000); ++ r8168_mac_ocp_write(tp, 0xFC34, 0x0000); ++ r8168_mac_ocp_write(tp, 0xFC36, 0x0000); ++ mdelay(3); ++ r8168_mac_ocp_write(tp, 0xFC26, 0x0000); ++ ++ r8168_mac_ocp_write(tp, 0xF800, 0xE008); ++ r8168_mac_ocp_write(tp, 0xF802, 0xE00A); ++ r8168_mac_ocp_write(tp, 0xF804, 0xE00C); ++ r8168_mac_ocp_write(tp, 0xF806, 0xE00E); ++ r8168_mac_ocp_write(tp, 0xF808, 0xE027); ++ r8168_mac_ocp_write(tp, 0xF80A, 0xE04F); ++ r8168_mac_ocp_write(tp, 0xF80C, 0xE05E); ++ r8168_mac_ocp_write(tp, 0xF80E, 0xE065); ++ r8168_mac_ocp_write(tp, 0xF810, 0xC602); ++ r8168_mac_ocp_write(tp, 0xF812, 0xBE00); ++ r8168_mac_ocp_write(tp, 0xF814, 0x0000); ++ r8168_mac_ocp_write(tp, 0xF816, 0xC502); ++ r8168_mac_ocp_write(tp, 0xF818, 0xBD00); ++ r8168_mac_ocp_write(tp, 0xF81A, 0x074C); ++ r8168_mac_ocp_write(tp, 0xF81C, 0xC302); ++ r8168_mac_ocp_write(tp, 0xF81E, 0xBB00); ++ r8168_mac_ocp_write(tp, 0xF820, 0x080A); ++ r8168_mac_ocp_write(tp, 0xF822, 0x6420); ++ r8168_mac_ocp_write(tp, 0xF824, 0x48C2); ++ r8168_mac_ocp_write(tp, 0xF826, 0x8C20); ++ r8168_mac_ocp_write(tp, 0xF828, 0xC516); ++ r8168_mac_ocp_write(tp, 0xF82A, 0x64A4); ++ r8168_mac_ocp_write(tp, 0xF82C, 0x49C0); ++ r8168_mac_ocp_write(tp, 0xF82E, 0xF009); ++ r8168_mac_ocp_write(tp, 0xF830, 0x74A2); ++ r8168_mac_ocp_write(tp, 0xF832, 0x8CA5); ++ r8168_mac_ocp_write(tp, 0xF834, 0x74A0); ++ r8168_mac_ocp_write(tp, 0xF836, 0xC50E); ++ r8168_mac_ocp_write(tp, 0xF838, 0x9CA2); ++ r8168_mac_ocp_write(tp, 0xF83A, 0x1C11); ++ r8168_mac_ocp_write(tp, 0xF83C, 0x9CA0); ++ r8168_mac_ocp_write(tp, 0xF83E, 0xE006); ++ r8168_mac_ocp_write(tp, 0xF840, 0x74F8); ++ r8168_mac_ocp_write(tp, 0xF842, 0x48C4); ++ r8168_mac_ocp_write(tp, 0xF844, 0x8CF8); ++ r8168_mac_ocp_write(tp, 0xF846, 
0xC404); ++ r8168_mac_ocp_write(tp, 0xF848, 0xBC00); ++ r8168_mac_ocp_write(tp, 0xF84A, 0xC403); ++ r8168_mac_ocp_write(tp, 0xF84C, 0xBC00); ++ r8168_mac_ocp_write(tp, 0xF84E, 0x0BF2); ++ r8168_mac_ocp_write(tp, 0xF850, 0x0C0A); ++ r8168_mac_ocp_write(tp, 0xF852, 0xE434); ++ r8168_mac_ocp_write(tp, 0xF854, 0xD3C0); ++ r8168_mac_ocp_write(tp, 0xF856, 0x49D9); ++ r8168_mac_ocp_write(tp, 0xF858, 0xF01F); ++ r8168_mac_ocp_write(tp, 0xF85A, 0xC526); ++ r8168_mac_ocp_write(tp, 0xF85C, 0x64A5); ++ r8168_mac_ocp_write(tp, 0xF85E, 0x1400); ++ r8168_mac_ocp_write(tp, 0xF860, 0xF007); ++ r8168_mac_ocp_write(tp, 0xF862, 0x0C01); ++ r8168_mac_ocp_write(tp, 0xF864, 0x8CA5); ++ r8168_mac_ocp_write(tp, 0xF866, 0x1C15); ++ r8168_mac_ocp_write(tp, 0xF868, 0xC51B); ++ r8168_mac_ocp_write(tp, 0xF86A, 0x9CA0); ++ r8168_mac_ocp_write(tp, 0xF86C, 0xE013); ++ r8168_mac_ocp_write(tp, 0xF86E, 0xC519); ++ r8168_mac_ocp_write(tp, 0xF870, 0x74A0); ++ r8168_mac_ocp_write(tp, 0xF872, 0x48C4); ++ r8168_mac_ocp_write(tp, 0xF874, 0x8CA0); ++ r8168_mac_ocp_write(tp, 0xF876, 0xC516); ++ r8168_mac_ocp_write(tp, 0xF878, 0x74A4); ++ r8168_mac_ocp_write(tp, 0xF87A, 0x48C8); ++ r8168_mac_ocp_write(tp, 0xF87C, 0x48CA); ++ r8168_mac_ocp_write(tp, 0xF87E, 0x9CA4); ++ r8168_mac_ocp_write(tp, 0xF880, 0xC512); ++ r8168_mac_ocp_write(tp, 0xF882, 0x1B00); ++ r8168_mac_ocp_write(tp, 0xF884, 0x9BA0); ++ r8168_mac_ocp_write(tp, 0xF886, 0x1B1C); ++ r8168_mac_ocp_write(tp, 0xF888, 0x483F); ++ r8168_mac_ocp_write(tp, 0xF88A, 0x9BA2); ++ r8168_mac_ocp_write(tp, 0xF88C, 0x1B04); ++ r8168_mac_ocp_write(tp, 0xF88E, 0xC508); ++ r8168_mac_ocp_write(tp, 0xF890, 0x9BA0); ++ r8168_mac_ocp_write(tp, 0xF892, 0xC505); ++ r8168_mac_ocp_write(tp, 0xF894, 0xBD00); ++ r8168_mac_ocp_write(tp, 0xF896, 0xC502); ++ r8168_mac_ocp_write(tp, 0xF898, 0xBD00); ++ r8168_mac_ocp_write(tp, 0xF89A, 0x0300); ++ r8168_mac_ocp_write(tp, 0xF89C, 0x051E); ++ r8168_mac_ocp_write(tp, 0xF89E, 0xE434); ++ r8168_mac_ocp_write(tp, 0xF8A0, 0xE018); ++ 
r8168_mac_ocp_write(tp, 0xF8A2, 0xE092); ++ r8168_mac_ocp_write(tp, 0xF8A4, 0xDE20); ++ r8168_mac_ocp_write(tp, 0xF8A6, 0xD3C0); ++ r8168_mac_ocp_write(tp, 0xF8A8, 0xC50F); ++ r8168_mac_ocp_write(tp, 0xF8AA, 0x76A4); ++ r8168_mac_ocp_write(tp, 0xF8AC, 0x49E3); ++ r8168_mac_ocp_write(tp, 0xF8AE, 0xF007); ++ r8168_mac_ocp_write(tp, 0xF8B0, 0x49C0); ++ r8168_mac_ocp_write(tp, 0xF8B2, 0xF103); ++ r8168_mac_ocp_write(tp, 0xF8B4, 0xC607); ++ r8168_mac_ocp_write(tp, 0xF8B6, 0xBE00); ++ r8168_mac_ocp_write(tp, 0xF8B8, 0xC606); ++ r8168_mac_ocp_write(tp, 0xF8BA, 0xBE00); ++ r8168_mac_ocp_write(tp, 0xF8BC, 0xC602); ++ r8168_mac_ocp_write(tp, 0xF8BE, 0xBE00); ++ r8168_mac_ocp_write(tp, 0xF8C0, 0x0C4C); ++ r8168_mac_ocp_write(tp, 0xF8C2, 0x0C28); ++ r8168_mac_ocp_write(tp, 0xF8C4, 0x0C2C); ++ r8168_mac_ocp_write(tp, 0xF8C6, 0xDC00); ++ r8168_mac_ocp_write(tp, 0xF8C8, 0xC707); ++ r8168_mac_ocp_write(tp, 0xF8CA, 0x1D00); ++ r8168_mac_ocp_write(tp, 0xF8CC, 0x8DE2); ++ r8168_mac_ocp_write(tp, 0xF8CE, 0x48C1); ++ r8168_mac_ocp_write(tp, 0xF8D0, 0xC502); ++ r8168_mac_ocp_write(tp, 0xF8D2, 0xBD00); ++ r8168_mac_ocp_write(tp, 0xF8D4, 0x00AA); ++ r8168_mac_ocp_write(tp, 0xF8D6, 0xE0C0); ++ r8168_mac_ocp_write(tp, 0xF8D8, 0xC502); ++ r8168_mac_ocp_write(tp, 0xF8DA, 0xBD00); ++ r8168_mac_ocp_write(tp, 0xF8DC, 0x0132); ++ ++ r8168_mac_ocp_write(tp, 0xFC26, 0x8000); ++ ++ r8168_mac_ocp_write(tp, 0xFC2A, 0x0743); ++ r8168_mac_ocp_write(tp, 0xFC2C, 0x0801); ++ r8168_mac_ocp_write(tp, 0xFC2E, 0x0BE9); ++ r8168_mac_ocp_write(tp, 0xFC30, 0x02FD); ++ r8168_mac_ocp_write(tp, 0xFC32, 0x0C25); ++ r8168_mac_ocp_write(tp, 0xFC34, 0x00A9); ++ r8168_mac_ocp_write(tp, 0xFC36, 0x012D); ++ + rtl_hw_aspm_clkreq_enable(tp, true); + } + +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index f3735d0458eb..ee3a5a4b2042 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ 
-3058,17 +3058,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) + + /* Manage oversized TCP frames for GMAC4 device */ + if (skb_is_gso(skb) && priv->tso) { +- if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { +- /* +- * There is no way to determine the number of TSO +- * capable Queues. Let's use always the Queue 0 +- * because if TSO is supported then at least this +- * one will be capable. +- */ +- skb_set_queue_mapping(skb, 0); +- ++ if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) + return stmmac_tso_xmit(skb, dev); +- } + } + + if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { +@@ -3885,6 +3876,23 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, + } + } + ++static u16 stmmac_select_queue(struct net_device *dev, struct sk_buff *skb, ++ struct net_device *sb_dev, ++ select_queue_fallback_t fallback) ++{ ++ if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { ++ /* ++ * There is no way to determine the number of TSO ++ * capable Queues. Let's use always the Queue 0 ++ * because if TSO is supported then at least this ++ * one will be capable. 
++ */ ++ return 0; ++ } ++ ++ return fallback(dev, skb, NULL) % dev->real_num_tx_queues; ++} ++ + static int stmmac_set_mac_address(struct net_device *ndev, void *addr) + { + struct stmmac_priv *priv = netdev_priv(ndev); +@@ -4101,6 +4109,7 @@ static const struct net_device_ops stmmac_netdev_ops = { + .ndo_tx_timeout = stmmac_tx_timeout, + .ndo_do_ioctl = stmmac_ioctl, + .ndo_setup_tc = stmmac_setup_tc, ++ .ndo_select_queue = stmmac_select_queue, + #ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = stmmac_poll_controller, + #endif +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index e7d8884b1a10..e60a620f9e31 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -849,7 +849,6 @@ int netvsc_recv_callback(struct net_device *net, + + if (unlikely(!skb)) { + ++net_device_ctx->eth_stats.rx_no_memory; +- rcu_read_unlock(); + return NVSP_STAT_FAIL; + } + +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 64a982563d59..bb65eaccbfad 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -869,6 +869,7 @@ static void macsec_reset_skb(struct sk_buff *skb, struct net_device *dev) + + static void macsec_finalize_skb(struct sk_buff *skb, u8 icv_len, u8 hdr_len) + { ++ skb->ip_summed = CHECKSUM_NONE; + memmove(skb->data + hdr_len, skb->data, 2 * ETH_ALEN); + skb_pull(skb, hdr_len); + pskb_trim_unique(skb, skb->len - icv_len); +@@ -1103,10 +1104,9 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) + } + + skb = skb_unshare(skb, GFP_ATOMIC); +- if (!skb) { +- *pskb = NULL; ++ *pskb = skb; ++ if (!skb) + return RX_HANDLER_CONSUMED; +- } + + pulled_sci = pskb_may_pull(skb, macsec_extra_len(true)); + if (!pulled_sci) { +diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c +index b6efd2d41dce..be0271a51b0a 100644 +--- a/drivers/net/phy/sfp.c ++++ b/drivers/net/phy/sfp.c +@@ -515,7 +515,7 @@ static int sfp_hwmon_read_sensor(struct sfp *sfp, int reg, long 
*value) + + static void sfp_hwmon_to_rx_power(long *value) + { +- *value = DIV_ROUND_CLOSEST(*value, 100); ++ *value = DIV_ROUND_CLOSEST(*value, 10); + } + + static void sfp_hwmon_calibrate(struct sfp *sfp, unsigned int slope, int offset, +diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c +index 9ee4d7402ca2..b4ac87aa09fd 100644 +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -169,23 +169,29 @@ static int vrf_ip6_local_out(struct net *net, struct sock *sk, + static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, + struct net_device *dev) + { +- const struct ipv6hdr *iph = ipv6_hdr(skb); ++ const struct ipv6hdr *iph; + struct net *net = dev_net(skb->dev); +- struct flowi6 fl6 = { +- /* needed to match OIF rule */ +- .flowi6_oif = dev->ifindex, +- .flowi6_iif = LOOPBACK_IFINDEX, +- .daddr = iph->daddr, +- .saddr = iph->saddr, +- .flowlabel = ip6_flowinfo(iph), +- .flowi6_mark = skb->mark, +- .flowi6_proto = iph->nexthdr, +- .flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF, +- }; ++ struct flowi6 fl6; + int ret = NET_XMIT_DROP; + struct dst_entry *dst; + struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst; + ++ if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr))) ++ goto err; ++ ++ iph = ipv6_hdr(skb); ++ ++ memset(&fl6, 0, sizeof(fl6)); ++ /* needed to match OIF rule */ ++ fl6.flowi6_oif = dev->ifindex; ++ fl6.flowi6_iif = LOOPBACK_IFINDEX; ++ fl6.daddr = iph->daddr; ++ fl6.saddr = iph->saddr; ++ fl6.flowlabel = ip6_flowinfo(iph); ++ fl6.flowi6_mark = skb->mark; ++ fl6.flowi6_proto = iph->nexthdr; ++ fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; ++ + dst = ip6_route_output(net, NULL, &fl6); + if (dst == dst_null) + goto err; +@@ -241,21 +247,27 @@ static int vrf_ip_local_out(struct net *net, struct sock *sk, + static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, + struct net_device *vrf_dev) + { +- struct iphdr *ip4h = ip_hdr(skb); ++ struct iphdr *ip4h; + int ret = NET_XMIT_DROP; +- struct flowi4 fl4 = { +- /* needed to match OIF rule */ 
+- .flowi4_oif = vrf_dev->ifindex, +- .flowi4_iif = LOOPBACK_IFINDEX, +- .flowi4_tos = RT_TOS(ip4h->tos), +- .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, +- .flowi4_proto = ip4h->protocol, +- .daddr = ip4h->daddr, +- .saddr = ip4h->saddr, +- }; ++ struct flowi4 fl4; + struct net *net = dev_net(vrf_dev); + struct rtable *rt; + ++ if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr))) ++ goto err; ++ ++ ip4h = ip_hdr(skb); ++ ++ memset(&fl4, 0, sizeof(fl4)); ++ /* needed to match OIF rule */ ++ fl4.flowi4_oif = vrf_dev->ifindex; ++ fl4.flowi4_iif = LOOPBACK_IFINDEX; ++ fl4.flowi4_tos = RT_TOS(ip4h->tos); ++ fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF; ++ fl4.flowi4_proto = ip4h->protocol; ++ fl4.daddr = ip4h->daddr; ++ fl4.saddr = ip4h->saddr; ++ + rt = ip_route_output_flow(net, &fl4, NULL); + if (IS_ERR(rt)) + goto err; +diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c +index bd70843940dc..07be36aca0da 100644 +--- a/drivers/scsi/sd_zbc.c ++++ b/drivers/scsi/sd_zbc.c +@@ -23,6 +23,8 @@ + */ + + #include <linux/blkdev.h> ++#include <linux/vmalloc.h> ++#include <linux/sched/mm.h> + + #include <asm/unaligned.h> + +@@ -64,7 +66,7 @@ static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf, + /** + * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command. 
+ * @sdkp: The target disk +- * @buf: Buffer to use for the reply ++ * @buf: vmalloc-ed buffer to use for the reply + * @buflen: the buffer size + * @lba: Start LBA of the report + * @partial: Do partial report +@@ -93,7 +95,6 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf, + put_unaligned_be32(buflen, &cmd[10]); + if (partial) + cmd[14] = ZBC_REPORT_ZONE_PARTIAL; +- memset(buf, 0, buflen); + + result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, + buf, buflen, &sshdr, +@@ -117,6 +118,53 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf, + return 0; + } + ++/* ++ * Maximum number of zones to get with one report zones command. ++ */ ++#define SD_ZBC_REPORT_MAX_ZONES 8192U ++ ++/** ++ * Allocate a buffer for report zones reply. ++ * @sdkp: The target disk ++ * @nr_zones: Maximum number of zones to report ++ * @buflen: Size of the buffer allocated ++ * ++ * Try to allocate a reply buffer for the number of requested zones. ++ * The size of the buffer allocated may be smaller than requested to ++ * satify the device constraint (max_hw_sectors, max_segments, etc). ++ * ++ * Return the address of the allocated buffer and update @buflen with ++ * the size of the allocated buffer. ++ */ ++static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, ++ unsigned int nr_zones, size_t *buflen) ++{ ++ struct request_queue *q = sdkp->disk->queue; ++ size_t bufsize; ++ void *buf; ++ ++ /* ++ * Report zone buffer size should be at most 64B times the number of ++ * zones requested plus the 64B reply header, but should be at least ++ * SECTOR_SIZE for ATA devices. ++ * Make sure that this size does not exceed the hardware capabilities. ++ * Furthermore, since the report zone command cannot be split, make ++ * sure that the allocated buffer can always be mapped by limiting the ++ * number of pages allocated to the HBA max segments limit. 
++ */ ++ nr_zones = min(nr_zones, SD_ZBC_REPORT_MAX_ZONES); ++ bufsize = roundup((nr_zones + 1) * 64, 512); ++ bufsize = min_t(size_t, bufsize, ++ queue_max_hw_sectors(q) << SECTOR_SHIFT); ++ bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); ++ ++ buf = vzalloc(bufsize); ++ if (buf) ++ *buflen = bufsize; ++ ++ return buf; ++} ++ + /** + * sd_zbc_report_zones - Disk report zones operation. + * @disk: The target disk +@@ -132,30 +180,23 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, + gfp_t gfp_mask) + { + struct scsi_disk *sdkp = scsi_disk(disk); +- unsigned int i, buflen, nrz = *nr_zones; ++ unsigned int i, nrz = *nr_zones; + unsigned char *buf; +- size_t offset = 0; ++ size_t buflen = 0, offset = 0; + int ret = 0; + + if (!sd_is_zoned(sdkp)) + /* Not a zoned device */ + return -EOPNOTSUPP; + +- /* +- * Get a reply buffer for the number of requested zones plus a header, +- * without exceeding the device maximum command size. For ATA disks, +- * buffers must be aligned to 512B. 
+- */ +- buflen = min(queue_max_hw_sectors(disk->queue) << 9, +- roundup((nrz + 1) * 64, 512)); +- buf = kmalloc(buflen, gfp_mask); ++ buf = sd_zbc_alloc_report_buffer(sdkp, nrz, &buflen); + if (!buf) + return -ENOMEM; + + ret = sd_zbc_do_report_zones(sdkp, buf, buflen, + sectors_to_logical(sdkp->device, sector), true); + if (ret) +- goto out_free_buf; ++ goto out; + + nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64); + for (i = 0; i < nrz; i++) { +@@ -166,8 +207,8 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, + + *nr_zones = nrz; + +-out_free_buf: +- kfree(buf); ++out: ++ kvfree(buf); + + return ret; + } +@@ -301,8 +342,6 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp, + return 0; + } + +-#define SD_ZBC_BUF_SIZE 131072U +- + /** + * sd_zbc_check_zones - Check the device capacity and zone sizes + * @sdkp: Target disk +@@ -318,22 +357,28 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp, + */ + static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks) + { ++ size_t bufsize, buflen; ++ unsigned int noio_flag; + u64 zone_blocks = 0; + sector_t max_lba, block = 0; + unsigned char *buf; + unsigned char *rec; +- unsigned int buf_len; +- unsigned int list_length; + int ret; + u8 same; + ++ /* Do all memory allocations as if GFP_NOIO was specified */ ++ noio_flag = memalloc_noio_save(); ++ + /* Get a buffer */ +- buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); +- if (!buf) +- return -ENOMEM; ++ buf = sd_zbc_alloc_report_buffer(sdkp, SD_ZBC_REPORT_MAX_ZONES, ++ &bufsize); ++ if (!buf) { ++ ret = -ENOMEM; ++ goto out; ++ } + + /* Do a report zone to get max_lba and the same field */ +- ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0, false); ++ ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, 0, false); + if (ret) + goto out_free; + +@@ -369,12 +414,12 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks) + do { + + /* Parse REPORT ZONES header */ +- list_length = 
get_unaligned_be32(&buf[0]) + 64; ++ buflen = min_t(size_t, get_unaligned_be32(&buf[0]) + 64, ++ bufsize); + rec = buf + 64; +- buf_len = min(list_length, SD_ZBC_BUF_SIZE); + + /* Parse zone descriptors */ +- while (rec < buf + buf_len) { ++ while (rec < buf + buflen) { + u64 this_zone_blocks = get_unaligned_be64(&rec[8]); + + if (zone_blocks == 0) { +@@ -390,8 +435,8 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks) + } + + if (block < sdkp->capacity) { +- ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, +- block, true); ++ ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, block, ++ true); + if (ret) + goto out_free; + } +@@ -422,7 +467,8 @@ out: + } + + out_free: +- kfree(buf); ++ memalloc_noio_restore(noio_flag); ++ kvfree(buf); + + return ret; + } +diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c +index 0ccd51f72048..44c2fff2a8b7 100644 +--- a/fs/ext4/dir.c ++++ b/fs/ext4/dir.c +@@ -108,7 +108,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) + struct inode *inode = file_inode(file); + struct super_block *sb = inode->i_sb; + struct buffer_head *bh = NULL; +- int dir_has_error = 0; + struct fscrypt_str fstr = FSTR_INIT(NULL, 0); + + if (IS_ENCRYPTED(inode)) { +@@ -144,8 +143,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) + return err; + } + +- offset = ctx->pos & (sb->s_blocksize - 1); +- + while (ctx->pos < inode->i_size) { + struct ext4_map_blocks map; + +@@ -154,9 +151,18 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) + goto errout; + } + cond_resched(); ++ offset = ctx->pos & (sb->s_blocksize - 1); + map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); + map.m_len = 1; + err = ext4_map_blocks(NULL, inode, &map, 0); ++ if (err == 0) { ++ /* m_len should never be zero but let's avoid ++ * an infinite loop if it somehow is */ ++ if (map.m_len == 0) ++ map.m_len = 1; ++ ctx->pos += map.m_len * sb->s_blocksize; ++ continue; ++ } + if (err > 0) { + pgoff_t index = 
map.m_pblk >> + (PAGE_SHIFT - inode->i_blkbits); +@@ -175,13 +181,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) + } + + if (!bh) { +- if (!dir_has_error) { +- EXT4_ERROR_FILE(file, 0, +- "directory contains a " +- "hole at offset %llu", +- (unsigned long long) ctx->pos); +- dir_has_error = 1; +- } + /* corrupt size? Maybe no more blocks to read */ + if (ctx->pos > inode->i_blocks << 9) + break; +diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h +index 75a5309f2231..ef8fcf7d0d3b 100644 +--- a/fs/ext4/ext4_jbd2.h ++++ b/fs/ext4/ext4_jbd2.h +@@ -361,20 +361,20 @@ static inline int ext4_journal_force_commit(journal_t *journal) + } + + static inline int ext4_jbd2_inode_add_write(handle_t *handle, +- struct inode *inode) ++ struct inode *inode, loff_t start_byte, loff_t length) + { + if (ext4_handle_valid(handle)) +- return jbd2_journal_inode_add_write(handle, +- EXT4_I(inode)->jinode); ++ return jbd2_journal_inode_ranged_write(handle, ++ EXT4_I(inode)->jinode, start_byte, length); + return 0; + } + + static inline int ext4_jbd2_inode_add_wait(handle_t *handle, +- struct inode *inode) ++ struct inode *inode, loff_t start_byte, loff_t length) + { + if (ext4_handle_valid(handle)) +- return jbd2_journal_inode_add_wait(handle, +- EXT4_I(inode)->jinode); ++ return jbd2_journal_inode_ranged_wait(handle, ++ EXT4_I(inode)->jinode, start_byte, length); + return 0; + } + +diff --git a/fs/ext4/file.c b/fs/ext4/file.c +index 2c5baa5e8291..f4a24a46245e 100644 +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -165,6 +165,10 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) + ret = generic_write_checks(iocb, from); + if (ret <= 0) + return ret; ++ ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return -EPERM; ++ + /* + * If we have encountered a bitmap-format file, the size limit + * is smaller than s_maxbytes, which is for extent-mapped files. 
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 7fd2d14dc27c..aa1987b23ffb 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -727,10 +727,16 @@ out_sem: + !(flags & EXT4_GET_BLOCKS_ZERO) && + !ext4_is_quota_file(inode) && + ext4_should_order_data(inode)) { ++ loff_t start_byte = ++ (loff_t)map->m_lblk << inode->i_blkbits; ++ loff_t length = (loff_t)map->m_len << inode->i_blkbits; ++ + if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) +- ret = ext4_jbd2_inode_add_wait(handle, inode); ++ ret = ext4_jbd2_inode_add_wait(handle, inode, ++ start_byte, length); + else +- ret = ext4_jbd2_inode_add_write(handle, inode); ++ ret = ext4_jbd2_inode_add_write(handle, inode, ++ start_byte, length); + if (ret) + return ret; + } +@@ -4081,7 +4087,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, + err = 0; + mark_buffer_dirty(bh); + if (ext4_should_order_data(inode)) +- err = ext4_jbd2_inode_add_write(handle, inode); ++ err = ext4_jbd2_inode_add_write(handle, inode, from, ++ length); + } + + unlock: +@@ -5514,6 +5521,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return -EPERM; ++ ++ if (unlikely(IS_APPEND(inode) && ++ (ia_valid & (ATTR_MODE | ATTR_UID | ++ ATTR_GID | ATTR_TIMES_SET)))) ++ return -EPERM; ++ + error = setattr_prepare(dentry, attr); + if (error) + return error; +@@ -6184,6 +6199,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) + get_block_t *get_block; + int retries = 0; + ++ if (unlikely(IS_IMMUTABLE(inode))) ++ return VM_FAULT_SIGBUS; ++ + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + +diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c +index 20faa6a69238..c8fa2d140325 100644 +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -269,6 +269,29 @@ static int uuid_is_zero(__u8 u[16]) + } + #endif + ++/* ++ * If immutable is set and we are not clearing it, we're not allowed to change ++ * 
anything else in the inode. Don't error out if we're only trying to set ++ * immutable on an immutable file. ++ */ ++static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid, ++ unsigned int flags) ++{ ++ struct ext4_inode_info *ei = EXT4_I(inode); ++ unsigned int oldflags = ei->i_flags; ++ ++ if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL)) ++ return 0; ++ ++ if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL)) ++ return -EPERM; ++ if (ext4_has_feature_project(inode->i_sb) && ++ __kprojid_val(ei->i_projid) != new_projid) ++ return -EPERM; ++ ++ return 0; ++} ++ + static int ext4_ioctl_setflags(struct inode *inode, + unsigned int flags) + { +@@ -322,6 +345,20 @@ static int ext4_ioctl_setflags(struct inode *inode, + goto flags_out; + } + ++ /* ++ * Wait for all pending directio and then flush all the dirty pages ++ * for this file. The flush marks all the pages readonly, so any ++ * subsequent attempt to write to the file (particularly mmap pages) ++ * will come through the filesystem and fail. 
++ */ ++ if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) && ++ (flags & EXT4_IMMUTABLE_FL)) { ++ inode_dio_wait(inode); ++ err = filemap_write_and_wait(inode->i_mapping); ++ if (err) ++ goto flags_out; ++ } ++ + handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); +@@ -751,7 +788,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + return err; + + inode_lock(inode); +- err = ext4_ioctl_setflags(inode, flags); ++ err = ext4_ioctl_check_immutable(inode, ++ from_kprojid(&init_user_ns, ei->i_projid), ++ flags); ++ if (!err) ++ err = ext4_ioctl_setflags(inode, flags); + inode_unlock(inode); + mnt_drop_write_file(filp); + return err; +@@ -1121,6 +1162,9 @@ resizefs_out: + goto out; + flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) | + (flags & EXT4_FL_XFLAG_VISIBLE); ++ err = ext4_ioctl_check_immutable(inode, fa.fsx_projid, flags); ++ if (err) ++ goto out; + err = ext4_ioctl_setflags(inode, flags); + if (err) + goto out; +diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c +index 1083a9f3f16a..c7ded4e2adff 100644 +--- a/fs/ext4/move_extent.c ++++ b/fs/ext4/move_extent.c +@@ -390,7 +390,8 @@ data_copy: + + /* Even in case of data=writeback it is reasonable to pin + * inode to transaction, to prevent unexpected data loss */ +- *err = ext4_jbd2_inode_add_write(handle, orig_inode); ++ *err = ext4_jbd2_inode_add_write(handle, orig_inode, ++ (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size); + + unlock_pages: + unlock_page(pagep[0]); +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index 5d9ffa8efbfd..27b1fb2612bc 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -81,8 +81,18 @@ static struct buffer_head *ext4_append(handle_t *handle, + static int ext4_dx_csum_verify(struct inode *inode, + struct ext4_dir_entry *dirent); + ++/* ++ * Hints to ext4_read_dirblock regarding whether we expect a directory ++ * block being read to be an index block, or a block containing ++ * directory 
entries (and if the latter, whether it was found via a ++ * logical block in an htree index block). This is used to control ++ * what sort of sanity checkinig ext4_read_dirblock() will do on the ++ * directory block read from the storage device. EITHER will means ++ * the caller doesn't know what kind of directory block will be read, ++ * so no specific verification will be done. ++ */ + typedef enum { +- EITHER, INDEX, DIRENT ++ EITHER, INDEX, DIRENT, DIRENT_HTREE + } dirblock_type_t; + + #define ext4_read_dirblock(inode, block, type) \ +@@ -108,11 +118,14 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, + + return bh; + } +- if (!bh) { ++ if (!bh && (type == INDEX || type == DIRENT_HTREE)) { + ext4_error_inode(inode, func, line, block, +- "Directory hole found"); ++ "Directory hole found for htree %s block", ++ (type == INDEX) ? "index" : "leaf"); + return ERR_PTR(-EFSCORRUPTED); + } ++ if (!bh) ++ return NULL; + dirent = (struct ext4_dir_entry *) bh->b_data; + /* Determine whether or not we have an index block */ + if (is_dx(inode)) { +@@ -979,7 +992,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, + + dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", + (unsigned long)block)); +- bh = ext4_read_dirblock(dir, block, DIRENT); ++ bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); + if (IS_ERR(bh)) + return PTR_ERR(bh); + +@@ -1509,7 +1522,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, + return (struct buffer_head *) frame; + do { + block = dx_get_block(frame->at); +- bh = ext4_read_dirblock(dir, block, DIRENT); ++ bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); + if (IS_ERR(bh)) + goto errout; + +@@ -2079,6 +2092,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, + blocks = dir->i_size >> sb->s_blocksize_bits; + for (block = 0; block < blocks; block++) { + bh = ext4_read_dirblock(dir, block, DIRENT); ++ if (bh == NULL) { ++ bh = ext4_bread(handle, dir, block, ++ 
EXT4_GET_BLOCKS_CREATE); ++ goto add_to_new_block; ++ } + if (IS_ERR(bh)) { + retval = PTR_ERR(bh); + bh = NULL; +@@ -2099,6 +2117,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, + brelse(bh); + } + bh = ext4_append(handle, dir, &block); ++add_to_new_block: + if (IS_ERR(bh)) { + retval = PTR_ERR(bh); + bh = NULL; +@@ -2143,7 +2162,7 @@ again: + return PTR_ERR(frame); + entries = frame->entries; + at = frame->at; +- bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); ++ bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + bh = NULL; +@@ -2691,7 +2710,10 @@ bool ext4_empty_dir(struct inode *inode) + EXT4_ERROR_INODE(inode, "invalid size"); + return true; + } +- bh = ext4_read_dirblock(inode, 0, EITHER); ++ /* The first directory block must not be a hole, ++ * so treat it as DIRENT_HTREE ++ */ ++ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); + if (IS_ERR(bh)) + return true; + +@@ -2713,6 +2735,10 @@ bool ext4_empty_dir(struct inode *inode) + brelse(bh); + lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); + bh = ext4_read_dirblock(inode, lblock, EITHER); ++ if (bh == NULL) { ++ offset += sb->s_blocksize; ++ continue; ++ } + if (IS_ERR(bh)) + return true; + de = (struct ext4_dir_entry_2 *) bh->b_data; +@@ -3256,7 +3282,10 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, + struct buffer_head *bh; + + if (!ext4_has_inline_data(inode)) { +- bh = ext4_read_dirblock(inode, 0, EITHER); ++ /* The first directory block must not be a hole, so ++ * treat it as DIRENT_HTREE ++ */ ++ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); + if (IS_ERR(bh)) { + *retval = PTR_ERR(bh); + return NULL; +diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c +index efd0ce9489ae..668f9021cf11 100644 +--- a/fs/jbd2/commit.c ++++ b/fs/jbd2/commit.c +@@ -187,14 +187,15 @@ static int journal_wait_on_commit_record(journal_t *journal, + * use writepages() because with dealyed allocation we may 
be doing + * block allocation in writepages(). + */ +-static int journal_submit_inode_data_buffers(struct address_space *mapping) ++static int journal_submit_inode_data_buffers(struct address_space *mapping, ++ loff_t dirty_start, loff_t dirty_end) + { + int ret; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = mapping->nrpages * 2, +- .range_start = 0, +- .range_end = i_size_read(mapping->host), ++ .range_start = dirty_start, ++ .range_end = dirty_end, + }; + + ret = generic_writepages(mapping, &wbc); +@@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal, + + spin_lock(&journal->j_list_lock); + list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { ++ loff_t dirty_start = jinode->i_dirty_start; ++ loff_t dirty_end = jinode->i_dirty_end; ++ + if (!(jinode->i_flags & JI_WRITE_DATA)) + continue; + mapping = jinode->i_vfs_inode->i_mapping; +@@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal, + * only allocated blocks here. 
+ */ + trace_jbd2_submit_inode_data(jinode->i_vfs_inode); +- err = journal_submit_inode_data_buffers(mapping); ++ err = journal_submit_inode_data_buffers(mapping, dirty_start, ++ dirty_end); + if (!ret) + ret = err; + spin_lock(&journal->j_list_lock); +@@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal, + /* For locking, see the comment in journal_submit_data_buffers() */ + spin_lock(&journal->j_list_lock); + list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { ++ loff_t dirty_start = jinode->i_dirty_start; ++ loff_t dirty_end = jinode->i_dirty_end; ++ + if (!(jinode->i_flags & JI_WAIT_DATA)) + continue; + jinode->i_flags |= JI_COMMIT_RUNNING; + spin_unlock(&journal->j_list_lock); +- err = filemap_fdatawait_keep_errors( +- jinode->i_vfs_inode->i_mapping); ++ err = filemap_fdatawait_range_keep_errors( ++ jinode->i_vfs_inode->i_mapping, dirty_start, ++ dirty_end); + if (!ret) + ret = err; + spin_lock(&journal->j_list_lock); +@@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, + &jinode->i_transaction->t_inode_list); + } else { + jinode->i_transaction = NULL; ++ jinode->i_dirty_start = 0; ++ jinode->i_dirty_end = 0; + } + } + spin_unlock(&journal->j_list_lock); +diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c +index 43df0c943229..e0382067c824 100644 +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -94,6 +94,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); + EXPORT_SYMBOL(jbd2_journal_force_commit); + EXPORT_SYMBOL(jbd2_journal_inode_add_write); + EXPORT_SYMBOL(jbd2_journal_inode_add_wait); ++EXPORT_SYMBOL(jbd2_journal_inode_ranged_write); ++EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait); + EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); + EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); + EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); +@@ -2574,6 +2576,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) + jinode->i_next_transaction = 
NULL; + jinode->i_vfs_inode = inode; + jinode->i_flags = 0; ++ jinode->i_dirty_start = 0; ++ jinode->i_dirty_end = 0; + INIT_LIST_HEAD(&jinode->i_list); + } + +diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c +index 8ca4fddc705f..990e7b5062e7 100644 +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) + * File inode in the inode list of the handle's transaction + */ + static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, +- unsigned long flags) ++ unsigned long flags, loff_t start_byte, loff_t end_byte) + { + transaction_t *transaction = handle->h_transaction; + journal_t *journal; +@@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, + jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, + transaction->t_tid); + +- /* +- * First check whether inode isn't already on the transaction's +- * lists without taking the lock. Note that this check is safe +- * without the lock as we cannot race with somebody removing inode +- * from the transaction. The reason is that we remove inode from the +- * transaction only in journal_release_jbd_inode() and when we commit +- * the transaction. We are guarded from the first case by holding +- * a reference to the inode. We are safe against the second case +- * because if jinode->i_transaction == transaction, commit code +- * cannot touch the transaction because we hold reference to it, +- * and if jinode->i_next_transaction == transaction, commit code +- * will only file the inode where we want it. 
+- */ +- if ((jinode->i_transaction == transaction || +- jinode->i_next_transaction == transaction) && +- (jinode->i_flags & flags) == flags) +- return 0; +- + spin_lock(&journal->j_list_lock); + jinode->i_flags |= flags; ++ ++ if (jinode->i_dirty_end) { ++ jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte); ++ jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte); ++ } else { ++ jinode->i_dirty_start = start_byte; ++ jinode->i_dirty_end = end_byte; ++ } ++ + /* Is inode already attached where we need it? */ + if (jinode->i_transaction == transaction || + jinode->i_next_transaction == transaction) +@@ -2631,12 +2622,28 @@ done: + int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode) + { + return jbd2_journal_file_inode(handle, jinode, +- JI_WRITE_DATA | JI_WAIT_DATA); ++ JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX); + } + + int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode) + { +- return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA); ++ return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0, ++ LLONG_MAX); ++} ++ ++int jbd2_journal_inode_ranged_write(handle_t *handle, ++ struct jbd2_inode *jinode, loff_t start_byte, loff_t length) ++{ ++ return jbd2_journal_file_inode(handle, jinode, ++ JI_WRITE_DATA | JI_WAIT_DATA, start_byte, ++ start_byte + length - 1); ++} ++ ++int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode, ++ loff_t start_byte, loff_t length) ++{ ++ return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, ++ start_byte, start_byte + length - 1); + } + + /* +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index 08a84d130120..1f270be8204f 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -344,6 +344,11 @@ struct queue_limits { + + #ifdef CONFIG_BLK_DEV_ZONED + ++/* ++ * Maximum number of zones to report with a single report zones command. 
++ */ ++#define BLK_ZONED_REPORT_MAX_ZONES 8192U ++ + extern unsigned int blkdev_nr_zones(struct block_device *bdev); + extern int blkdev_report_zones(struct block_device *bdev, + sector_t sector, struct blk_zone *zones, +diff --git a/include/linux/fs.h b/include/linux/fs.h +index dd28e7679089..c26d24caeb14 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2703,6 +2703,8 @@ extern int filemap_flush(struct address_space *); + extern int filemap_fdatawait_keep_errors(struct address_space *mapping); + extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, + loff_t lend); ++extern int filemap_fdatawait_range_keep_errors(struct address_space *mapping, ++ loff_t start_byte, loff_t end_byte); + + static inline int filemap_fdatawait(struct address_space *mapping) + { +diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h +index 2cf6e04b08fc..f7325f32f78f 100644 +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -454,6 +454,22 @@ struct jbd2_inode { + * @i_flags: Flags of inode [j_list_lock] + */ + unsigned long i_flags; ++ ++ /** ++ * @i_dirty_start: ++ * ++ * Offset in bytes where the dirty range for this inode starts. ++ * [j_list_lock] ++ */ ++ loff_t i_dirty_start; ++ ++ /** ++ * @i_dirty_end: ++ * ++ * Inclusive offset in bytes where the dirty range for this inode ++ * ends. 
[j_list_lock] ++ */ ++ loff_t i_dirty_end; + }; + + struct jbd2_revoke_table_s; +@@ -1400,6 +1416,12 @@ extern int jbd2_journal_force_commit(journal_t *); + extern int jbd2_journal_force_commit_nested(journal_t *); + extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode); + extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode); ++extern int jbd2_journal_inode_ranged_write(handle_t *handle, ++ struct jbd2_inode *inode, loff_t start_byte, ++ loff_t length); ++extern int jbd2_journal_inode_ranged_wait(handle_t *handle, ++ struct jbd2_inode *inode, loff_t start_byte, ++ loff_t length); + extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, + struct jbd2_inode *inode, loff_t new_size); + extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); +diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h +index 3b83288749c6..c5dabaff1732 100644 +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -716,7 +716,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { + u8 swp[0x1]; + u8 swp_csum[0x1]; + u8 swp_lso[0x1]; +- u8 reserved_at_23[0xd]; ++ u8 cqe_checksum_full[0x1]; ++ u8 reserved_at_24[0xc]; + u8 max_vxlan_udp_ports[0x8]; + u8 reserved_at_38[0x6]; + u8 max_geneve_opt_len[0x1]; +diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h +index 1f678f023850..0d35c4df8108 100644 +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -1044,6 +1044,11 @@ static inline int in_software_context(struct perf_event *event) + return event->ctx->pmu->task_ctx_nr == perf_sw_context; + } + ++static inline int is_exclusive_pmu(struct pmu *pmu) ++{ ++ return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE; ++} ++ + extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; + + extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64); +diff --git a/include/net/dst.h b/include/net/dst.h +index 
6cf0870414c7..ffc8ee0ea5e5 100644 +--- a/include/net/dst.h ++++ b/include/net/dst.h +@@ -313,8 +313,9 @@ static inline bool dst_hold_safe(struct dst_entry *dst) + * @skb: buffer + * + * If dst is not yet refcounted and not destroyed, grab a ref on it. ++ * Returns true if dst is refcounted. + */ +-static inline void skb_dst_force(struct sk_buff *skb) ++static inline bool skb_dst_force(struct sk_buff *skb) + { + if (skb_dst_is_noref(skb)) { + struct dst_entry *dst = skb_dst(skb); +@@ -325,6 +326,8 @@ static inline void skb_dst_force(struct sk_buff *skb) + + skb->_skb_refdst = (unsigned long)dst; + } ++ ++ return skb->_skb_refdst != 0UL; + } + + +diff --git a/include/net/tcp.h b/include/net/tcp.h +index 36fcd0ad0515..51f07f57ffa4 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1067,7 +1067,8 @@ void tcp_get_default_congestion_control(struct net *net, char *name); + void tcp_get_available_congestion_control(char *buf, size_t len); + void tcp_get_allowed_congestion_control(char *buf, size_t len); + int tcp_set_allowed_congestion_control(char *allowed); +-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit); ++int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, ++ bool reinit, bool cap_net_admin); + u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); + void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); + +@@ -1679,6 +1680,11 @@ static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk) + return skb_rb_first(&sk->tcp_rtx_queue); + } + ++static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk) ++{ ++ return skb_rb_last(&sk->tcp_rtx_queue); ++} ++ + static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) + { + return skb_peek(&sk->sk_write_queue); +diff --git a/include/net/tls.h b/include/net/tls.h +index a67ad7d56ff2..22de0f06d455 100644 +--- a/include/net/tls.h ++++ b/include/net/tls.h +@@ -285,6 +285,7 @@ struct tls_offload_context_rx { + 
(ALIGN(sizeof(struct tls_offload_context_rx), sizeof(void *)) + \ + TLS_DRIVER_STATE_SIZE) + ++void tls_ctx_free(struct tls_context *ctx); + int wait_on_pending_writer(struct sock *sk, long *timeo); + int tls_sk_query(struct sock *sk, int optname, char __user *optval, + int __user *optlen); +diff --git a/kernel/events/core.c b/kernel/events/core.c +index f33bd0a89391..28fa3e7fbc02 100644 +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -2543,6 +2543,9 @@ unlock: + return ret; + } + ++static bool exclusive_event_installable(struct perf_event *event, ++ struct perf_event_context *ctx); ++ + /* + * Attach a performance event to a context. + * +@@ -2557,6 +2560,8 @@ perf_install_in_context(struct perf_event_context *ctx, + + lockdep_assert_held(&ctx->mutex); + ++ WARN_ON_ONCE(!exclusive_event_installable(event, ctx)); ++ + if (event->cpu != -1) + event->cpu = cpu; + +@@ -4348,7 +4353,7 @@ static int exclusive_event_init(struct perf_event *event) + { + struct pmu *pmu = event->pmu; + +- if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) ++ if (!is_exclusive_pmu(pmu)) + return 0; + + /* +@@ -4379,7 +4384,7 @@ static void exclusive_event_destroy(struct perf_event *event) + { + struct pmu *pmu = event->pmu; + +- if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) ++ if (!is_exclusive_pmu(pmu)) + return; + + /* see comment in exclusive_event_init() */ +@@ -4399,14 +4404,15 @@ static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2) + return false; + } + +-/* Called under the same ctx::mutex as perf_install_in_context() */ + static bool exclusive_event_installable(struct perf_event *event, + struct perf_event_context *ctx) + { + struct perf_event *iter_event; + struct pmu *pmu = event->pmu; + +- if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) ++ lockdep_assert_held(&ctx->mutex); ++ ++ if (!is_exclusive_pmu(pmu)) + return true; + + list_for_each_entry(iter_event, &ctx->event_list, event_entry) { +@@ -4453,12 +4459,20 @@ static void 
_free_event(struct perf_event *event) + if (event->destroy) + event->destroy(event); + +- if (event->ctx) +- put_ctx(event->ctx); +- ++ /* ++ * Must be after ->destroy(), due to uprobe_perf_close() using ++ * hw.target. ++ */ + if (event->hw.target) + put_task_struct(event->hw.target); + ++ /* ++ * perf_event_free_task() relies on put_ctx() being 'last', in particular ++ * all task references must be cleaned up. ++ */ ++ if (event->ctx) ++ put_ctx(event->ctx); ++ + exclusive_event_destroy(event); + module_put(event->pmu->module); + +@@ -4638,8 +4652,17 @@ again: + mutex_unlock(&event->child_mutex); + + list_for_each_entry_safe(child, tmp, &free_list, child_list) { ++ void *var = &child->ctx->refcount; ++ + list_del(&child->child_list); + free_event(child); ++ ++ /* ++ * Wake any perf_event_free_task() waiting for this event to be ++ * freed. ++ */ ++ smp_mb(); /* pairs with wait_var_event() */ ++ wake_up_var(var); + } + + no_ctx: +@@ -10899,11 +10922,6 @@ SYSCALL_DEFINE5(perf_event_open, + goto err_alloc; + } + +- if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) { +- err = -EBUSY; +- goto err_context; +- } +- + /* + * Look up the group leader (we will attach this event to it): + */ +@@ -10991,6 +11009,18 @@ SYSCALL_DEFINE5(perf_event_open, + move_group = 0; + } + } ++ ++ /* ++ * Failure to create exclusive events returns -EBUSY. ++ */ ++ err = -EBUSY; ++ if (!exclusive_event_installable(group_leader, ctx)) ++ goto err_locked; ++ ++ for_each_sibling_event(sibling, group_leader) { ++ if (!exclusive_event_installable(sibling, ctx)) ++ goto err_locked; ++ } + } else { + mutex_lock(&ctx->mutex); + } +@@ -11027,9 +11057,6 @@ SYSCALL_DEFINE5(perf_event_open, + * because we need to serialize with concurrent event creation. 
+ */ + if (!exclusive_event_installable(event, ctx)) { +- /* exclusive and group stuff are assumed mutually exclusive */ +- WARN_ON_ONCE(move_group); +- + err = -EBUSY; + goto err_locked; + } +@@ -11496,11 +11523,11 @@ static void perf_free_event(struct perf_event *event, + } + + /* +- * Free an unexposed, unused context as created by inheritance by +- * perf_event_init_task below, used by fork() in case of fail. ++ * Free a context as created by inheritance by perf_event_init_task() below, ++ * used by fork() in case of fail. + * +- * Not all locks are strictly required, but take them anyway to be nice and +- * help out with the lockdep assertions. ++ * Even though the task has never lived, the context and events have been ++ * exposed through the child_list, so we must take care tearing it all down. + */ + void perf_event_free_task(struct task_struct *task) + { +@@ -11530,7 +11557,23 @@ void perf_event_free_task(struct task_struct *task) + perf_free_event(event, ctx); + + mutex_unlock(&ctx->mutex); +- put_ctx(ctx); ++ ++ /* ++ * perf_event_release_kernel() could've stolen some of our ++ * child events and still have them on its free_list. In that ++ * case we must wait for these events to have been freed (in ++ * particular all their references to this task must've been ++ * dropped). ++ * ++ * Without this copy_process() will unconditionally free this ++ * task (irrespective of its reference count) and ++ * _free_event()'s put_task_struct(event->hw.target) will be a ++ * use-after-free. ++ * ++ * Wait for all events to drop their context reference. 
++ */ ++ wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1); ++ put_ctx(ctx); /* must be last */ + } + } + +diff --git a/mm/filemap.c b/mm/filemap.c +index d78f577baef2..60177605c633 100644 +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -547,6 +547,28 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, + } + EXPORT_SYMBOL(filemap_fdatawait_range); + ++/** ++ * filemap_fdatawait_range_keep_errors - wait for writeback to complete ++ * @mapping: address space structure to wait for ++ * @start_byte: offset in bytes where the range starts ++ * @end_byte: offset in bytes where the range ends (inclusive) ++ * ++ * Walk the list of under-writeback pages of the given address space in the ++ * given range and wait for all of them. Unlike filemap_fdatawait_range(), ++ * this function does not clear error status of the address space. ++ * ++ * Use this function if callers don't handle errors themselves. Expected ++ * call sites are system-wide / filesystem-wide data flushers: e.g. 
sync(2), ++ * fsfreeze(8) ++ */ ++int filemap_fdatawait_range_keep_errors(struct address_space *mapping, ++ loff_t start_byte, loff_t end_byte) ++{ ++ __filemap_fdatawait_range(mapping, start_byte, end_byte); ++ return filemap_check_and_keep_errors(mapping); ++} ++EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors); ++ + /** + * file_fdatawait_range - wait for writeback to complete + * @file: file pointing to address space structure to wait for +diff --git a/mm/vmscan.c b/mm/vmscan.c +index dbcf2cd5e7e9..223566fb11ca 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -2176,7 +2176,7 @@ static void shrink_active_list(unsigned long nr_to_scan, + * 10TB 320 32GB + */ + static bool inactive_list_is_low(struct lruvec *lruvec, bool file, +- struct scan_control *sc, bool actual_reclaim) ++ struct scan_control *sc, bool trace) + { + enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE; + struct pglist_data *pgdat = lruvec_pgdat(lruvec); +@@ -2202,7 +2202,7 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, + * rid of the stale workingset quickly. 
+ */ + refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE); +- if (file && actual_reclaim && lruvec->refaults != refaults) { ++ if (file && lruvec->refaults != refaults) { + inactive_ratio = 0; + } else { + gb = (inactive + active) >> (30 - PAGE_SHIFT); +@@ -2212,7 +2212,7 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file, + inactive_ratio = 1; + } + +- if (actual_reclaim) ++ if (trace) + trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx, + lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive, + lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active, +diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c +index ba303ee99b9b..6a9f48322bb9 100644 +--- a/net/bridge/br_input.c ++++ b/net/bridge/br_input.c +@@ -79,7 +79,6 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb + struct net_bridge_fdb_entry *dst = NULL; + struct net_bridge_mdb_entry *mdst; + bool local_rcv, mcast_hit = false; +- const unsigned char *dest; + struct net_bridge *br; + u16 vid = 0; + +@@ -97,10 +96,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false); + + local_rcv = !!(br->dev->flags & IFF_PROMISC); +- dest = eth_hdr(skb)->h_dest; +- if (is_multicast_ether_addr(dest)) { ++ if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) { + /* by definition the broadcast is also a multicast address */ +- if (is_broadcast_ether_addr(dest)) { ++ if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) { + pkt_type = BR_PKT_BROADCAST; + local_rcv = true; + } else { +@@ -150,7 +148,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb + } + break; + case BR_PKT_UNICAST: +- dst = br_fdb_find_rcu(br, dest, vid); ++ dst = br_fdb_find_rcu(br, eth_hdr(skb)->h_dest, vid); + default: + break; + } +diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c +index 45e7f4173bba..0ef4092202d0 100644 +--- 
a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -934,6 +934,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, + int type; + int err = 0; + __be32 group; ++ u16 nsrcs; + + ih = igmpv3_report_hdr(skb); + num = ntohs(ih->ngrec); +@@ -947,8 +948,9 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, + grec = (void *)(skb->data + len - sizeof(*grec)); + group = grec->grec_mca; + type = grec->grec_type; ++ nsrcs = ntohs(grec->grec_nsrcs); + +- len += ntohs(grec->grec_nsrcs) * 4; ++ len += nsrcs * 4; + if (!ip_mc_may_pull(skb, len)) + return -EINVAL; + +@@ -969,7 +971,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, + src = eth_hdr(skb)->h_source; + if ((type == IGMPV3_CHANGE_TO_INCLUDE || + type == IGMPV3_MODE_IS_INCLUDE) && +- ntohs(grec->grec_nsrcs) == 0) { ++ nsrcs == 0) { + br_ip4_multicast_leave_group(br, port, group, vid, src); + } else { + err = br_ip4_multicast_add_group(br, port, group, vid, +@@ -1006,7 +1008,8 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, + len = skb_transport_offset(skb) + sizeof(*icmp6h); + + for (i = 0; i < num; i++) { +- __be16 *nsrcs, _nsrcs; ++ __be16 *_nsrcs, __nsrcs; ++ u16 nsrcs; + + nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs); + +@@ -1014,12 +1017,13 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, + nsrcs_offset + sizeof(_nsrcs)) + return -EINVAL; + +- nsrcs = skb_header_pointer(skb, nsrcs_offset, +- sizeof(_nsrcs), &_nsrcs); +- if (!nsrcs) ++ _nsrcs = skb_header_pointer(skb, nsrcs_offset, ++ sizeof(__nsrcs), &__nsrcs); ++ if (!_nsrcs) + return -EINVAL; + +- grec_len = struct_size(grec, grec_src, ntohs(*nsrcs)); ++ nsrcs = ntohs(*_nsrcs); ++ grec_len = struct_size(grec, grec_src, nsrcs); + + if (!ipv6_mc_may_pull(skb, len + grec_len)) + return -EINVAL; +@@ -1044,7 +1048,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, + src = eth_hdr(skb)->h_source; + if ((grec->grec_type == 
MLD2_CHANGE_TO_INCLUDE || + grec->grec_type == MLD2_MODE_IS_INCLUDE) && +- ntohs(*nsrcs) == 0) { ++ nsrcs == 0) { + br_ip6_multicast_leave_group(br, port, &grec->grec_mca, + vid, src); + } else { +@@ -1298,7 +1302,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, + u16 vid) + { + unsigned int transport_len = ipv6_transport_len(skb); +- const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct mld_msg *mld; + struct net_bridge_mdb_entry *mp; + struct mld2_query *mld2q; +@@ -1342,7 +1345,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, + + if (is_general_query) { + saddr.proto = htons(ETH_P_IPV6); +- saddr.u.ip6 = ip6h->saddr; ++ saddr.u.ip6 = ipv6_hdr(skb)->saddr; + + br_multicast_query_received(br, port, &br->ip6_other_query, + &saddr, max_delay); +diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c +index 1b75d6bf12bd..37ddcea3fc96 100644 +--- a/net/bridge/br_stp_bpdu.c ++++ b/net/bridge/br_stp_bpdu.c +@@ -147,7 +147,6 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) + void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev) + { +- const unsigned char *dest = eth_hdr(skb)->h_dest; + struct net_bridge_port *p; + struct net_bridge *br; + const unsigned char *buf; +@@ -176,7 +175,7 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + if (p->state == BR_STATE_DISABLED) + goto out; + +- if (!ether_addr_equal(dest, br->group_addr)) ++ if (!ether_addr_equal(eth_hdr(skb)->h_dest, br->group_addr)) + goto out; + + if (p->flags & BR_BPDU_GUARD) { +diff --git a/net/core/filter.c b/net/core/filter.c +index b76f14197128..b8893566339f 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -4211,7 +4211,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, + TCP_CA_NAME_MAX-1)); + name[TCP_CA_NAME_MAX-1] = 0; + ret = tcp_set_congestion_control(sk, name, false, +- reinit); ++ reinit, true); + } else { + struct tcp_sock *tp = tcp_sk(sk); + +diff --git 
a/net/core/neighbour.c b/net/core/neighbour.c +index cce4fbcd7dcb..2f693f1168e1 100644 +--- a/net/core/neighbour.c ++++ b/net/core/neighbour.c +@@ -1126,6 +1126,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) + + atomic_set(&neigh->probes, + NEIGH_VAR(neigh->parms, UCAST_PROBES)); ++ neigh_del_timer(neigh); + neigh->nud_state = NUD_INCOMPLETE; + neigh->updated = now; + next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), +@@ -1142,6 +1143,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) + } + } else if (neigh->nud_state & NUD_STALE) { + neigh_dbg(2, "neigh %p is delayed\n", neigh); ++ neigh_del_timer(neigh); + neigh->nud_state = NUD_DELAY; + neigh->updated = jiffies; + neigh_add_timer(neigh, jiffies + +diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c +index eb514f312e6f..83944b7480c8 100644 +--- a/net/ipv4/devinet.c ++++ b/net/ipv4/devinet.c +@@ -66,6 +66,11 @@ + #include <net/net_namespace.h> + #include <net/addrconf.h> + ++#define IPV6ONLY_FLAGS \ ++ (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \ ++ IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \ ++ IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY) ++ + static struct ipv4_devconf ipv4_devconf = { + .data = { + [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, +@@ -472,6 +477,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, + ifa->ifa_flags &= ~IFA_F_SECONDARY; + last_primary = &in_dev->ifa_list; + ++ /* Don't set IPv6 only flags to IPv4 addresses */ ++ ifa->ifa_flags &= ~IPV6ONLY_FLAGS; ++ + for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; + ifap = &ifa1->ifa_next) { + if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && +diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c +index eb03153dfe12..792d16f7b62d 100644 +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -1232,12 +1232,8 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) + if (pmc) { + im->interface = pmc->interface; + if (im->sfmode == MCAST_INCLUDE) { +- 
im->tomb = pmc->tomb; +- pmc->tomb = NULL; +- +- im->sources = pmc->sources; +- pmc->sources = NULL; +- ++ swap(im->tomb, pmc->tomb); ++ swap(im->sources, pmc->sources); + for (psf = im->sources; psf; psf = psf->sf_next) + psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + } else { +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 365c8490b34b..caac580e1f1d 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2630,6 +2630,8 @@ int tcp_disconnect(struct sock *sk, int flags) + tcp_saved_syn_free(tp); + tp->compressed_ack = 0; + tp->bytes_sent = 0; ++ tp->bytes_acked = 0; ++ tp->bytes_received = 0; + tp->bytes_retrans = 0; + tp->duplicate_sack[0].start_seq = 0; + tp->duplicate_sack[0].end_seq = 0; +@@ -2784,7 +2786,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, + name[val] = 0; + + lock_sock(sk); +- err = tcp_set_congestion_control(sk, name, true, true); ++ err = tcp_set_congestion_control(sk, name, true, true, ++ ns_capable(sock_net(sk)->user_ns, ++ CAP_NET_ADMIN)); + release_sock(sk); + return err; + } +diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c +index bc6c02f16243..48f79db446a0 100644 +--- a/net/ipv4/tcp_cong.c ++++ b/net/ipv4/tcp_cong.c +@@ -332,7 +332,8 @@ out: + * tcp_reinit_congestion_control (if the current congestion control was + * already initialized. 
+ */ +-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit) ++int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, ++ bool reinit, bool cap_net_admin) + { + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_congestion_ops *ca; +@@ -368,8 +369,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo + } else { + err = -EBUSY; + } +- } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || +- ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) { ++ } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) { + err = -EPERM; + } else if (!try_module_get(ca->owner)) { + err = -EBUSY; +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index b8b4ae555e34..32bd52e06ef1 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1289,6 +1289,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *buff; + int nsize, old_factor; ++ long limit; + int nlen; + u8 flags; + +@@ -1299,8 +1300,16 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, + if (nsize < 0) + nsize = 0; + +- if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf && +- tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) { ++ /* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb. ++ * We need some allowance to not penalize applications setting small ++ * SO_SNDBUF values. ++ * Also allow first and last skb in retransmit queue to be split. 
++ */ ++ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE); ++ if (unlikely((sk->sk_wmem_queued >> 1) > limit && ++ tcp_queue != TCP_FRAG_IN_WRITE_QUEUE && ++ skb != tcp_rtx_queue_head(sk) && ++ skb != tcp_rtx_queue_tail(sk))) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG); + return -ENOMEM; + } +diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c +index 9915f64b38a0..4b1a898982d0 100644 +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -1113,8 +1113,24 @@ add: + err = call_fib6_entry_notifiers(info->nl_net, + FIB_EVENT_ENTRY_ADD, + rt, extack); +- if (err) ++ if (err) { ++ struct fib6_info *sibling, *next_sibling; ++ ++ /* If the route has siblings, then it first ++ * needs to be unlinked from them. ++ */ ++ if (!rt->fib6_nsiblings) ++ return err; ++ ++ list_for_each_entry_safe(sibling, next_sibling, ++ &rt->fib6_siblings, ++ fib6_siblings) ++ sibling->fib6_nsiblings--; ++ rt->fib6_nsiblings = 0; ++ list_del_init(&rt->fib6_siblings); ++ rt6_multipath_rebalance(next_sibling); + return err; ++ } + + rcu_assign_pointer(rt->fib6_next, iter); + atomic_inc(&rt->fib6_ref); +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index ab348489bd8a..9fc2d803c684 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -2183,7 +2183,7 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, + { + u32 rt_cookie = 0; + +- if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) || ++ if (!from || !fib6_get_cookie_safe(from, &rt_cookie) || + rt_cookie != cookie) + return NULL; + +diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c +index 5b86574e7b89..12a008cf8865 100644 +--- a/net/netfilter/nf_queue.c ++++ b/net/netfilter/nf_queue.c +@@ -190,6 +190,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, + goto err; + } + ++ if (!skb_dst_force(skb) && state->hook != NF_INET_PRE_ROUTING) { ++ status = -ENETDOWN; ++ goto err; ++ } ++ + *entry = (struct nf_queue_entry) { + .skb = skb, + .state = *state, +@@ -198,7 
+203,6 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, + }; + + nf_queue_entry_get_refs(entry); +- skb_dst_force(skb); + + switch (entry->state.pf) { + case AF_INET: +diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c +index 71ffd1a6dc7c..43910e50752c 100644 +--- a/net/netrom/af_netrom.c ++++ b/net/netrom/af_netrom.c +@@ -872,7 +872,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) + unsigned short frametype, flags, window, timeout; + int ret; + +- skb->sk = NULL; /* Initially we don't know who it's for */ ++ skb_orphan(skb); + + /* + * skb->data points to the netrom frame start +@@ -970,7 +970,9 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) + + window = skb->data[20]; + ++ sock_hold(make); + skb->sk = make; ++ skb->destructor = sock_efree; + make->sk_state = TCP_ESTABLISHED; + + /* Fill in his circuit details */ +diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c +index 908f25e3773e..5405d073804c 100644 +--- a/net/nfc/nci/data.c ++++ b/net/nfc/nci/data.c +@@ -119,7 +119,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + rc = -EPROTO; +- goto free_exit; ++ goto exit; + } + + __skb_queue_head_init(&frags_q); +diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c +index e47ebbbe71b8..b85b37518fc5 100644 +--- a/net/openvswitch/actions.c ++++ b/net/openvswitch/actions.c +@@ -175,8 +175,7 @@ static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __be16 diff[] = { ~(hdr->h_proto), ethertype }; + +- skb->csum = ~csum_partial((char *)diff, sizeof(diff), +- ~skb->csum); ++ skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); + } + + hdr->h_proto = ethertype; +@@ -268,8 +267,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __be32 diff[] = { 
~(stack->label_stack_entry), lse }; + +- skb->csum = ~csum_partial((char *)diff, sizeof(diff), +- ~skb->csum); ++ skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); + } + + stack->label_stack_entry = lse; +diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c +index ae8c5d7f3bf1..c77476273179 100644 +--- a/net/rxrpc/af_rxrpc.c ++++ b/net/rxrpc/af_rxrpc.c +@@ -521,6 +521,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) + + switch (rx->sk.sk_state) { + case RXRPC_UNBOUND: ++ case RXRPC_CLIENT_UNBOUND: + rx->srx.srx_family = AF_RXRPC; + rx->srx.srx_service = 0; + rx->srx.transport_type = SOCK_DGRAM; +@@ -545,10 +546,9 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) + } + + rx->local = local; +- rx->sk.sk_state = RXRPC_CLIENT_UNBOUND; ++ rx->sk.sk_state = RXRPC_CLIENT_BOUND; + /* Fall through */ + +- case RXRPC_CLIENT_UNBOUND: + case RXRPC_CLIENT_BOUND: + if (!m->msg_name && + test_bit(RXRPC_SOCK_CONNECTED, &rx->flags)) { +diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c +index 99ae30c177c7..23568679f07d 100644 +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -2162,6 +2162,9 @@ replay: + tfilter_notify(net, skb, n, tp, block, q, parent, fh, + RTM_NEWTFILTER, false, rtnl_held); + tfilter_put(tp, fh); ++ /* q pointer is NULL for shared blocks */ ++ if (q) ++ q->flags &= ~TCQ_F_CAN_BYPASS; + } + + errout: +diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c +index cd04d40c30b6..1971f3a29730 100644 +--- a/net/sched/sch_fq_codel.c ++++ b/net/sched/sch_fq_codel.c +@@ -600,8 +600,6 @@ static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid) + static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) + { +- /* we cannot bypass queue discipline anymore */ +- sch->flags &= ~TCQ_F_CAN_BYPASS; + return 0; + } + +diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c +index 2f2678197760..650f21463853 100644 +--- a/net/sched/sch_sfq.c 
++++ b/net/sched/sch_sfq.c +@@ -828,8 +828,6 @@ static unsigned long sfq_find(struct Qdisc *sch, u32 classid) + static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) + { +- /* we cannot bypass queue discipline anymore */ +- sch->flags &= ~TCQ_F_CAN_BYPASS; + return 0; + } + +diff --git a/net/sctp/socket.c b/net/sctp/socket.c +index 4583fa914e62..e33382b3f82a 100644 +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -4828,35 +4828,17 @@ out_nounlock: + static int sctp_connect(struct sock *sk, struct sockaddr *addr, + int addr_len, int flags) + { +- struct inet_sock *inet = inet_sk(sk); + struct sctp_af *af; +- int err = 0; ++ int err = -EINVAL; + + lock_sock(sk); +- + pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, + addr, addr_len); + +- /* We may need to bind the socket. */ +- if (!inet->inet_num) { +- if (sk->sk_prot->get_port(sk, 0)) { +- release_sock(sk); +- return -EAGAIN; +- } +- inet->inet_sport = htons(inet->inet_num); +- } +- + /* Validate addr_len before calling common connect/connectx routine. */ +- af = addr_len < offsetofend(struct sockaddr, sa_family) ? NULL : +- sctp_get_af_specific(addr->sa_family); +- if (!af || addr_len < af->sockaddr_len) { +- err = -EINVAL; +- } else { +- /* Pass correct addr len to common routine (so it knows there +- * is only one address being passed. 
+- */ ++ af = sctp_get_af_specific(addr->sa_family); ++ if (af && addr_len >= af->sockaddr_len) + err = __sctp_connect(sk, addr, af->sockaddr_len, flags, NULL); +- } + + release_sock(sk); + return err; +diff --git a/net/sctp/stream.c b/net/sctp/stream.c +index b6bb68adac6e..f72dfda4025d 100644 +--- a/net/sctp/stream.c ++++ b/net/sctp/stream.c +@@ -168,13 +168,20 @@ out: + int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) + { + struct sctp_stream_out_ext *soute; ++ int ret; + + soute = kzalloc(sizeof(*soute), GFP_KERNEL); + if (!soute) + return -ENOMEM; + SCTP_SO(stream, sid)->ext = soute; + +- return sctp_sched_init_sid(stream, sid, GFP_KERNEL); ++ ret = sctp_sched_init_sid(stream, sid, GFP_KERNEL); ++ if (ret) { ++ kfree(SCTP_SO(stream, sid)->ext); ++ SCTP_SO(stream, sid)->ext = NULL; ++ } ++ ++ return ret; + } + + void sctp_stream_free(struct sctp_stream *stream) +diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c +index 12454f0d5a63..fdcf18c78bb5 100644 +--- a/net/tls/tls_device.c ++++ b/net/tls/tls_device.c +@@ -61,7 +61,7 @@ static void tls_device_free_ctx(struct tls_context *ctx) + if (ctx->rx_conf == TLS_HW) + kfree(tls_offload_ctx_rx(ctx)); + +- kfree(ctx); ++ tls_ctx_free(ctx); + } + + static void tls_device_gc_task(struct work_struct *work) +@@ -746,6 +746,11 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) + } + + crypto_info = &ctx->crypto_send.info; ++ if (crypto_info->version != TLS_1_2_VERSION) { ++ rc = -EOPNOTSUPP; ++ goto free_offload_ctx; ++ } ++ + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; +@@ -880,6 +885,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) + struct net_device *netdev; + int rc = 0; + ++ if (ctx->crypto_recv.info.version != TLS_1_2_VERSION) ++ return -EOPNOTSUPP; ++ + /* We support starting offload on multiple sockets + * concurrently, so we only need a read lock here. 
+ * This lock must precede get_netdev_for_sock to prevent races between +diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c +index f4f632824247..0c22af7b113f 100644 +--- a/net/tls/tls_main.c ++++ b/net/tls/tls_main.c +@@ -251,7 +251,7 @@ static void tls_write_space(struct sock *sk) + ctx->sk_write_space(sk); + } + +-static void tls_ctx_free(struct tls_context *ctx) ++void tls_ctx_free(struct tls_context *ctx) + { + if (!ctx) + return; +@@ -638,7 +638,7 @@ static void tls_hw_sk_destruct(struct sock *sk) + + ctx->sk_destruct(sk); + /* Free ctx */ +- kfree(ctx); ++ tls_ctx_free(ctx); + icsk->icsk_ulp_data = NULL; + } + +diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c +index 41e17ed0c94e..fd931294f66f 100644 +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -1931,7 +1931,8 @@ bool tls_sw_stream_read(const struct sock *sk) + ingress_empty = list_empty(&psock->ingress_msg); + rcu_read_unlock(); + +- return !ingress_empty || ctx->recv_pkt; ++ return !ingress_empty || ctx->recv_pkt || ++ !skb_queue_empty(&ctx->rx_list); + } + + static int tls_read_size(struct strparser *strp, struct sk_buff *skb) +diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c +index 61cfd8f70989..d089eb706d18 100644 +--- a/tools/perf/builtin-script.c ++++ b/tools/perf/builtin-script.c +@@ -3669,7 +3669,8 @@ int cmd_script(int argc, const char **argv) + goto out_delete; + + uname(&uts); +- if (!strcmp(uts.machine, session->header.env.arch) || ++ if (data.is_pipe || /* assume pipe_mode indicates native_arch */ ++ !strcmp(uts.machine, session->header.env.arch) || + (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386"))) + native_arch = true; +diff --git a/tools/testing/selftests/net/txring_overwrite.c b/tools/testing/selftests/net/txring_overwrite.c +index fd8b1c663c39..7d9ea039450a 100644 +--- a/tools/testing/selftests/net/txring_overwrite.c ++++ b/tools/testing/selftests/net/txring_overwrite.c +@@ -113,7 +113,7 @@ static int setup_tx(char **ring) + + 
*ring = mmap(0, req.tp_block_size * req.tp_block_nr, + PROT_READ | PROT_WRITE, MAP_SHARED, fdt, 0); +- if (!*ring) ++ if (*ring == MAP_FAILED) + error(1, errno, "mmap"); + + return fdt;
