From 94b0818fa63555a65f6ba107080659ea6bcca63e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 4 Sep 2024 18:22:37 -0700 Subject: [PATCH 01/15] can: bcm: Clear bo->bcm_proc_read after remove_proc_entry(). syzbot reported a warning in bcm_release(). [0] The blamed change fixed another warning that is triggered when connect() is issued again for a socket whose connect()ed device has been unregistered. However, if the socket is just close()d without the 2nd connect(), the remaining bo->bcm_proc_read triggers unnecessary remove_proc_entry() in bcm_release(). Let's clear bo->bcm_proc_read after remove_proc_entry() in bcm_notify(). [0] name '4986' WARNING: CPU: 0 PID: 5234 at fs/proc/generic.c:711 remove_proc_entry+0x2e7/0x5d0 fs/proc/generic.c:711 Modules linked in: CPU: 0 UID: 0 PID: 5234 Comm: syz-executor606 Not tainted 6.11.0-rc5-syzkaller-00178-g5517ae241919 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 RIP: 0010:remove_proc_entry+0x2e7/0x5d0 fs/proc/generic.c:711 Code: ff eb 05 e8 cb 1e 5e ff 48 8b 5c 24 10 48 c7 c7 e0 f7 aa 8e e8 2a 38 8e 09 90 48 c7 c7 60 3a 1b 8c 48 89 de e8 da 42 20 ff 90 <0f> 0b 90 90 48 8b 44 24 18 48 c7 44 24 40 0e 36 e0 45 49 c7 04 07 RSP: 0018:ffffc9000345fa20 EFLAGS: 00010246 RAX: 2a2d0aee2eb64600 RBX: ffff888032f1f548 RCX: ffff888029431e00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffc9000345fb08 R08: ffffffff8155b2f2 R09: 1ffff1101710519a R10: dffffc0000000000 R11: ffffed101710519b R12: ffff888011d38640 R13: 0000000000000004 R14: 0000000000000000 R15: dffffc0000000000 FS: 0000000000000000(0000) GS:ffff8880b8800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fcfb52722f0 CR3: 000000000e734000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bcm_release+0x250/0x880 net/can/bcm.c:1578 __sock_release net/socket.c:659 [inline] sock_close+0xbc/0x240 net/socket.c:1421 __fput+0x24a/0x8a0 fs/file_table.c:422 task_work_run+0x24f/0x310 kernel/task_work.c:228 exit_task_work include/linux/task_work.h:40 [inline] do_exit+0xa2f/0x27f0 kernel/exit.c:882 do_group_exit+0x207/0x2c0 kernel/exit.c:1031 __do_sys_exit_group kernel/exit.c:1042 [inline] __se_sys_exit_group kernel/exit.c:1040 [inline] __x64_sys_exit_group+0x3f/0x40 kernel/exit.c:1040 x64_sys_call+0x2634/0x2640 arch/x86/include/generated/asm/syscalls_64.h:232 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fcfb51ee969 Code: Unable to access opcode bytes at 0x7fcfb51ee93f. RSP: 002b:00007ffce0109ca8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fcfb51ee969 RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000001 RBP: 00007fcfb526f3b0 R08: ffffffffffffffb8 R09: 0000555500000000 R10: 0000555500000000 R11: 0000000000000246 R12: 00007fcfb526f3b0 R13: 0000000000000000 R14: 00007fcfb5271ee0 R15: 00007fcfb51bf160 Fixes: 76fe372ccb81 ("can: bcm: Remove proc entry when dev is unregistered.") Reported-by: syzbot+0532ac7a06fb1a03187e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0532ac7a06fb1a03187e Tested-by: syzbot+0532ac7a06fb1a03187e@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20240905012237.79683-1-kuniyu@amazon.com Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 46d3ec3aa44b..217049fa496e 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1471,8 +1471,10 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg, /* remove device reference, if this is our bound device */ if (bo->bound && bo->ifindex == dev->ifindex) { #if IS_ENABLED(CONFIG_PROC_FS) - if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) + if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) { remove_proc_entry(bo->procname, sock_net(sk)->can.bcmproc_dir); + bo->bcm_proc_read = NULL; + } #endif bo->bound = 0; bo->ifindex = 0; From 75b3189540578f96b4996e4849b6649998f49455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20M=C3=A4tje?= Date: Thu, 5 Sep 2024 00:27:40 +0200 Subject: [PATCH 02/15] can: esd_usb: Remove CAN_CTRLMODE_3_SAMPLES for CAN-USB/3-FD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the CAN_CTRLMODE_3_SAMPLES announcement for CAN-USB/3-FD devices because these devices don't support it. The hardware has a Microchip SAM E70 microcontroller that uses a Bosch MCAN IP core as CAN FD controller. But this MCAN core doesn't support triple sampling. Fixes: 80662d943075 ("can: esd_usb: Add support for esd CAN-USB/3") Cc: stable@vger.kernel.org Signed-off-by: Stefan Mätje Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20240904222740.2985864-2-stefan.maetje@esd.eu Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c index 41a0e4261d15..03ad10b01867 100644 --- a/drivers/net/can/usb/esd_usb.c +++ b/drivers/net/can/usb/esd_usb.c @@ -3,7 +3,7 @@ * CAN driver for esd electronics gmbh CAN-USB/2, CAN-USB/3 and CAN-USB/Micro * * Copyright (C) 2010-2012 esd electronic system design gmbh, Matthias Fuchs - * Copyright (C) 2022-2023 esd electronics gmbh, Frank Jungclaus + * Copyright (C) 2022-2024 esd electronics gmbh, Frank Jungclaus */ #include @@ -1116,9 +1116,6 @@ static int esd_usb_3_set_bittiming(struct net_device *netdev) if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) flags |= ESD_USB_3_BAUDRATE_FLAG_LOM; - if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) - flags |= ESD_USB_3_BAUDRATE_FLAG_TRS; - baud_x->nom.brp = cpu_to_le16(nom_bt->brp & (nom_btc->brp_max - 1)); baud_x->nom.sjw = cpu_to_le16(nom_bt->sjw & (nom_btc->sjw_max - 1)); baud_x->nom.tseg1 = cpu_to_le16((nom_bt->prop_seg + nom_bt->phase_seg1) @@ -1219,7 +1216,6 @@ static int esd_usb_probe_one_net(struct usb_interface *intf, int index) switch (le16_to_cpu(dev->udev->descriptor.idProduct)) { case ESD_USB_CANUSB3_PRODUCT_ID: priv->can.clock.freq = ESD_USB_3_CAN_CLOCK; - priv->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD; priv->can.bittiming_const = &esd_usb_3_nom_bittiming_const; priv->can.data_bittiming_const = &esd_usb_3_data_bittiming_const; From d0fa06408ccf96b1d3d93d97fad6618e942efd38 Mon Sep 17 00:00:00 2001 From: Martin Jocic Date: Mon, 9 Sep 2024 13:35:12 +0200 Subject: [PATCH 03/15] can: kvaser_pciefd: Enable 64-bit DMA addressing Enabling 64-bit addressing for DMA buffers will prevent issues on some memory constrained platforms like e.g. Raspberry Pi 5, where the driver won't load because it cannot allocate enough continuous memory in the default 32-bit memory address range. Signed-off-by: Martin Jocic Link: https://patch.msgid.link/d7340f78e3db305bfeeb8229d2dd1c9077e10b92.1725875278.git.martin.jocic@kvaser.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 9ffc3ffb4e8f..f86c9671a03a 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1104,6 +1104,9 @@ static int kvaser_pciefd_setup_dma(struct kvaser_pciefd *pcie) /* Disable the DMA */ iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CTRL_REG); + + dma_set_mask_and_coherent(&pcie->pci->dev, DMA_BIT_MASK(64)); + for (i = 0; i < KVASER_PCIEFD_DMA_COUNT; i++) { pcie->dma_data[i] = dmam_alloc_coherent(&pcie->pci->dev, KVASER_PCIEFD_DMA_SIZE, From 801ad2f87b0c6d0c34a75a4efd6bfd3a2d9f9298 Mon Sep 17 00:00:00 2001 From: Jake Hamby Date: Fri, 6 Sep 2024 23:19:51 +0000 Subject: [PATCH 04/15] can: m_can: enable NAPI before enabling interrupts If an interrupt (RX-complete or error flag) is set when bringing up the CAN device, e.g. due to CAN bus traffic before initializing the device, when m_can_start() is called and interrupts are enabled, m_can_isr() is called immediately, which disables all CAN interrupts and calls napi_schedule(). Because napi_enable() isn't called until later in m_can_open(), the call to napi_schedule() never schedules the m_can_poll() callback and the device is left with interrupts disabled and can't receive any CAN packets until rebooted. This can be verified by running "cansend" from another device before setting the bitrate and calling "ip link set up can0" on the test device. Adding debug lines to m_can_isr() shows it's called with flags (IR_EP | IR_EW | IR_CRCE), which calls m_can_disable_all_interrupts() and napi_schedule(), and then m_can_poll() is never called. Move the call to napi_enable() above the call to m_can_start() to enable any initial interrupt flags to be handled by m_can_poll() so that interrupts are reenabled. Add a call to napi_disable() in the error handling section of m_can_open(), to handle the case where later functions return errors. Also, in m_can_close(), move the call to napi_disable() below the call to m_can_stop() to ensure all interrupts are handled when bringing down the device. This race condition is much less likely to occur. Tested on a Microchip SAMA7G54 MPU. The fix should be applicable to any SoC with a Bosch M_CAN controller. Signed-off-by: Jake Hamby Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Link: https://patch.msgid.link/20240910-can-m_can-fix-ifup-v3-1-6c1720ba45ce@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 012c3d22b01d..c1a07013433e 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1763,9 +1763,6 @@ static int m_can_close(struct net_device *dev) netif_stop_queue(dev); - if (!cdev->is_peripheral) - napi_disable(&cdev->napi); - m_can_stop(dev); m_can_clk_stop(cdev); free_irq(dev->irq, dev); @@ -1776,6 +1773,8 @@ static int m_can_close(struct net_device *dev) destroy_workqueue(cdev->tx_wq); cdev->tx_wq = NULL; can_rx_offload_disable(&cdev->offload); + } else { + napi_disable(&cdev->napi); } close_candev(dev); @@ -2030,6 +2029,8 @@ static int m_can_open(struct net_device *dev) if (cdev->is_peripheral) can_rx_offload_enable(&cdev->offload); + else + napi_enable(&cdev->napi); /* register interrupt handler */ if (cdev->is_peripheral) { @@ -2063,9 +2064,6 @@ static int m_can_open(struct net_device *dev) if (err) goto exit_start_fail; - if (!cdev->is_peripheral) - napi_enable(&cdev->napi); - netif_start_queue(dev); return 0; @@ -2079,6 +2077,8 @@ static int m_can_open(struct net_device *dev) out_wq_fail: if (cdev->is_peripheral) can_rx_offload_disable(&cdev->offload); + else + napi_disable(&cdev->napi); close_candev(dev); exit_disable_clks: m_can_clk_stop(cdev); From 2c09b50efcad985cf920ca88baa9aa52b1999dcc Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 9 Sep 2024 15:07:41 +0200 Subject: [PATCH 05/15] can: m_can: m_can_close(): stop clocks after device has been shut down After calling m_can_stop() an interrupt may be pending or NAPI might still be executed. This means the driver might still touch registers of the IP core after the clocks have been disabled. This is not good practice and might lead to aborts depending on the SoC integration. To avoid these potential problems, make m_can_close() symmetric to m_can_open(), i.e. stop the clocks at the end, right before shutting down the transceiver. Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Link: https://patch.msgid.link/20240910-can-m_can-fix-ifup-v3-2-6c1720ba45ce@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index c1a07013433e..7fec04b024d5 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1764,7 +1764,6 @@ static int m_can_close(struct net_device *dev) netif_stop_queue(dev); m_can_stop(dev); - m_can_clk_stop(cdev); free_irq(dev->irq, dev); m_can_clean(dev); @@ -1779,6 +1778,7 @@ static int m_can_close(struct net_device *dev) close_candev(dev); + m_can_clk_stop(cdev); phy_power_off(cdev->transceiver); return 0; From 45fa29c85117170b0508790f878b13ec6593c888 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 11 Sep 2024 11:20:58 +0200 Subject: [PATCH 06/15] bareudp: Pull inner IP header in bareudp_udp_encap_recv(). Bareudp reads the inner IP header to get the ECN value. Therefore, it needs to ensure that it's part of the skb's linear data. This is similar to the vxlan and geneve fixes for that same problem: * commit f7789419137b ("vxlan: Pull inner IP header in vxlan_rcv().") * commit 1ca1ba465e55 ("geneve: make sure to pull inner header in geneve_rx()") Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Guillaume Nault Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/5205940067c40218a70fbb888080466b2fc288db.1726046181.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 7aca0544fb29..b4e820a123ca 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -68,6 +68,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) __be16 proto; void *oiph; int err; + int nh; bareudp = rcu_dereference_sk_user_data(sk); if (!bareudp) @@ -148,10 +149,25 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) } skb_dst_set(skb, &tun_dst->dst); skb->dev = bareudp->dev; - oiph = skb_network_header(skb); - skb_reset_network_header(skb); skb_reset_mac_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + + skb_reset_network_header(skb); + + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(bareudp->dev, rx_length_errors); + DEV_STATS_INC(bareudp->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (!ipv6_mod_enabled() || family == AF_INET) err = IP_ECN_decapsulate(oiph, skb); else From c471236b2359e6b27388475dd04fff0a5e2bf922 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 11 Sep 2024 11:21:05 +0200 Subject: [PATCH 07/15] bareudp: Pull inner IP header on xmit. Both bareudp_xmit_skb() and bareudp6_xmit_skb() read their skb's inner IP header to get its ECN value (with ip_tunnel_ecn_encap()). Therefore we need to ensure that the inner IP header is part of the skb's linear data. Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Guillaume Nault Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/267328222f0a11519c6de04c640a4f87a38ea9ed.1726046181.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index b4e820a123ca..e80992b4f9de 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -317,6 +317,9 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be32 saddr; int err; + if (!skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) + return -EINVAL; + if (!sock) return -ESHUTDOWN; @@ -384,6 +387,9 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; + if (!skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB))) + return -EINVAL; + if (!sock) return -ESHUTDOWN; From 4144a1059b47e821c82c3c82eb23a4c7312dce3a Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 11 Sep 2024 21:10:19 +0200 Subject: [PATCH 08/15] xsk: fix batch alloc API on non-coherent systems In cases when synchronizing DMA operations is necessary, xsk_buff_alloc_batch() returns a single buffer instead of the requested count. This puts the pressure on drivers that use batch API as they have to check for this corner case on their side and take care of allocations by themselves, which feels counter productive. Let us improve the core by looping over xp_alloc() @max times when slow path needs to be taken. Another issue with current interface, as spotted and fixed by Dries, was that when driver called xsk_buff_alloc_batch() with @max == 0, for slow path case it still allocated and returned a single buffer, which should not happen. By introducing the logic from first paragraph we kill two birds with one stone and address this problem as well. Fixes: 47e4075df300 ("xsk: Batched buffer allocation for the pool") Reported-and-tested-by: Dries De Winter Co-developed-by: Dries De Winter Signed-off-by: Dries De Winter Signed-off-by: Maciej Fijalkowski Acked-by: Magnus Karlsson Acked-by: Alexei Starovoitov Link: https://patch.msgid.link/20240911191019.296480-1-maciej.fijalkowski@intel.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk_buff_pool.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index c0e0204b9630..b0f24ebd05f0 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -623,19 +623,30 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3 return nb_entries; } +static u32 xp_alloc_slow(struct xsk_buff_pool *pool, struct xdp_buff **xdp, + u32 max) +{ + int i; + + for (i = 0; i < max; i++) { + struct xdp_buff *buff; + + buff = xp_alloc(pool); + if (unlikely(!buff)) + return i; + *xdp = buff; + xdp++; + } + + return max; +} + u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) { u32 nb_entries1 = 0, nb_entries2; - if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) { - struct xdp_buff *buff; - - /* Slow path */ - buff = xp_alloc(pool); - if (buff) - *xdp = buff; - return !!buff; - } + if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) + return xp_alloc_slow(pool, xdp, max); if (unlikely(pool->free_list_cnt)) { nb_entries1 = xp_alloc_reused(pool, xdp, max); From 157f29152b61ca41809dd7ead29f5733adeced19 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 12 Sep 2024 08:56:19 -0700 Subject: [PATCH 09/15] netkit: Assign missing bpf_net_context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the introduction of struct bpf_net_context handling for XDP-redirect, the netkit driver has been missed, which also requires it because NETKIT_REDIRECT invokes skb_do_redirect() which is accessing the per-CPU variables. Otherwise we see the following crash: BUG: kernel NULL pointer dereference, address: 0000000000000038 bpf_redirect() netkit_xmit() dev_hard_start_xmit() Set the bpf_net_context before invoking netkit_xmit() program within the netkit driver. Fixes: 401cb7dae813 ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.") Signed-off-by: Breno Leitao Acked-by: Daniel Borkmann Reviewed-by: Sebastian Andrzej Siewior Reviewed-by: Toke Høiland-Jørgensen Acked-by: Nikolay Aleksandrov Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20240912155620.1334587-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/netkit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 16789cd446e9..3f4187102e77 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -65,6 +65,7 @@ static struct netkit *netkit_priv(const struct net_device *dev) static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct netkit *nk = netkit_priv(dev); enum netkit_action ret = READ_ONCE(nk->policy); netdev_tx_t ret_dev = NET_XMIT_SUCCESS; @@ -72,6 +73,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device *peer; int len = skb->len; + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); rcu_read_lock(); peer = rcu_dereference(nk->peer); if (unlikely(!peer || !(peer->flags & IFF_UP) || @@ -110,6 +112,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev) break; } rcu_read_unlock(); + bpf_net_ctx_clear(bpf_net_ctx); return ret_dev; } From b9c7ac4fe22c608acf6153a3329df2b6b6cd416c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 11 Sep 2024 15:51:11 +0200 Subject: [PATCH 10/15] r8169: disable ALDPS per default for RTL8125 En-Wei reported that traffic breaks if cable is unplugged for more than 3s and then re-plugged. This was supposed to be fixed by 621735f59064 ("r8169: fix rare issue with broken rx after link-down on RTL8125"). But apparently this didn't fix the issue for everybody. The 3s threshold rang a bell, as this is the delay after which ALDPS kicks in. And indeed disabling ALDPS fixes the issue for this user. Maybe this fixes the issue in general. In a follow-up step we could remove the first fix attempt and see whether anybody complains. Fixes: f1bce4ad2f1c ("r8169: add support for RTL8125") Tested-by: En-Wei WU Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/778b9d86-05c4-4856-be59-cde4487b9e52@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_phy_config.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index 1f74317beb88..e1e5d9672ae4 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -1060,6 +1060,7 @@ static void rtl8125a_2_hw_phy_config(struct rtl8169_private *tp, phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000); rtl8168g_enable_gphy_10m(phydev); + rtl8168g_disable_aldps(phydev); rtl8125a_config_eee_phy(phydev); } @@ -1099,6 +1100,7 @@ static void rtl8125b_hw_phy_config(struct rtl8169_private *tp, phy_modify_paged(phydev, 0xbf8, 0x12, 0xe000, 0xa000); rtl8125_legacy_force_mode(phydev); + rtl8168g_disable_aldps(phydev); rtl8125b_config_eee_phy(phydev); } From 2c84b0aa28b9e73e8c4b4ce038269469434ae372 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Wed, 11 Sep 2024 19:45:57 +0200 Subject: [PATCH 11/15] net: ipv6: rpl_iptunnel: Fix memory leak in rpl_input Free the skb before returning from rpl_input when skb_cow_head() fails. Use a "drop" label and goto instructions. Fixes: a7a29f9c361f ("net: ipv6: add rpl sr tunnel") Signed-off-by: Justin Iurman Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240911174557.11536-1-justin.iurman@uliege.be Signed-off-by: Jakub Kicinski --- net/ipv6/rpl_iptunnel.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 2c83b7586422..db3c19a42e1c 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -263,10 +263,8 @@ static int rpl_input(struct sk_buff *skb) rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); err = rpl_do_srh(skb, rlwt); - if (unlikely(err)) { - kfree_skb(skb); - return err; - } + if (unlikely(err)) + goto drop; local_bh_disable(); dst = dst_cache_get(&rlwt->cache); @@ -286,9 +284,13 @@ static int rpl_input(struct sk_buff *skb) err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) - return err; + goto drop; return dst_input(skb); + +drop: + kfree_skb(skb); + return err; } static int nla_put_rpl_srh(struct sk_buff *skb, int attrtype, From 9f3e7f11f21ac83cd99428390165177d4953b005 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 12 Sep 2024 10:49:22 -0700 Subject: [PATCH 12/15] fbnic: Set napi irq value after calling netif_napi_add The driver calls netif_napi_set_irq() and then calls netif_napi_add(), which calls netif_napi_add_weight(). At the end of netif_napi_add_weight() is a call to netif_napi_set_irq(napi, -1), which clears the previously set napi->irq value. Fix this by calling netif_napi_set_irq() after calling netif_napi_add(). This was found when reviewing another patch and I have no way to test this, but the fix seemed relatively straight forward. Fixes: bc6107771bb4 ("eth: fbnic: Allocate a netdevice and napi vectors with queues") Signed-off-by: Brett Creeley Reviewed-by: Joe Damato Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20240912174922.10550-1-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index 0ed4c9fff5d8..72f88ae7815f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -1012,14 +1012,14 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, nv->fbd = fbd; nv->v_idx = v_idx; - /* Record IRQ to NAPI struct */ - netif_napi_set_irq(&nv->napi, - pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx)); - /* Tie napi to netdev */ list_add(&nv->napis, &fbn->napis); netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll); + /* Record IRQ to NAPI struct */ + netif_napi_set_irq(&nv->napi, + pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx)); + /* Tie nv back to PCIe dev */ nv->dev = fbd->dev; From 99655a304e450baaae6b396cb942b9e47659d644 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 12 Sep 2024 19:01:20 +0800 Subject: [PATCH 13/15] net: tipc: avoid possible garbage value Clang static checker (scan-build) warning: net/tipc/bcast.c:305:4: The expression is an uninitialized value. The computed value will also be garbage [core.uninitialized.Assign] 305 | (*cong_link_cnt)++; | ^~~~~~~~~~~~~~~~~~ tipc_rcast_xmit() will increase cong_link_cnt's value, but cong_link_cnt is uninitialized. Although it won't really cause a problem, it's better to fix it. Fixes: dca4a17d24ee ("tipc: fix potential hanging after b/rcast changing") Signed-off-by: Su Hui Reviewed-by: Justin Stitt Link: https://patch.msgid.link/20240912110119.2025503-1-suhui@nfschina.com Signed-off-by: Jakub Kicinski --- net/tipc/bcast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 593846d25214..114fef65f92e 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -320,8 +320,8 @@ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, { struct tipc_msg *hdr, *_hdr; struct sk_buff_head tmpq; + u16 cong_link_cnt = 0; struct sk_buff *_skb; - u16 cong_link_cnt; int rc = 0; /* Is a cluster supporting with new capabilities ? */ From 04ccecfa959d3b9ae7348780d8e379c6486176ac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Sep 2024 08:31:47 +0000 Subject: [PATCH 14/15] ipv6: avoid possible NULL deref in rt6_uncached_list_flush_dev() Blamed commit accidentally removed a check for rt->rt6i_idev being NULL, as spotted by syzbot: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 UID: 0 PID: 10998 Comm: syz-executor Not tainted 6.11.0-rc6-syzkaller-00208-g625403177711 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 RIP: 0010:rt6_uncached_list_flush_dev net/ipv6/route.c:177 [inline] RIP: 0010:rt6_disable_ip+0x33e/0x7e0 net/ipv6/route.c:4914 Code: 41 80 3c 04 00 74 0a e8 90 d0 9b f7 48 8b 7c 24 08 48 8b 07 48 89 44 24 10 4c 89 f0 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 4c 89 f7 e8 64 d0 9b f7 48 8b 44 24 18 49 39 06 RSP: 0018:ffffc900047374e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 1ffff1100fdf8f33 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff88807efc78c0 RBP: ffffc900047375d0 R08: 0000000000000003 R09: fffff520008e6e8c R10: dffffc0000000000 R11: fffff520008e6e8c R12: 1ffff1100fdf8f18 R13: ffff88807efc7998 R14: 0000000000000000 R15: ffff88807efc7930 FS: 0000000000000000(0000) GS:ffff8880b8900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020002a80 CR3: 0000000022f62000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: addrconf_ifdown+0x15d/0x1bd0 net/ipv6/addrconf.c:3856 addrconf_notify+0x3cb/0x1020 notifier_call_chain+0x19f/0x3e0 kernel/notifier.c:93 call_netdevice_notifiers_extack net/core/dev.c:2032 [inline] call_netdevice_notifiers net/core/dev.c:2046 [inline] unregister_netdevice_many_notify+0xd81/0x1c40 net/core/dev.c:11352 unregister_netdevice_many net/core/dev.c:11414 [inline] unregister_netdevice_queue+0x303/0x370 net/core/dev.c:11289 unregister_netdevice include/linux/netdevice.h:3129 [inline] __tun_detach+0x6b9/0x1600 drivers/net/tun.c:685 tun_detach drivers/net/tun.c:701 [inline] tun_chr_close+0x108/0x1b0 drivers/net/tun.c:3510 __fput+0x24a/0x8a0 fs/file_table.c:422 task_work_run+0x24f/0x310 kernel/task_work.c:228 exit_task_work include/linux/task_work.h:40 [inline] do_exit+0xa2f/0x27f0 kernel/exit.c:882 do_group_exit+0x207/0x2c0 kernel/exit.c:1031 __do_sys_exit_group kernel/exit.c:1042 [inline] __se_sys_exit_group kernel/exit.c:1040 [inline] __x64_sys_exit_group+0x3f/0x40 kernel/exit.c:1040 x64_sys_call+0x2634/0x2640 arch/x86/include/generated/asm/syscalls_64.h:232 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f1acc77def9 Code: Unable to access opcode bytes at 0x7f1acc77decf. RSP: 002b:00007ffeb26fa738 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1acc77def9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000043 RBP: 00007f1acc7dd508 R08: 00007ffeb26f84d7 R09: 0000000000000003 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 0000000000000003 R14: 00000000ffffffff R15: 00007ffeb26fa8e0 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:rt6_uncached_list_flush_dev net/ipv6/route.c:177 [inline] RIP: 0010:rt6_disable_ip+0x33e/0x7e0 net/ipv6/route.c:4914 Code: 41 80 3c 04 00 74 0a e8 90 d0 9b f7 48 8b 7c 24 08 48 8b 07 48 89 44 24 10 4c 89 f0 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 4c 89 f7 e8 64 d0 9b f7 48 8b 44 24 18 49 39 06 RSP: 0018:ffffc900047374e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 1ffff1100fdf8f33 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff88807efc78c0 RBP: ffffc900047375d0 R08: 0000000000000003 R09: fffff520008e6e8c R10: dffffc0000000000 R11: fffff520008e6e8c R12: 1ffff1100fdf8f18 R13: ffff88807efc7998 R14: 0000000000000000 R15: ffff88807efc7930 FS: 0000000000000000(0000) GS:ffff8880b8900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020002a80 CR3: 0000000022f62000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: e332bc67cf5e ("ipv6: Don't call with rt6_uncached_list_flush_dev") Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: David Ahern Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20240913083147.3095442-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 219701caba1e..b4dcd8f3e7ba 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -174,7 +174,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev) struct net_device *rt_dev = rt->dst.dev; bool handled = false; - if (rt_idev->dev == dev) { + if (rt_idev && rt_idev->dev == dev) { rt->rt6i_idev = in6_dev_get(blackhole_netdev); in6_dev_put(rt_idev); handled = true; From 7052622fccb1efb850c6b55de477f65d03525a30 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 14 Sep 2024 12:56:51 +0300 Subject: [PATCH 15/15] netfilter: nft_socket: Fix a NULL vs IS_ERR() bug in nft_socket_cgroup_subtree_level() The cgroup_get_from_path() function never returns NULL, it returns error pointers. Update the error handling to match. Fixes: 7f3287db6543 ("netfilter: nft_socket: make cgroupsv2 matching work with namespaces") Signed-off-by: Dan Carpenter Acked-by: Florian Westphal Acked-by: Pablo Neira Ayuso Link: https://patch.msgid.link/bbc0c4e0-05cc-4f44-8797-2f4b3920a820@stanley.mountain Signed-off-by: Jakub Kicinski --- net/netfilter/nft_socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 12cdff640492..0a8883a93e83 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -61,8 +61,8 @@ static noinline int nft_socket_cgroup_subtree_level(void) struct cgroup *cgrp = cgroup_get_from_path("/"); int level; - if (!cgrp) - return -ENOENT; + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); level = cgrp->level;