From 6631c8da02cfad96c53b217cf647b511c7f34faf Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 6 Mar 2023 13:54:50 -0500 Subject: [PATCH 01/55] fs: prevent out-of-bounds array speculation when closing a file descriptor commit 609d54441493c99f21c1823dfd66fa7f4c512ff4 upstream. Google-Bug-Id: 114199369 Signed-off-by: Theodore Ts'o Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/file.c b/fs/file.c index 51f53a7dc221..e56059fa1b30 100644 --- a/fs/file.c +++ b/fs/file.c @@ -654,6 +654,7 @@ int __close_fd_get_file(unsigned int fd, struct file **res) fdt = files_fdtable(files); if (fd >= fdt->max_fds) goto out_unlock; + fd = array_index_nospec(fd, fdt->max_fds); file = fdt->fd[fd]; if (!file) goto out_unlock; From e40c1e9da1ece7122a9f9967e0840d955e98c471 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Tue, 7 Mar 2023 17:46:43 +0000 Subject: [PATCH 02/55] x86/CPU/AMD: Disable XSAVES on AMD family 0x17 commit b0563468eeac88ebc70559d52a0b66efc37e4e9d upstream. AMD Erratum 1386 is summarised as: XSAVES Instruction May Fail to Save XMM Registers to the Provided State Save Area This piece of accidental chronomancy causes the %xmm registers to occasionally reset back to an older value. Ignore the XSAVES feature on all AMD Zen1/2 hardware. The XSAVEC instruction (which works fine) is equivalent on affected parts. [ bp: Typos, move it into the F17h-specific function. ] Reported-by: Tavis Ormandy Signed-off-by: Andrew Cooper Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/20230307174643.1240184-1-andrew.cooper3@citrix.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/amd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bc6cd29ddf16..b212771df022 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -205,6 +205,15 @@ static void init_amd_k6(struct cpuinfo_x86 *c) return; } #endif + /* + * Work around Erratum 1386. The XSAVES instruction malfunctions in + * certain circumstances on Zen1/2 uarch, and not all parts have had + * updated microcode at the time of writing (March 2023). + * + * Affected parts all have no supervisor XSAVE states, meaning that + * the XSAVEC instruction (which works fine) is equivalent. + */ + clear_cpu_cap(c, X86_FEATURE_XSAVES); } static void init_amd_k7(struct cpuinfo_x86 *c) From 1277ba3db6dc058dcab7f12e004065cb6d8c93e9 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 13 Jan 2023 11:24:09 -0500 Subject: [PATCH 03/55] drm/connector: print max_requested_bpc in state debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7d386975f6a495902e679a3a250a7456d7e54765 upstream. This is useful to understand the bpc defaults and support of a driver. Signed-off-by: Harry Wentland Cc: Pekka Paalanen Cc: Sebastian Wick Cc: Vitaly.Prosyak@amd.com Cc: Uma Shankar Cc: Ville Syrjälä Cc: Joshua Ashton Cc: Jani Nikula Cc: dri-devel@lists.freedesktop.org Cc: amd-gfx@lists.freedesktop.org Reviewed-By: Joshua Ashton Link: https://patchwork.freedesktop.org/patch/msgid/20230113162428.33874-3-harry.wentland@amd.com Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 14aeaf736321..2fc0f221fb4e 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1006,6 +1006,7 @@ static void drm_atomic_connector_print_state(struct drm_printer *p, drm_printf(p, "connector[%u]: %s\n", connector->base.id, connector->name); drm_printf(p, "\tcrtc=%s\n", state->crtc ? state->crtc->name : "(null)"); drm_printf(p, "\tself_refresh_aware=%d\n", state->self_refresh_aware); + drm_printf(p, "\tmax_requested_bpc=%d\n", state->max_requested_bpc); if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) if (state->writeback_job && state->writeback_job->fb) From aee90b0278e3ac8e0c446380c0d6acc2a02b14b9 Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Fri, 10 Feb 2023 12:32:44 -0500 Subject: [PATCH 04/55] ext4: fix RENAME_WHITEOUT handling for inline directories commit c9f62c8b2dbf7240536c0cc9a4529397bb8bf38e upstream. A significant number of xfstests can cause ext4 to log one or more warning messages when they are run on a test file system where the inline_data feature has been enabled. An example: "EXT4-fs warning (device vdc): ext4_dirblock_csum_set:425: inode #16385: comm fsstress: No space for directory leaf checksum. Please run e2fsck -D." The xfstests include: ext4/057, 058, and 307; generic/013, 051, 068, 070, 076, 078, 083, 232, 269, 270, 390, 461, 475, 476, 482, 579, 585, 589, 626, 631, and 650. In this situation, the warning message indicates a bug in the code that performs the RENAME_WHITEOUT operation on a directory entry that has been stored inline. It doesn't detect that the directory is stored inline, and incorrectly attempts to compute a dirent block checksum on the whiteout inode when creating it. This attempt fails as a result of the integrity checking in get_dirent_tail (usually due to a failure to match the EXT4_FT_DIR_CSUM magic cookie), and the warning message is then emitted. Fix this by simply collecting the inlined data state at the time the search for the source directory entry is performed. Existing code handles the rest, and this is sufficient to eliminate all spurious warning messages produced by the tests above. Go one step further and do the same in the code that resets the source directory entry in the event of failure. The inlined state should be present in the "old" struct, but given the possibility of a race there's no harm in taking a conservative approach and getting that information again since the directory entry is being reread anyway. Fixes: b7ff91fd030d ("ext4: find old entry again if failed to rename whiteout") Cc: stable@kernel.org Signed-off-by: Eric Whitney Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230210173244.679890-1-enwlinux@gmail.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index ee9e931e2e92..30c37ef8b8af 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1502,11 +1502,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir, int has_inline_data = 1; ret = ext4_find_inline_entry(dir, fname, res_dir, &has_inline_data); - if (has_inline_data) { - if (inlined) - *inlined = 1; + if (inlined) + *inlined = has_inline_data; + if (has_inline_data) goto cleanup_and_exit; - } } if ((namelen <= 2) && (name[0] == '.') && @@ -3630,7 +3629,8 @@ static void ext4_resetent(handle_t *handle, struct ext4_renament *ent, * so the old->de may no longer valid and need to find it again * before reset old inode info. */ - old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, + &old.inlined); if (IS_ERR(old.bh)) retval = PTR_ERR(old.bh); if (!old.bh) @@ -3795,7 +3795,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, return retval; } - old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, + &old.inlined); if (IS_ERR(old.bh)) return PTR_ERR(old.bh); /* From c24f838493792b5e78a3596b4ca96375aa0af4c2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 16 Feb 2023 10:55:48 -0800 Subject: [PATCH 05/55] ext4: fix another off-by-one fsmap error on 1k block filesystems commit c993799baf9c5861f8df91beb80e1611b12efcbd upstream. Apparently syzbot figured out that issuing this FSMAP call: struct fsmap_head cmd = { .fmh_count = ...; .fmh_keys = { { .fmr_device = /* ext4 dev */, .fmr_physical = 0, }, { .fmr_device = /* ext4 dev */, .fmr_physical = 0, }, }, ... }; ret = ioctl(fd, FS_IOC_GETFSMAP, &cmd); Produces this crash if the underlying filesystem is a 1k-block ext4 filesystem: kernel BUG at fs/ext4/ext4.h:3331! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 3 PID: 3227965 Comm: xfs_io Tainted: G W O 6.2.0-rc8-achx Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 RIP: 0010:ext4_mb_load_buddy_gfp+0x47c/0x570 [ext4] RSP: 0018:ffffc90007c03998 EFLAGS: 00010246 RAX: ffff888004978000 RBX: ffffc90007c03a20 RCX: ffff888041618000 RDX: 0000000000000000 RSI: 00000000000005a4 RDI: ffffffffa0c99b11 RBP: ffff888012330000 R08: ffffffffa0c2b7d0 R09: 0000000000000400 R10: ffffc90007c03950 R11: 0000000000000000 R12: 0000000000000001 R13: 00000000ffffffff R14: 0000000000000c40 R15: ffff88802678c398 FS: 00007fdf2020c880(0000) GS:ffff88807e100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd318a5fe8 CR3: 000000007f80f001 CR4: 00000000001706e0 Call Trace: ext4_mballoc_query_range+0x4b/0x210 [ext4 dfa189daddffe8fecd3cdfd00564e0f265a8ab80] ext4_getfsmap_datadev+0x713/0x890 [ext4 dfa189daddffe8fecd3cdfd00564e0f265a8ab80] ext4_getfsmap+0x2b7/0x330 [ext4 dfa189daddffe8fecd3cdfd00564e0f265a8ab80] ext4_ioc_getfsmap+0x153/0x2b0 [ext4 dfa189daddffe8fecd3cdfd00564e0f265a8ab80] __ext4_ioctl+0x2a7/0x17e0 [ext4 dfa189daddffe8fecd3cdfd00564e0f265a8ab80] __x64_sys_ioctl+0x82/0xa0 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fdf20558aff RSP: 002b:00007ffd318a9e30 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000000200c0 RCX: 00007fdf20558aff RDX: 00007fdf1feb2010 RSI: 00000000c0c0583b RDI: 0000000000000003 RBP: 00005625c0634be0 R08: 00005625c0634c40 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fdf1feb2010 R13: 00005625be70d994 R14: 0000000000000800 R15: 0000000000000000 For GETFSMAP calls, the caller selects a physical block device by writing its block number into fsmap_head.fmh_keys[01].fmr_device. To query mappings for a subrange of the device, the starting byte of the range is written to fsmap_head.fmh_keys[0].fmr_physical and the last byte of the range goes in fsmap_head.fmh_keys[1].fmr_physical. IOWs, to query what mappings overlap with bytes 3-14 of /dev/sda, you'd set the inputs as follows: fmh_keys[0] = { .fmr_device = major(8, 0), .fmr_physical = 3}, fmh_keys[1] = { .fmr_device = major(8, 0), .fmr_physical = 14}, Which would return you whatever is mapped in the 12 bytes starting at physical offset 3. The crash is due to insufficient range validation of keys[1] in ext4_getfsmap_datadev. On 1k-block filesystems, block 0 is not part of the filesystem, which means that s_first_data_block is nonzero. ext4_get_group_no_and_offset subtracts this quantity from the blocknr argument before cracking it into a group number and a block number within a group. IOWs, block group 0 spans blocks 1-8192 (1-based) instead of 0-8191 (0-based) like what happens with larger blocksizes. The net result of this encoding is that blocknr < s_first_data_block is not a valid input to this function. The end_fsb variable is set from the keys that are copied from userspace, which means that in the above example, its value is zero. That leads to an underflow here: blocknr = blocknr - le32_to_cpu(es->s_first_data_block); The division then operates on -1: offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >> EXT4_SB(sb)->s_cluster_bits; Leaving an impossibly large group number (2^32-1) in blocknr. ext4_getfsmap_check_keys checked that keys[0].fmr_physical and keys[1].fmr_physical are in increasing order, but ext4_getfsmap_datadev adjusts keys[0].fmr_physical to be at least s_first_data_block. This implies that we have to check it again after the adjustment, which is the piece that I forgot. Reported-by: syzbot+6be2b977c89f79b6b153@syzkaller.appspotmail.com Fixes: 4a4956249dac ("ext4: fix off-by-one fsmap error on 1k block filesystems") Link: https://syzkaller.appspot.com/bug?id=79d5768e9bfe362911ac1a5057a36fc6b5c30002 Cc: stable@vger.kernel.org Signed-off-by: Darrick J. Wong Link: https://lore.kernel.org/r/Y+58NPTH7VNGgzdd@magnolia Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/fsmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 37347ba868b7..d1ef651948d7 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb, keys[0].fmr_physical = bofs; if (keys[1].fmr_physical >= eofs) keys[1].fmr_physical = eofs - 1; + if (keys[1].fmr_physical < keys[0].fmr_physical) + return 0; start_fsb = keys[0].fmr_physical; end_fsb = keys[1].fmr_physical; From b36093c6f77256f5f34160b610bc19d59455ff94 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 7 Mar 2023 09:52:52 +0800 Subject: [PATCH 06/55] ext4: move where set the MAY_INLINE_DATA flag is set commit 1dcdce5919115a471bf4921a57f20050c545a236 upstream. The only caller of ext4_find_inline_data_nolock() that needs setting of EXT4_STATE_MAY_INLINE_DATA flag is ext4_iget_extra_inode(). In ext4_write_inline_data_end() we just need to update inode->i_inline_off. Since we are going to add one more caller that does not need to set EXT4_STATE_MAY_INLINE_DATA, just move setting of EXT4_STATE_MAY_INLINE_DATA out to ext4_iget_extra_inode(). Signed-off-by: Ye Bin Cc: stable@kernel.org Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230307015253.2232062-2-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 1 - fs/ext4/inode.c | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 5ef13ede0445..4eb4c23b04b9 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -157,7 +157,6 @@ int ext4_find_inline_data_nolock(struct inode *inode) (void *)ext4_raw_inode(&is.iloc)); EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + le32_to_cpu(is.s.here->e_value_size); - ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } out: brelse(is.iloc.bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cafa06114a1d..63b10ee986d4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4862,8 +4862,13 @@ static inline int ext4_iget_extra_inode(struct inode *inode, if (EXT4_INODE_HAS_XATTR_SPACE(inode) && *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { + int err; + ext4_set_inode_state(inode, EXT4_STATE_XATTR); - return ext4_find_inline_data_nolock(inode); + err = ext4_find_inline_data_nolock(inode); + if (!err && ext4_has_inline_data(inode)) + ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + return err; } else EXT4_I(inode)->i_inline_off = 0; return 0; From 74d775083e9f3d9dadf9e3b5f3e0028d1ad0bd5c Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 7 Mar 2023 09:52:53 +0800 Subject: [PATCH 07/55] ext4: fix WARNING in ext4_update_inline_data commit 2b96b4a5d9443ca4cad58b0040be455803c05a42 upstream. Syzbot found the following issue: EXT4-fs (loop0): mounted filesystem 00000000-0000-0000-0000-000000000000 without journal. Quota mode: none. fscrypt: AES-256-CTS-CBC using implementation "cts-cbc-aes-aesni" fscrypt: AES-256-XTS using implementation "xts-aes-aesni" ------------[ cut here ]------------ WARNING: CPU: 0 PID: 5071 at mm/page_alloc.c:5525 __alloc_pages+0x30a/0x560 mm/page_alloc.c:5525 Modules linked in: CPU: 1 PID: 5071 Comm: syz-executor263 Not tainted 6.2.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 RIP: 0010:__alloc_pages+0x30a/0x560 mm/page_alloc.c:5525 RSP: 0018:ffffc90003c2f1c0 EFLAGS: 00010246 RAX: ffffc90003c2f220 RBX: 0000000000000014 RCX: 0000000000000000 RDX: 0000000000000028 RSI: 0000000000000000 RDI: ffffc90003c2f248 RBP: ffffc90003c2f2d8 R08: dffffc0000000000 R09: ffffc90003c2f220 R10: fffff52000785e49 R11: 1ffff92000785e44 R12: 0000000000040d40 R13: 1ffff92000785e40 R14: dffffc0000000000 R15: 1ffff92000785e3c FS: 0000555556c0d300(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f95d5e04138 CR3: 00000000793aa000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __alloc_pages_node include/linux/gfp.h:237 [inline] alloc_pages_node include/linux/gfp.h:260 [inline] __kmalloc_large_node+0x95/0x1e0 mm/slab_common.c:1113 __do_kmalloc_node mm/slab_common.c:956 [inline] __kmalloc+0xfe/0x190 mm/slab_common.c:981 kmalloc include/linux/slab.h:584 [inline] kzalloc include/linux/slab.h:720 [inline] ext4_update_inline_data+0x236/0x6b0 fs/ext4/inline.c:346 ext4_update_inline_dir fs/ext4/inline.c:1115 [inline] ext4_try_add_inline_entry+0x328/0x990 fs/ext4/inline.c:1307 ext4_add_entry+0x5a4/0xeb0 fs/ext4/namei.c:2385 ext4_add_nondir+0x96/0x260 fs/ext4/namei.c:2772 ext4_create+0x36c/0x560 fs/ext4/namei.c:2817 lookup_open fs/namei.c:3413 [inline] open_last_lookups fs/namei.c:3481 [inline] path_openat+0x12ac/0x2dd0 fs/namei.c:3711 do_filp_open+0x264/0x4f0 fs/namei.c:3741 do_sys_openat2+0x124/0x4e0 fs/open.c:1310 do_sys_open fs/open.c:1326 [inline] __do_sys_openat fs/open.c:1342 [inline] __se_sys_openat fs/open.c:1337 [inline] __x64_sys_openat+0x243/0x290 fs/open.c:1337 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Above issue happens as follows: ext4_iget ext4_find_inline_data_nolock ->i_inline_off=164 i_inline_size=60 ext4_try_add_inline_entry __ext4_mark_inode_dirty ext4_expand_extra_isize_ea ->i_extra_isize=32 s_want_extra_isize=44 ext4_xattr_shift_entries ->after shift i_inline_off is incorrect, actually is change to 176 ext4_try_add_inline_entry ext4_update_inline_dir get_max_inline_xattr_value_size if (EXT4_I(inode)->i_inline_off) entry = (struct ext4_xattr_entry *)((void *)raw_inode + EXT4_I(inode)->i_inline_off); free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); ->As entry is incorrect, then 'free' may be negative ext4_update_inline_data value = kzalloc(len, GFP_NOFS); -> len is unsigned int, maybe very large, then trigger warning when 'kzalloc()' To resolve the above issue we need to update 'i_inline_off' after 'ext4_xattr_shift_entries()'. We do not need to set EXT4_STATE_MAY_INLINE_DATA flag here, since ext4_mark_inode_dirty() already sets this flag if needed. Setting EXT4_STATE_MAY_INLINE_DATA when it is needed may trigger a BUG_ON in ext4_writepages(). Reported-by: syzbot+d30838395804afc2fa6f@syzkaller.appspotmail.com Cc: stable@kernel.org Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230307015253.2232062-3-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index cf794afbd52f..254bc6b26d69 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2819,6 +2819,9 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, (void *)header, total_ino); EXT4_I(inode)->i_extra_isize = new_extra_isize; + if (ext4_has_inline_data(inode)) + error = ext4_find_inline_data_nolock(inode); + cleanup: if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) { ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.", From 0d8a6c9a6415999fee1259ccf1796480c026b7d6 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Wed, 8 Mar 2023 11:26:43 +0800 Subject: [PATCH 08/55] ext4: zero i_disksize when initializing the bootloader inode commit f5361da1e60d54ec81346aee8e3d8baf1be0b762 upstream. If the boot loader inode has never been used before, the EXT4_IOC_SWAP_BOOT inode will initialize it, including setting the i_size to 0. However, if the "never before used" boot loader has a non-zero i_size, then i_disksize will be non-zero, and the inconsistency between i_size and i_disksize can trigger a kernel warning: WARNING: CPU: 0 PID: 2580 at fs/ext4/file.c:319 CPU: 0 PID: 2580 Comm: bb Not tainted 6.3.0-rc1-00004-g703695902cfa RIP: 0010:ext4_file_write_iter+0xbc7/0xd10 Call Trace: vfs_write+0x3b1/0x5c0 ksys_write+0x77/0x160 __x64_sys_write+0x22/0x30 do_syscall_64+0x39/0x80 Reproducer: 1. create corrupted image and mount it: mke2fs -t ext4 /tmp/foo.img 200 debugfs -wR "sif <5> size 25700" /tmp/foo.img mount -t ext4 /tmp/foo.img /mnt cd /mnt echo 123 > file 2. Run the reproducer program: posix_memalign(&buf, 1024, 1024) fd = open("file", O_RDWR | O_DIRECT); ioctl(fd, EXT4_IOC_SWAP_BOOT); write(fd, buf, 1024); Fix this by setting i_disksize as well as i_size to zero when initiaizing the boot loader inode. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217159 Cc: stable@kernel.org Signed-off-by: Zhihao Cheng Link: https://lore.kernel.org/r/20230308032643.641113-1-chengzhihao1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 645186927c1f..306ad7d0003b 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -179,6 +179,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ei_bl->i_flags = 0; inode_set_iversion(inode_bl, 1); i_size_write(inode_bl, 0); + EXT4_I(inode_bl)->i_disksize = inode_bl->i_size; inode_bl->i_mode = S_IFREG; if (ext4_has_feature_extents(sb)) { ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS); From 3cdf19a29cc1cfe71a0761c5178208d1aec505ed Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Tue, 7 Mar 2023 00:26:50 +0300 Subject: [PATCH 09/55] nfc: change order inside nfc_se_io error path commit 7d834b4d1ab66c48e8c0810fdeadaabb80fa2c81 upstream. cb_context should be freed on the error path in nfc_se_io as stated by commit 25ff6f8a5a3b ("nfc: fix memory leak of se_io context in nfc_genl_se_io"). Make the error path in nfc_se_io unwind everything in reverse order, i.e. free the cb_context after unlocking the device. Suggested-by: Krzysztof Kozlowski Signed-off-by: Fedor Pchelkin Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230306212650.230322-1-pchelkin@ispras.ru Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/nfc/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index c4c57830e963..66ab97131fd2 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1454,8 +1454,8 @@ static int nfc_se_io(struct nfc_dev *dev, u32 se_idx, return rc; error: - kfree(cb_context); device_unlock(&dev->dev); + kfree(cb_context); return rc; } From 11852cc78f6700dbea2930eb2fc3d296eb4303c5 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 6 Jul 2022 17:08:22 +0530 Subject: [PATCH 10/55] iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands [ Upstream commit bbe3a106580c21bc883fb0c9fa3da01534392fe8 ] By default, PCI segment is zero and can be omitted. To support system with non-zero PCI segment ID, modify the parsing functions to allow PCI segment ID. Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20220706113825.25582-33-vasant.hegde@amd.com Signed-off-by: Joerg Roedel Stable-dep-of: b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter") Signed-off-by: Sasha Levin --- .../admin-guide/kernel-parameters.txt | 34 ++++++++++---- drivers/iommu/amd_iommu_init.c | 44 ++++++++++++------- 2 files changed, 52 insertions(+), 26 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8f71a17ad544..916426383921 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1944,23 +1944,39 @@ ivrs_ioapic [HW,X86_64] Provide an override to the IOAPIC-ID<->DEVICE-ID - mapping provided in the IVRS ACPI table. For - example, to map IOAPIC-ID decimal 10 to - PCI device 00:14.0 write the parameter as: + mapping provided in the IVRS ACPI table. + By default, PCI segment is 0, and can be omitted. + For example: + * To map IOAPIC-ID decimal 10 to PCI device 00:14.0 + write the parameter as: ivrs_ioapic[10]=00:14.0 + * To map IOAPIC-ID decimal 10 to PCI segment 0x1 and + PCI device 00:14.0 write the parameter as: + ivrs_ioapic[10]=0001:00:14.0 ivrs_hpet [HW,X86_64] Provide an override to the HPET-ID<->DEVICE-ID - mapping provided in the IVRS ACPI table. For - example, to map HPET-ID decimal 0 to - PCI device 00:14.0 write the parameter as: + mapping provided in the IVRS ACPI table. + By default, PCI segment is 0, and can be omitted. + For example: + * To map HPET-ID decimal 0 to PCI device 00:14.0 + write the parameter as: ivrs_hpet[0]=00:14.0 + * To map HPET-ID decimal 10 to PCI segment 0x1 and + PCI device 00:14.0 write the parameter as: + ivrs_ioapic[10]=0001:00:14.0 ivrs_acpihid [HW,X86_64] Provide an override to the ACPI-HID:UID<->DEVICE-ID - mapping provided in the IVRS ACPI table. For - example, to map UART-HID:UID AMD0020:0 to - PCI device 00:14.5 write the parameter as: + mapping provided in the IVRS ACPI table. + + For example, to map UART-HID:UID AMD0020:0 to + PCI segment 0x1 and PCI device ID 00:14.5, + write the parameter as: + ivrs_acpihid[0001:00:14.5]=AMD0020:0 + + By default, PCI segment is 0, and can be omitted. + For example, PCI device 00:14.5 write the parameter as: ivrs_acpihid[00:14.5]=AMD0020:0 js= [HW,JOY] Analog joystick diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 533b920ed7df..f4e6173e749a 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -84,6 +84,10 @@ #define ACPI_DEVFLAG_ATSDIS 0x10000000 #define LOOP_TIMEOUT 2000000 + +#define IVRS_GET_SBDF_ID(seg, bus, dev, fd) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ + | ((dev & 0x1f) << 3) | (fn & 0x7)) + /* * ACPI table definitions * @@ -2971,15 +2975,17 @@ static int __init parse_amd_iommu_options(char *str) static int __init parse_ivrs_ioapic(char *str) { - unsigned int bus, dev, fn; + u32 seg = 0, bus, dev, fn; int ret, id, i; - u16 devid; + u32 devid; ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - pr_err("Invalid command line: ivrs_ioapic%s\n", str); - return 1; + ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); + if (ret != 5) { + pr_err("Invalid command line: ivrs_ioapic%s\n", str); + return 1; + } } if (early_ioapic_map_size == EARLY_MAP_SIZE) { @@ -2988,7 +2994,7 @@ static int __init parse_ivrs_ioapic(char *str) return 1; } - devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); cmdline_maps = true; i = early_ioapic_map_size++; @@ -3001,15 +3007,17 @@ static int __init parse_ivrs_ioapic(char *str) static int __init parse_ivrs_hpet(char *str) { - unsigned int bus, dev, fn; + u32 seg = 0, bus, dev, fn; int ret, id, i; - u16 devid; + u32 devid; ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - pr_err("Invalid command line: ivrs_hpet%s\n", str); - return 1; + ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); + if (ret != 5) { + pr_err("Invalid command line: ivrs_hpet%s\n", str); + return 1; + } } if (early_hpet_map_size == EARLY_MAP_SIZE) { @@ -3018,7 +3026,7 @@ static int __init parse_ivrs_hpet(char *str) return 1; } - devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); cmdline_maps = true; i = early_hpet_map_size++; @@ -3031,15 +3039,18 @@ static int __init parse_ivrs_hpet(char *str) static int __init parse_ivrs_acpihid(char *str) { - u32 bus, dev, fn; + u32 seg = 0, bus, dev, fn; char *hid, *uid, *p; char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; int ret, i; ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); if (ret != 4) { - pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); - return 1; + ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid); + if (ret != 5) { + pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); + return 1; + } } p = acpiid; @@ -3061,8 +3072,7 @@ static int __init parse_ivrs_acpihid(char *str) i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); - early_acpihid_map[i].devid = - ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn); early_acpihid_map[i].cmd_line = true; return 1; From 774c63f536885b43310c72ab9e8adc7ca52c1942 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 19 Sep 2022 10:56:38 -0500 Subject: [PATCH 11/55] iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options [ Upstream commit 1198d2316dc4265a97d0e8445a22c7a6d17580a4 ] Currently, these options cause the following libkmod error: libkmod: ERROR ../libkmod/libkmod-config.c:489 kcmdline_parse_result: \ Ignoring bad option on kernel command line while parsing module \ name: 'ivrs_xxxx[XX:XX' Fix by introducing a new parameter format for these options and throw a warning for the deprecated format. Users are still allowed to omit the PCI Segment if zero. Adding a Link: to the reason why we're modding the syntax parsing in the driver and not in libkmod. Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-modules/20200310082308.14318-2-lucas.demarchi@intel.com/ Reported-by: Kim Phillips Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Kim Phillips Link: https://lore.kernel.org/r/20220919155638.391481-2-kim.phillips@amd.com Signed-off-by: Joerg Roedel Stable-dep-of: b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter") Signed-off-by: Sasha Levin --- .../admin-guide/kernel-parameters.txt | 27 +++++-- drivers/iommu/amd_iommu_init.c | 79 +++++++++++++------ 2 files changed, 76 insertions(+), 30 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 916426383921..5e5704faae24 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1946,7 +1946,13 @@ Provide an override to the IOAPIC-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. - For example: + + For example, to map IOAPIC-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_ioapic=10@0001:00:14.0 + + Deprecated formats: * To map IOAPIC-ID decimal 10 to PCI device 00:14.0 write the parameter as: ivrs_ioapic[10]=00:14.0 @@ -1958,7 +1964,13 @@ Provide an override to the HPET-ID<->DEVICE-ID mapping provided in the IVRS ACPI table. By default, PCI segment is 0, and can be omitted. - For example: + + For example, to map HPET-ID decimal 10 to + PCI segment 0x1 and PCI device 00:14.0, + write the parameter as: + ivrs_hpet=10@0001:00:14.0 + + Deprecated formats: * To map HPET-ID decimal 0 to PCI device 00:14.0 write the parameter as: ivrs_hpet[0]=00:14.0 @@ -1969,15 +1981,20 @@ ivrs_acpihid [HW,X86_64] Provide an override to the ACPI-HID:UID<->DEVICE-ID mapping provided in the IVRS ACPI table. + By default, PCI segment is 0, and can be omitted. For example, to map UART-HID:UID AMD0020:0 to PCI segment 0x1 and PCI device ID 00:14.5, write the parameter as: - ivrs_acpihid[0001:00:14.5]=AMD0020:0 + ivrs_acpihid=AMD0020:0@0001:00:14.5 - By default, PCI segment is 0, and can be omitted. - For example, PCI device 00:14.5 write the parameter as: + Deprecated formats: + * To map UART-HID:UID AMD0020:0 to PCI segment is 0, + PCI device ID 00:14.5, write the parameter as: ivrs_acpihid[00:14.5]=AMD0020:0 + * To map UART-HID:UID AMD0020:0 to PCI segment 0x1 and + PCI device ID 00:14.5, write the parameter as: + ivrs_acpihid[0001:00:14.5]=AMD0020:0 js= [HW,JOY] Analog joystick See Documentation/input/joydev/joystick.rst. diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index f4e6173e749a..71e4a8eac3c9 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2976,18 +2976,24 @@ static int __init parse_amd_iommu_options(char *str) static int __init parse_ivrs_ioapic(char *str) { u32 seg = 0, bus, dev, fn; - int ret, id, i; + int id, i; u32 devid; - ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); - if (ret != 5) { - pr_err("Invalid command line: ivrs_ioapic%s\n", str); - return 1; - } + if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) + goto found; + + if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { + pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n", + str, id, seg, bus, dev, fn); + goto found; } + pr_err("Invalid command line: ivrs_ioapic%s\n", str); + return 1; + +found: if (early_ioapic_map_size == EARLY_MAP_SIZE) { pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n", str); @@ -3008,18 +3014,24 @@ static int __init parse_ivrs_ioapic(char *str) static int __init parse_ivrs_hpet(char *str) { u32 seg = 0, bus, dev, fn; - int ret, id, i; + int id, i; u32 devid; - ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn); - if (ret != 4) { - ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn); - if (ret != 5) { - pr_err("Invalid command line: ivrs_hpet%s\n", str); - return 1; - } + if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) + goto found; + + if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 || + sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) { + pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n", + str, id, seg, bus, dev, fn); + goto found; } + pr_err("Invalid command line: ivrs_hpet%s\n", str); + return 1; + +found: if (early_hpet_map_size == EARLY_MAP_SIZE) { pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n", str); @@ -3040,19 +3052,36 @@ static int __init parse_ivrs_hpet(char *str) static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; - char *hid, *uid, *p; + char *hid, *uid, *p, *addr; char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; - int ret, i; - - ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); - if (ret != 4) { - ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid); - if (ret != 5) { - pr_err("Invalid command line: ivrs_acpihid(%s)\n", str); - return 1; + int i; + + addr = strchr(str, '@'); + if (!addr) { + if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || + sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { + pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", + str, acpiid, seg, bus, dev, fn); + goto found; } + goto not_found; } + /* We have the '@', make it the terminator to get just the acpiid */ + *addr++ = 0; + + if (sscanf(str, "=%s", acpiid) != 1) + goto not_found; + + if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 || + sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4) + goto found; + +not_found: + pr_err("Invalid command line: ivrs_acpihid%s\n", str); + return 1; + +found: p = acpiid; hid = strsep(&p, ":"); uid = p; From 5e97dc748d13fad582136ba0c8cec215c7aeeb17 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Thu, 2 Feb 2023 08:26:56 +0000 Subject: [PATCH 12/55] iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter [ Upstream commit b6b26d86c61c441144c72f842f7469bb686e1211 ] The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow, because the string specifier in the format string sscanf() has no width limitation. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Cc: stable@vger.kernel.org Signed-off-by: Ilia.Gavrilov Reviewed-by: Kim Phillips Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu_init.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 71e4a8eac3c9..4a9feff340da 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -3049,15 +3049,26 @@ static int __init parse_ivrs_hpet(char *str) return 1; } +#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN) + static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; char *hid, *uid, *p, *addr; - char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; + char acpiid[ACPIID_LEN] = {0}; int i; addr = strchr(str, '@'); if (!addr) { + addr = strchr(str, '='); + if (!addr) + goto not_found; + + ++addr; + + if (strlen(addr) > ACPIID_LEN) + goto not_found; + if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", @@ -3070,6 +3081,9 @@ static int __init parse_ivrs_acpihid(char *str) /* We have the '@', make it the terminator to get just the acpiid */ *addr++ = 0; + if (strlen(str) > ACPIID_LEN + 1) + goto not_found; + if (sscanf(str, "=%s", acpiid) != 1) goto not_found; From 59349bfcffb1075b5f42da45bf33435f09ca62ca Mon Sep 17 00:00:00 2001 From: Liguang Zhang Date: Mon, 1 Mar 2021 22:05:15 +0800 Subject: [PATCH 13/55] ipmi:ssif: make ssif_i2c_send() void [ Upstream commit dcd10526ac5a0d6cc94ce60b9acfca458163277b ] This function actually needs no return value. So remove the unneeded check and make it void. Signed-off-by: Liguang Zhang Message-Id: <20210301140515.18951-1-zhangliguang@linux.alibaba.com> Signed-off-by: Corey Minyard Stable-dep-of: 95767ed78a18 ("ipmi:ssif: resend_msg() cannot fail") Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_ssif.c | 81 +++++++++-------------------------- 1 file changed, 20 insertions(+), 61 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index d6b69e19f78a..5ead5e7f0ce1 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -515,7 +515,7 @@ static int ipmi_ssif_thread(void *data) return 0; } -static int ssif_i2c_send(struct ssif_info *ssif_info, +static void ssif_i2c_send(struct ssif_info *ssif_info, ssif_i2c_done handler, int read_write, int command, unsigned char *data, unsigned int size) @@ -527,7 +527,6 @@ static int ssif_i2c_send(struct ssif_info *ssif_info, ssif_info->i2c_data = data; ssif_info->i2c_size = size; complete(&ssif_info->wake_thread); - return 0; } @@ -536,22 +535,12 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, static void start_get(struct ssif_info *ssif_info) { - int rv; - ssif_info->rtc_us_timer = 0; ssif_info->multi_pos = 0; - rv = ssif_i2c_send(ssif_info, msg_done_handler, I2C_SMBUS_READ, - SSIF_IPMI_RESPONSE, - ssif_info->recv, I2C_SMBUS_BLOCK_DATA); - if (rv < 0) { - /* request failed, just return the error. */ - if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) - dev_dbg(&ssif_info->client->dev, - "Error from i2c_non_blocking_op(5)\n"); - - msg_done_handler(ssif_info, -EIO, NULL, 0); - } + ssif_i2c_send(ssif_info, msg_done_handler, I2C_SMBUS_READ, + SSIF_IPMI_RESPONSE, + ssif_info->recv, I2C_SMBUS_BLOCK_DATA); } static void retry_timeout(struct timer_list *t) @@ -625,7 +614,6 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, { struct ipmi_smi_msg *msg; unsigned long oflags, *flags; - int rv; /* * We are single-threaded here, so no need for a lock until we @@ -671,17 +659,10 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_info->multi_len = len; ssif_info->multi_pos = 1; - rv = ssif_i2c_send(ssif_info, msg_done_handler, I2C_SMBUS_READ, - SSIF_IPMI_MULTI_PART_RESPONSE_MIDDLE, - ssif_info->recv, I2C_SMBUS_BLOCK_DATA); - if (rv < 0) { - if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) - dev_dbg(&ssif_info->client->dev, - "Error from i2c_non_blocking_op(1)\n"); - - result = -EIO; - } else - return; + ssif_i2c_send(ssif_info, msg_done_handler, I2C_SMBUS_READ, + SSIF_IPMI_MULTI_PART_RESPONSE_MIDDLE, + ssif_info->recv, I2C_SMBUS_BLOCK_DATA); + return; } else if (ssif_info->multi_pos) { /* Middle of multi-part read. Start the next transaction. */ int i; @@ -743,19 +724,12 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_info->multi_pos++; - rv = ssif_i2c_send(ssif_info, msg_done_handler, - I2C_SMBUS_READ, - SSIF_IPMI_MULTI_PART_RESPONSE_MIDDLE, - ssif_info->recv, - I2C_SMBUS_BLOCK_DATA); - if (rv < 0) { - if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) - dev_dbg(&ssif_info->client->dev, - "Error from ssif_i2c_send\n"); - - result = -EIO; - } else - return; + ssif_i2c_send(ssif_info, msg_done_handler, + I2C_SMBUS_READ, + SSIF_IPMI_MULTI_PART_RESPONSE_MIDDLE, + ssif_info->recv, + I2C_SMBUS_BLOCK_DATA); + return; } } @@ -936,8 +910,6 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, static void msg_written_handler(struct ssif_info *ssif_info, int result, unsigned char *data, unsigned int len) { - int rv; - /* We are single-threaded here, so no need for a lock. */ if (result < 0) { ssif_info->retries_left--; @@ -1000,18 +972,9 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, ssif_info->multi_data = NULL; } - rv = ssif_i2c_send(ssif_info, msg_written_handler, - I2C_SMBUS_WRITE, cmd, - data_to_send, I2C_SMBUS_BLOCK_DATA); - if (rv < 0) { - /* request failed, just return the error. */ - ssif_inc_stat(ssif_info, send_errors); - - if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) - dev_dbg(&ssif_info->client->dev, - "Error from i2c_non_blocking_op(3)\n"); - msg_done_handler(ssif_info, -EIO, NULL, 0); - } + ssif_i2c_send(ssif_info, msg_written_handler, + I2C_SMBUS_WRITE, cmd, + data_to_send, I2C_SMBUS_BLOCK_DATA); } else { /* Ready to request the result. */ unsigned long oflags, *flags; @@ -1040,7 +1003,6 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, static int start_resend(struct ssif_info *ssif_info) { - int rv; int command; ssif_info->got_alert = false; @@ -1062,12 +1024,9 @@ static int start_resend(struct ssif_info *ssif_info) ssif_info->data[0] = ssif_info->data_len; } - rv = ssif_i2c_send(ssif_info, msg_written_handler, I2C_SMBUS_WRITE, - command, ssif_info->data, I2C_SMBUS_BLOCK_DATA); - if (rv && (ssif_info->ssif_debug & SSIF_DEBUG_MSG)) - dev_dbg(&ssif_info->client->dev, - "Error from i2c_non_blocking_op(4)\n"); - return rv; + ssif_i2c_send(ssif_info, msg_written_handler, I2C_SMBUS_WRITE, + command, ssif_info->data, I2C_SMBUS_BLOCK_DATA); + return 0; } static int start_send(struct ssif_info *ssif_info, From d1a7f56b20da83805a84a4050594a5872c088328 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 25 Jan 2023 10:11:06 -0600 Subject: [PATCH 14/55] ipmi:ssif: resend_msg() cannot fail [ Upstream commit 95767ed78a181d5404202627499f9cde56053b96 ] The resend_msg() function cannot fail, but there was error handling around using it. Rework the handling of the error, and fix the out of retries debug reporting that was wrong around this, too. Cc: stable@vger.kernel.org Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_ssif.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 5ead5e7f0ce1..ff93009583ab 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -607,7 +607,7 @@ static void ssif_alert(struct i2c_client *client, enum i2c_alert_protocol type, start_get(ssif_info); } -static int start_resend(struct ssif_info *ssif_info); +static void start_resend(struct ssif_info *ssif_info); static void msg_done_handler(struct ssif_info *ssif_info, int result, unsigned char *data, unsigned int len) @@ -914,31 +914,17 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, if (result < 0) { ssif_info->retries_left--; if (ssif_info->retries_left > 0) { - if (!start_resend(ssif_info)) { - ssif_inc_stat(ssif_info, send_retries); - return; - } - /* request failed, just return the error. */ - ssif_inc_stat(ssif_info, send_errors); - - if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) - dev_dbg(&ssif_info->client->dev, - "%s: Out of retries\n", __func__); - msg_done_handler(ssif_info, -EIO, NULL, 0); + start_resend(ssif_info); return; } ssif_inc_stat(ssif_info, send_errors); - /* - * Got an error on transmit, let the done routine - * handle it. - */ if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) dev_dbg(&ssif_info->client->dev, - "%s: Error %d\n", __func__, result); + "%s: Out of retries\n", __func__); - msg_done_handler(ssif_info, result, NULL, 0); + msg_done_handler(ssif_info, -EIO, NULL, 0); return; } @@ -1001,7 +987,7 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, } } -static int start_resend(struct ssif_info *ssif_info) +static void start_resend(struct ssif_info *ssif_info) { int command; @@ -1026,7 +1012,6 @@ static int start_resend(struct ssif_info *ssif_info) ssif_i2c_send(ssif_info, msg_written_handler, I2C_SMBUS_WRITE, command, ssif_info->data, I2C_SMBUS_BLOCK_DATA); - return 0; } static int start_send(struct ssif_info *ssif_info, @@ -1041,7 +1026,8 @@ static int start_send(struct ssif_info *ssif_info, ssif_info->retries_left = SSIF_SEND_RETRIES; memcpy(ssif_info->data + 1, data, len); ssif_info->data_len = len; - return start_resend(ssif_info); + start_resend(ssif_info); + return 0; } /* Must be called with the message lock held. */ From 89fb3fa848838918d4b7956f429c533e243f0ce6 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 25 Jan 2023 10:41:48 -0600 Subject: [PATCH 15/55] ipmi:ssif: Remove rtc_us_timer [ Upstream commit 9e8b89926fb87e5625bdde6fd5de2c31fb1d83bf ] It was cruft left over from older handling of run to completion. Cc: stable@vger.kernel.org Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_ssif.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index ff93009583ab..9f1fb8a30a82 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -248,12 +248,6 @@ struct ssif_info { */ bool req_flags; - /* - * Used to perform timer operations when run-to-completion - * mode is on. This is a countdown timer. - */ - int rtc_us_timer; - /* Used for sending/receiving data. +1 for the length. */ unsigned char data[IPMI_MAX_MSG_LENGTH + 1]; unsigned int data_len; @@ -535,7 +529,6 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, static void start_get(struct ssif_info *ssif_info) { - ssif_info->rtc_us_timer = 0; ssif_info->multi_pos = 0; ssif_i2c_send(ssif_info, msg_done_handler, I2C_SMBUS_READ, @@ -627,7 +620,6 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, flags = ipmi_ssif_lock_cond(ssif_info, &oflags); ssif_info->waiting_alert = true; - ssif_info->rtc_us_timer = SSIF_MSG_USEC; if (!ssif_info->stopping) mod_timer(&ssif_info->retry_timer, jiffies + SSIF_MSG_JIFFIES); @@ -978,7 +970,6 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, /* Wait a jiffie then request the next message */ ssif_info->waiting_alert = true; ssif_info->retries_left = SSIF_RECV_RETRIES; - ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC; if (!ssif_info->stopping) mod_timer(&ssif_info->retry_timer, jiffies + SSIF_MSG_PART_JIFFIES); From e8ba1b693a8ba405394e4f3c65024e3e84be563d Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 3 Nov 2022 15:03:11 -0500 Subject: [PATCH 16/55] ipmi:ssif: Increase the message retry time [ Upstream commit 39721d62bbc16ebc9bb2bdc2c163658f33da3b0b ] The spec states that the minimum message retry time is 60ms, but it was set to 20ms. Correct it. Reported by: Tony Camuso Signed-off-by: Corey Minyard Stable-dep-of: 00bb7e763ec9 ("ipmi:ssif: Add a timer between request retries") Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_ssif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 9f1fb8a30a82..02ca0d9a0d8c 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -79,7 +79,7 @@ /* * Timer values */ -#define SSIF_MSG_USEC 20000 /* 20ms between message tries. */ +#define SSIF_MSG_USEC 60000 /* 60ms between message tries. */ #define SSIF_MSG_PART_USEC 5000 /* 5ms for a message part */ /* How many times to we retry sending/receiving the message. */ From 36c5682cbb46918becb7a7aff906931c7a5e147b Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 25 Jan 2023 10:34:47 -0600 Subject: [PATCH 17/55] ipmi:ssif: Add a timer between request retries [ Upstream commit 00bb7e763ec9f384cb382455cb6ba5588b5375cf ] The IPMI spec has a time (T6) specified between request retries. Add the handling for that. Reported by: Tony Camuso Cc: stable@vger.kernel.org Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin --- drivers/char/ipmi/ipmi_ssif.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 02ca0d9a0d8c..d5f068a10a5a 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -79,7 +79,8 @@ /* * Timer values */ -#define SSIF_MSG_USEC 60000 /* 60ms between message tries. */ +#define SSIF_MSG_USEC 60000 /* 60ms between message tries (T3). */ +#define SSIF_REQ_RETRY_USEC 60000 /* 60ms between send retries (T6). */ #define SSIF_MSG_PART_USEC 5000 /* 5ms for a message part */ /* How many times to we retry sending/receiving the message. */ @@ -87,7 +88,9 @@ #define SSIF_RECV_RETRIES 250 #define SSIF_MSG_MSEC (SSIF_MSG_USEC / 1000) +#define SSIF_REQ_RETRY_MSEC (SSIF_REQ_RETRY_USEC / 1000) #define SSIF_MSG_JIFFIES ((SSIF_MSG_USEC * 1000) / TICK_NSEC) +#define SSIF_REQ_RETRY_JIFFIES ((SSIF_REQ_RETRY_USEC * 1000) / TICK_NSEC) #define SSIF_MSG_PART_JIFFIES ((SSIF_MSG_PART_USEC * 1000) / TICK_NSEC) /* @@ -236,6 +239,9 @@ struct ssif_info { bool got_alert; bool waiting_alert; + /* Used to inform the timeout that it should do a resend. */ + bool do_resend; + /* * If set to true, this will request events the next time the * state machine is idle. @@ -536,22 +542,28 @@ static void start_get(struct ssif_info *ssif_info) ssif_info->recv, I2C_SMBUS_BLOCK_DATA); } +static void start_resend(struct ssif_info *ssif_info); + static void retry_timeout(struct timer_list *t) { struct ssif_info *ssif_info = from_timer(ssif_info, t, retry_timer); unsigned long oflags, *flags; - bool waiting; + bool waiting, resend; if (ssif_info->stopping) return; flags = ipmi_ssif_lock_cond(ssif_info, &oflags); + resend = ssif_info->do_resend; + ssif_info->do_resend = false; waiting = ssif_info->waiting_alert; ssif_info->waiting_alert = false; ipmi_ssif_unlock_cond(ssif_info, flags); if (waiting) start_get(ssif_info); + if (resend) + start_resend(ssif_info); } static void watch_timeout(struct timer_list *t) @@ -600,8 +612,6 @@ static void ssif_alert(struct i2c_client *client, enum i2c_alert_protocol type, start_get(ssif_info); } -static void start_resend(struct ssif_info *ssif_info); - static void msg_done_handler(struct ssif_info *ssif_info, int result, unsigned char *data, unsigned int len) { @@ -906,7 +916,13 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result, if (result < 0) { ssif_info->retries_left--; if (ssif_info->retries_left > 0) { - start_resend(ssif_info); + /* + * Wait the retry timeout time per the spec, + * then redo the send. + */ + ssif_info->do_resend = true; + mod_timer(&ssif_info->retry_timer, + jiffies + SSIF_REQ_RETRY_JIFFIES); return; } @@ -1318,8 +1334,10 @@ static int do_cmd(struct i2c_client *client, int len, unsigned char *msg, ret = i2c_smbus_write_block_data(client, SSIF_IPMI_REQUEST, len, msg); if (ret) { retry_cnt--; - if (retry_cnt > 0) + if (retry_cnt > 0) { + msleep(SSIF_REQ_RETRY_MSEC); goto retry1; + } return -ENODEV; } @@ -1459,8 +1477,10 @@ static int start_multipart_test(struct i2c_client *client, 32, msg); if (ret) { retry_cnt--; - if (retry_cnt > 0) + if (retry_cnt > 0) { + msleep(SSIF_REQ_RETRY_MSEC); goto retry_write; + } dev_err(&client->dev, "Could not write multi-part start, though the BMC said it could handle it. Just limit sends to one part.\n"); return ret; } From 01ed8ff22a5b03aae57aef39e6d880a8ced3cda6 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Thu, 16 Sep 2021 10:52:03 +0800 Subject: [PATCH 18/55] irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent [ Upstream commit 20c36ce2164f1774b487d443ece99b754bc6ad43 ] The 'size' is used in struct_size(domain, revmap, size) and its input parameter type is 'size_t'(unsigned int). Changing the size to 'unsigned int' to make the type consistent. Signed-off-by: Bixuan Cui Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210916025203.44841-1-cuibixuan@huawei.com Stable-dep-of: 8932c32c3053 ("irqdomain: Fix domain registration race") Signed-off-by: Sasha Levin --- include/linux/irqdomain.h | 2 +- kernel/irq/irqdomain.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 824d7a19dd66..2552f66a7a89 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -254,7 +254,7 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) } void irq_domain_free_fwnode(struct fwnode_handle *fwnode); -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 3d1b570a1dad..cfb5a96f023f 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -127,7 +127,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); * Allocates and initializes an irq_domain structure. * Returns pointer to IRQ domain, or NULL on failure. */ -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data) From 985d9fa06b4b90f67e18e4ae3573421406ef253e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 13 Feb 2023 11:42:49 +0100 Subject: [PATCH 19/55] irqdomain: Fix domain registration race [ Upstream commit 8932c32c3053accd50702b36e944ac2016cd103c ] Hierarchical domains created using irq_domain_create_hierarchy() are currently added to the domain list before having been fully initialised. This specifically means that a racing allocation request might fail to allocate irq data for the inner domains of a hierarchy in case the parent domain pointer has not yet been set up. Note that this is not really any issue for irqchip drivers that are registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE()) but could potentially cause trouble with drivers that are registered later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(), gpiochip drivers, etc.). Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()") Cc: stable@vger.kernel.org # 3.19 Signed-off-by: Marc Zyngier [ johan: add commit message ] Signed-off-by: Johan Hovold Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org Signed-off-by: Sasha Levin --- kernel/irq/irqdomain.c | 62 +++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index cfb5a96f023f..d40ae18fe661 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -114,23 +114,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode) } EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); -/** - * __irq_domain_add() - Allocate a new irq_domain data structure - * @fwnode: firmware node for the interrupt controller - * @size: Size of linear map; 0 for radix mapping only - * @hwirq_max: Maximum number of interrupts supported by controller - * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no - * direct mapping - * @ops: domain callbacks - * @host_data: Controller private data pointer - * - * Allocates and initializes an irq_domain structure. - * Returns pointer to IRQ domain, or NULL on failure. - */ -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, - irq_hw_number_t hwirq_max, int direct_max, - const struct irq_domain_ops *ops, - void *host_data) +static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, + unsigned int size, + irq_hw_number_t hwirq_max, + int direct_max, + const struct irq_domain_ops *ops, + void *host_data) { struct device_node *of_node = to_of_node(fwnode); struct irqchip_fwid *fwid; @@ -222,12 +211,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s domain->revmap_direct_max_irq = direct_max; irq_domain_check_hierarchy(domain); + return domain; +} + +static void __irq_domain_publish(struct irq_domain *domain) +{ mutex_lock(&irq_domain_mutex); debugfs_add_domain_dir(domain); list_add(&domain->link, &irq_domain_list); mutex_unlock(&irq_domain_mutex); pr_debug("Added domain %s\n", domain->name); +} + +/** + * __irq_domain_add() - Allocate a new irq_domain data structure + * @fwnode: firmware node for the interrupt controller + * @size: Size of linear map; 0 for radix mapping only + * @hwirq_max: Maximum number of interrupts supported by controller + * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no + * direct mapping + * @ops: domain callbacks + * @host_data: Controller private data pointer + * + * Allocates and initializes an irq_domain structure. + * Returns pointer to IRQ domain, or NULL on failure. + */ +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, + irq_hw_number_t hwirq_max, int direct_max, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain *domain; + + domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max, + ops, host_data); + if (domain) + __irq_domain_publish(domain); + return domain; } EXPORT_SYMBOL_GPL(__irq_domain_add); @@ -1068,12 +1089,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, struct irq_domain *domain; if (size) - domain = irq_domain_create_linear(fwnode, size, ops, host_data); + domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data); else - domain = irq_domain_create_tree(fwnode, ops, host_data); + domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data); + if (domain) { domain->parent = parent; domain->flags |= flags; + + __irq_domain_publish(domain); } return domain; From 1b48c70feefc499b62670521efa002ab01e05df5 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 16 Feb 2023 21:08:15 +0800 Subject: [PATCH 20/55] iommu/vt-d: Fix PASID directory pointer coherency [ Upstream commit 194b3348bdbb7db65375c72f3f774aee4cc6614e ] On platforms that do not support IOMMU Extended capability bit 0 Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing any translation structures. IOMMU access goes only directly to memory. Intel IOMMU code was missing a flush for the PASID table directory that resulted in the unrecoverable fault as shown below. This patch adds clflush calls whenever allocating and updating a PASID table directory to ensure cache coherency. On the reverse direction, there's no need to clflush the PASID directory pointer when we deactivate a context entry in that IOMMU hardware will not see the old PASID directory pointer after we clear the context entry. PASID directory entries are also never freed once allocated. DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000 [fault reason 0x51] SM: Present bit in Directory Entry is clear DMAR: Dump dmar1 table entries for IOVA 0x1026a4000 DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001 DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401 DMAR: pasid dir entry: 0x0000000101b4e001 DMAR: pasid table entry[0]: 0x0000000000000109 DMAR: pasid table entry[1]: 0x0000000000000001 DMAR: pasid table entry[2]: 0x0000000000000000 DMAR: pasid table entry[3]: 0x0000000000000000 DMAR: pasid table entry[4]: 0x0000000000000000 DMAR: pasid table entry[5]: 0x0000000000000000 DMAR: pasid table entry[6]: 0x0000000000000000 DMAR: pasid table entry[7]: 0x0000000000000000 DMAR: PTE not present at level 4 Cc: Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables") Reviewed-by: Kevin Tian Reported-by: Sukumar Ghorai Signed-off-by: Ashok Raj Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-pasid.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index e7cb0b8a7332..58f060006ba3 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -166,6 +166,9 @@ int intel_pasid_alloc_table(struct device *dev) attach_out: device_attach_pasid_table(info, pasid_table); + if (!ecap_coherent(info->iommu->ecap)) + clflush_cache_range(pasid_table->table, size); + return 0; } @@ -250,6 +253,10 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid) WRITE_ONCE(dir[dir_index].val, (u64)virt_to_phys(entries) | PASID_PTE_PRESENT); + if (!ecap_coherent(info->iommu->ecap)) { + clflush_cache_range(entries, VTD_PAGE_SIZE); + clflush_cache_range(&dir[dir_index].val, sizeof(*dir)); + } } spin_unlock(&pasid_lock); From a6e44cb21534d2377b8c74a2436d365d38145198 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 3 Feb 2020 21:46:43 +0200 Subject: [PATCH 21/55] SMB3: Backup intent flag missing from some more ops [ Upstream commit 0f060936e490c6279dfe773d75d526d3d3d77111 ] When "backup intent" is requested on the mount (e.g. backupuid or backupgid mount options), the corresponding flag was missing from some of the operations. Change all operations to use the macro cifs_create_options() to set the backup intent flag if needed. Signed-off-by: Amir Goldstein Signed-off-by: Steve French Stable-dep-of: d447e794a372 ("cifs: Fix uninitialized memory read in smb3_qfs_tcon()") Signed-off-by: Sasha Levin --- fs/cifs/cifsacl.c | 14 +++----- fs/cifs/cifsfs.c | 2 +- fs/cifs/cifsglob.h | 6 ++-- fs/cifs/cifsproto.h | 8 +++++ fs/cifs/connect.c | 2 +- fs/cifs/dir.c | 5 +-- fs/cifs/file.c | 10 ++---- fs/cifs/inode.c | 8 ++--- fs/cifs/ioctl.c | 2 +- fs/cifs/link.c | 18 +++------- fs/cifs/smb1ops.c | 19 +++++------ fs/cifs/smb2inode.c | 9 ++--- fs/cifs/smb2ops.c | 81 +++++++++++++++------------------------------ fs/cifs/smb2proto.h | 2 +- 14 files changed, 68 insertions(+), 118 deletions(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 1f55072aa302..1766d6d077f2 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1056,7 +1056,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, struct cifs_ntsd *pntsd = NULL; int oplock = 0; unsigned int xid; - int rc, create_options = 0; + int rc; struct cifs_tcon *tcon; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); struct cifs_fid fid; @@ -1068,13 +1068,10 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = READ_CONTROL; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -1119,7 +1116,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, { int oplock = 0; unsigned int xid; - int rc, access_flags, create_options = 0; + int rc, access_flags; struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); @@ -1132,9 +1129,6 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) access_flags = WRITE_OWNER; else @@ -1143,7 +1137,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = access_flags; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index aa7827da7b17..871a7b044c1b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -275,7 +275,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_ffree = 0; /* unlimited */ if (server->ops->queryfs) - rc = server->ops->queryfs(xid, tcon, buf); + rc = server->ops->queryfs(xid, tcon, cifs_sb, buf); free_xid(xid); return rc; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7c0eb110e263..253321adc266 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -300,7 +300,8 @@ struct smb_version_operations { const char *, struct dfs_info3_param **, unsigned int *, const struct nls_table *, int); /* informational QFS call */ - void (*qfs_tcon)(const unsigned int, struct cifs_tcon *); + void (*qfs_tcon)(const unsigned int, struct cifs_tcon *, + struct cifs_sb_info *); /* check if a path is accessible or not */ int (*is_path_accessible)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *); @@ -408,7 +409,7 @@ struct smb_version_operations { struct cifsInodeInfo *); /* query remote filesystem */ int (*queryfs)(const unsigned int, struct cifs_tcon *, - struct kstatfs *); + struct cifs_sb_info *, struct kstatfs *); /* send mandatory brlock to the server */ int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64, __u64, __u32, int, int, bool); @@ -489,6 +490,7 @@ struct smb_version_operations { /* ioctl passthrough for query_info */ int (*ioctl_query_info)(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, __le16 *path, int is_dir, unsigned long p); /* make unix special files (block, char, fifo, socket) */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 56a4740ae93a..a5fab9afd699 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -600,4 +600,12 @@ static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, } #endif +static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options) +{ + if (backup_cred(cifs_sb)) + return options | CREATE_OPEN_BACKUP_INTENT; + else + return options; +} + #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 25a2a98ebda8..6c8dd7c0b83a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4319,7 +4319,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, /* do not care if a following call succeed - informational */ if (!tcon->pipe && server->ops->qfs_tcon) { - server->ops->qfs_tcon(*xid, tcon); + server->ops->qfs_tcon(*xid, tcon, cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) { if (tcon->fsDevInfo.DeviceCharacteristics & cpu_to_le32(FILE_READ_ONLY_DEVICE)) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 9ae9a514676c..548047a709bf 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -357,13 +357,10 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, if (!tcon->unix_ext && (mode & S_IWUGO) == 0) create_options |= CREATE_OPTION_READONLY; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = fid; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index eb9b34442b1d..86924831fd4b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -223,9 +223,6 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, if (!buf) return -ENOMEM; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (f_flags & O_SYNC) create_options |= CREATE_WRITE_THROUGH; @@ -236,7 +233,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = fid; @@ -751,9 +748,6 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) desired_access = cifs_convert_flags(cfile->f_flags); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (cfile->f_flags & O_SYNC) create_options |= CREATE_WRITE_THROUGH; @@ -767,7 +761,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = &cfile->fid; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index fd9e289f3e72..af0980c720c7 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -472,9 +472,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -1225,7 +1223,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = DELETE | FILE_WRITE_ATTRIBUTES; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -1763,7 +1761,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, oparms.cifs_sb = cifs_sb; /* open the file to be renamed -- we need DELETE perms */ oparms.desired_access = DELETE; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = from_path; oparms.fid = &fid; diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 9266dddd4b1e..bc08d856ee05 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -65,7 +65,7 @@ static long cifs_ioctl_query_info(unsigned int xid, struct file *filep, if (tcon->ses->server->ops->ioctl_query_info) rc = tcon->ses->server->ops->ioctl_query_info( - xid, tcon, utf16_path, + xid, tcon, cifs_sb, utf16_path, filep->private_data ? 0 : 1, p); else rc = -EOPNOTSUPP; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index b4b15d611ded..02949a2f2860 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -318,7 +318,7 @@ cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -356,15 +356,11 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifs_io_parms io_parms; - int create_options = CREATE_NOT_DIR; - - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_CREATE; oparms.path = path; oparms.fid = &fid; @@ -405,9 +401,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.fid = &fid; oparms.reconnect = false; @@ -460,14 +454,10 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifs_io_parms io_parms; - int create_options = CREATE_NOT_DIR; __le16 *utf16_path; __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct kvec iov[2]; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - cifs_dbg(FYI, "%s: path: %s\n", __func__, path); utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); @@ -477,7 +467,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_CREATE; oparms.fid = &fid; oparms.reconnect = false; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index e523c05a4487..b130efaf8feb 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -504,7 +504,8 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) } static void -cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { CIFSSMBQFSDeviceInfo(xid, tcon); CIFSSMBQFSAttributeInfo(xid, tcon); @@ -565,7 +566,7 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -793,7 +794,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = SYNCHRONIZE | FILE_WRITE_ATTRIBUTES; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -872,7 +873,7 @@ cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, static int cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { int rc = -EOPNOTSUPP; @@ -970,7 +971,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; - oparms.create_options = OPEN_REPARSE_POINT; + oparms.create_options = cifs_create_options(cifs_sb, + OPEN_REPARSE_POINT); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -1029,7 +1031,6 @@ cifs_make_node(unsigned int xid, struct inode *inode, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *newinode = NULL; int rc = -EPERM; - int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL; FILE_ALL_INFO *buf = NULL; struct cifs_io_parms io_parms; __u32 oplock = 0; @@ -1090,13 +1091,11 @@ cifs_make_node(unsigned int xid, struct inode *inode, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR | + CREATE_OPTION_SPECIAL); oparms.disposition = FILE_CREATE; oparms.path = full_path; oparms.fid = &fid; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index f2a6f7f28340..c9abda93d65b 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -98,9 +98,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = desired_access; oparms.disposition = create_disposition; - oparms.create_options = create_options; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; oparms.mode = mode; @@ -456,7 +454,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, /* If it is a root and its handle is cached then use it */ if (!strlen(full_path) && !no_cached_open) { - rc = open_shroot(xid, tcon, &fid); + rc = open_shroot(xid, tcon, cifs_sb, &fid); if (rc) goto out; @@ -473,9 +471,6 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 944c575a4a70..a3bb2c7468c7 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -644,7 +644,8 @@ smb2_cached_lease_break(struct work_struct *work) /* * Open the directory at the root of a share */ -int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) +int open_shroot(unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid) { struct cifs_ses *ses = tcon->ses; struct TCP_Server_Info *server = ses->server; @@ -696,7 +697,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE; oparms.tcon = tcon; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; oparms.fid = pfid; @@ -812,7 +813,8 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) } static void -smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -824,7 +826,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -832,7 +834,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL, NULL); else - rc = open_shroot(xid, tcon, &fid); + rc = open_shroot(xid, tcon, cifs_sb, &fid); if (rc) return; @@ -854,7 +856,8 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) } static void -smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -865,7 +868,7 @@ smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -900,10 +903,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -1179,10 +1179,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_WRITE_EA; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -1414,6 +1411,7 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, static int smb2_ioctl_query_info(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, __le16 *path, int is_dir, unsigned long p) { @@ -1439,6 +1437,7 @@ smb2_ioctl_query_info(const unsigned int xid, struct kvec close_iov[1]; unsigned int size[2]; void *data[2]; + int create_options = is_dir ? CREATE_NOT_FILE : CREATE_NOT_DIR; memset(rqst, 0, sizeof(rqst)); resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER; @@ -1474,10 +1473,7 @@ smb2_ioctl_query_info(const unsigned int xid, memset(&oparms, 0, sizeof(oparms)); oparms.tcon = tcon; oparms.disposition = FILE_OPEN; - if (is_dir) - oparms.create_options = CREATE_NOT_FILE; - else - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; @@ -2074,10 +2070,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = fid; oparms.reconnect = false; @@ -2278,10 +2271,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = desired_access; oparms.disposition = FILE_OPEN; - if (cifs_sb && backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2337,7 +2327,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, static int smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { struct smb2_query_info_rsp *rsp; struct smb2_fs_full_size_info *info = NULL; @@ -2374,7 +2364,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, static int smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -2383,12 +2373,12 @@ smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; if (!tcon->posix_extensions) - return smb2_queryfs(xid, tcon, buf); + return smb2_queryfs(xid, tcon, cifs_sb, buf); oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2657,6 +2647,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct smb2_create_rsp *create_rsp; struct smb2_ioctl_rsp *ioctl_rsp; struct reparse_data_buffer *reparse_buf; + int create_options = is_reparse_point ? OPEN_REPARSE_POINT : 0; u32 plen; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); @@ -2683,14 +2674,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - if (is_reparse_point) - oparms.create_options = OPEN_REPARSE_POINT; - + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; @@ -2869,11 +2853,6 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; @@ -2884,6 +2863,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, oparms.tcon = tcon; oparms.desired_access = READ_CONTROL; oparms.disposition = FILE_OPEN; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2925,11 +2905,6 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) access_flags = WRITE_OWNER; else @@ -2944,6 +2919,7 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, oparms.tcon = tcon; oparms.desired_access = access_flags; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -4481,7 +4457,6 @@ smb2_make_node(unsigned int xid, struct inode *inode, { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); int rc = -EPERM; - int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL; FILE_ALL_INFO *buf = NULL; struct cifs_io_parms io_parms; __u32 oplock = 0; @@ -4517,13 +4492,11 @@ smb2_make_node(unsigned int xid, struct inode *inode, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR | + CREATE_OPTION_SPECIAL); oparms.disposition = FILE_CREATE; oparms.path = full_path; oparms.fid = &fid; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 57f7075a3587..4d4c0faa3d8a 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -67,7 +67,7 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid); extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon, - struct cifs_fid *pfid); + struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid); extern void close_shroot(struct cached_fid *cfid); extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src); From 57f78226b12753f5c6ce2a29e0f4a259c6282c82 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 11 Jan 2023 12:37:58 +0100 Subject: [PATCH 22/55] cifs: Fix uninitialized memory read in smb3_qfs_tcon() [ Upstream commit d447e794a37288ec7a080aa1b044a8d9deebbab7 ] oparms was not fully initialized Signed-off-by: Volker Lendecke Reviewed-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2ops.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index a3bb2c7468c7..4cb0ebe7330e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -823,12 +823,13 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; bool no_cached_open = tcon->nohandlecache; - oparms.tcon = tcon; - oparms.desired_access = FILE_READ_ATTRIBUTES; - oparms.disposition = FILE_OPEN; - oparms.create_options = cifs_create_options(cifs_sb, 0); - oparms.fid = &fid; - oparms.reconnect = false; + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .desired_access = FILE_READ_ATTRIBUTES, + .disposition = FILE_OPEN, + .create_options = cifs_create_options(cifs_sb, 0), + .fid = &fid, + }; if (no_cached_open) rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL, From 891a3cba425cf483d96facca55aebd6ff1da4338 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 10 Feb 2023 12:52:00 -0800 Subject: [PATCH 23/55] scsi: core: Remove the /proc/scsi/${proc_name} directory earlier [ Upstream commit fc663711b94468f4e1427ebe289c9f05669699c9 ] Remove the /proc/scsi/${proc_name} directory earlier to fix a race condition between unloading and reloading kernel modules. This fixes a bug introduced in 2009 by commit 77c019768f06 ("[SCSI] fix /proc memory leak in the SCSI core"). Fix the following kernel warning: proc_dir_entry 'scsi/scsi_debug' already registered WARNING: CPU: 19 PID: 27986 at fs/proc/generic.c:376 proc_register+0x27d/0x2e0 Call Trace: proc_mkdir+0xb5/0xe0 scsi_proc_hostdir_add+0xb5/0x170 scsi_host_alloc+0x683/0x6c0 sdebug_driver_probe+0x6b/0x2d0 [scsi_debug] really_probe+0x159/0x540 __driver_probe_device+0xdc/0x230 driver_probe_device+0x4f/0x120 __device_attach_driver+0xef/0x180 bus_for_each_drv+0xe5/0x130 __device_attach+0x127/0x290 device_initial_probe+0x17/0x20 bus_probe_device+0x110/0x130 device_add+0x673/0xc80 device_register+0x1e/0x30 sdebug_add_host_helper+0x1a7/0x3b0 [scsi_debug] scsi_debug_init+0x64f/0x1000 [scsi_debug] do_one_initcall+0xd7/0x470 do_init_module+0xe7/0x330 load_module+0x122a/0x12c0 __do_sys_finit_module+0x124/0x1a0 __x64_sys_finit_module+0x46/0x50 do_syscall_64+0x38/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Link: https://lore.kernel.org/r/20230210205200.36973-3-bvanassche@acm.org Cc: Alan Stern Cc: Yi Zhang Cc: stable@vger.kernel.org Fixes: 77c019768f06 ("[SCSI] fix /proc memory leak in the SCSI core") Reported-by: Yi Zhang Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hosts.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 45885e80992f..b08d963013db 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -179,6 +179,7 @@ void scsi_remove_host(struct Scsi_Host *shost) scsi_forget_host(shost); mutex_unlock(&shost->scan_mutex); scsi_proc_host_rm(shost); + scsi_proc_hostdir_rm(shost->hostt); spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_DEL)) @@ -318,6 +319,7 @@ static void scsi_host_dev_release(struct device *dev) struct Scsi_Host *shost = dev_to_shost(dev); struct device *parent = dev->parent; + /* In case scsi_remove_host() has not been called. */ scsi_proc_hostdir_rm(shost->hostt); /* Wait for functions invoked through call_rcu(&shost->rcu, ...) */ From 8dac5a63cf79707b547ea3d425fead5f4482198f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 26 Jan 2023 12:22:21 +0100 Subject: [PATCH 24/55] ext4: Fix possible corruption when moving a directory [ Upstream commit 0813299c586b175d7edb25f56412c54b812d0379 ] When we are renaming a directory to a different directory, we need to update '..' entry in the moved directory. However nothing prevents moved directory from being modified and even converted from the inline format to the normal format. When such race happens the rename code gets confused and we crash. Fix the problem by locking the moved directory. CC: stable@vger.kernel.org Fixes: 32f7f22c0b52 ("ext4: let ext4_rename handle inline dir") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230126112221.11866-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/namei.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 30c37ef8b8af..f9d11f59df7d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3855,9 +3855,16 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) goto end_rename; } + /* + * We need to protect against old.inode directory getting + * converted from inline directory format into a normal one. + */ + inode_lock_nested(old.inode, I_MUTEX_NONDIR2); retval = ext4_rename_dir_prepare(handle, &old); - if (retval) + if (retval) { + inode_unlock(old.inode); goto end_rename; + } } /* * If we're renaming a file within an inline_data dir and adding or @@ -3953,6 +3960,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, } else { ext4_journal_stop(handle); } + if (old.dir_bh) + inode_unlock(old.inode); release_bh: brelse(old.dir_bh); brelse(old.bh); From 43f33642f260cebc0d9a4da8a4a2ab48bf293741 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 14 Feb 2023 05:09:53 +0300 Subject: [PATCH 25/55] drm/msm/a5xx: fix setting of the CP_PREEMPT_ENABLE_LOCAL register [ Upstream commit a7a4c19c36de1e4b99b06e4060ccc8ab837725bc ] Rather than writing CP_PREEMPT_ENABLE_GLOBAL twice, follow the vendor kernel and set CP_PREEMPT_ENABLE_LOCAL register instead. a5xx_submit() will override it during submission, but let's get the sequence correct. Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets") Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/522638/ Link: https://lore.kernel.org/r/20230214020956.164473-2-dmitry.baryshkov@linaro.org Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index e3579e5ffa14..593b8d83179c 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -135,8 +135,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_RING(ring, 1); /* Enable local preemption for finegrain preemption */ - OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); - OUT_RING(ring, 0x02); + OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); + OUT_RING(ring, 0x1); /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ OUT_PKT7(ring, CP_YIELD_ENABLE, 1); From 0a3664a1058d4b2b1ea2112cc275ca47fba7fc08 Mon Sep 17 00:00:00 2001 From: Kang Chen Date: Mon, 27 Feb 2023 17:30:37 +0800 Subject: [PATCH 26/55] nfc: fdp: add null check of devm_kmalloc_array in fdp_nci_i2c_read_device_properties [ Upstream commit 11f180a5d62a51b484e9648f9b310e1bd50b1a57 ] devm_kmalloc_array may fails, *fw_vsc_cfg might be null and cause out-of-bounds write in device_property_read_u8_array later. Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver") Signed-off-by: Kang Chen Reviewed-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230227093037.907654-1-void0red@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/nfc/fdp/i2c.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index ad0abb1f0bae..7305f1a06e97 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -255,6 +255,9 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev, len, sizeof(**fw_vsc_cfg), GFP_KERNEL); + if (!*fw_vsc_cfg) + goto alloc_err; + r = device_property_read_u8_array(dev, FDP_DP_FW_VSC_CFG_NAME, *fw_vsc_cfg, len); @@ -268,6 +271,7 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev, *fw_vsc_cfg = NULL; } +alloc_err: dev_dbg(dev, "Clock type: %d, clock frequency: %d, VSC: %s", *clock_type, *clock_freq, *fw_vsc_cfg != NULL ? "yes" : "no"); } From 783f218940b3c7b872e4111d0145000f26ecbdf6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Feb 2023 15:30:24 +0000 Subject: [PATCH 27/55] ila: do not generate empty messages in ila_xlat_nl_cmd_get_mapping() [ Upstream commit 693aa2c0d9b6d5b1f2745d31b6e70d09dbbaf06e ] ila_xlat_nl_cmd_get_mapping() generates an empty skb, triggerring a recent sanity check [1]. Instead, return an error code, so that user space can get it. [1] skb_assert_len WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 skb_assert_len include/linux/skbuff.h:2527 [inline] WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 Modules linked in: CPU: 0 PID: 5923 Comm: syz-executor269 Not tainted 6.2.0-syzkaller-18300-g2ebd1fbb946d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_assert_len include/linux/skbuff.h:2527 [inline] pc : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 lr : skb_assert_len include/linux/skbuff.h:2527 [inline] lr : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 sp : ffff80001e0d6c40 x29: ffff80001e0d6e60 x28: dfff800000000000 x27: ffff0000c86328c0 x26: dfff800000000000 x25: ffff0000c8632990 x24: ffff0000c8632a00 x23: 0000000000000000 x22: 1fffe000190c6542 x21: ffff0000c8632a10 x20: ffff0000c8632a00 x19: ffff80001856e000 x18: ffff80001e0d5fc0 x17: 0000000000000000 x16: ffff80001235d16c x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000001 x12: 0000000000000001 x11: ff80800008353a30 x10: 0000000000000000 x9 : 21567eaf25bfb600 x8 : 21567eaf25bfb600 x7 : 0000000000000001 x6 : 0000000000000001 x5 : ffff80001e0d6558 x4 : ffff800015c74760 x3 : ffff800008596744 x2 : 0000000000000001 x1 : 0000000100000000 x0 : 000000000000000e Call trace: skb_assert_len include/linux/skbuff.h:2527 [inline] __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 dev_queue_xmit include/linux/netdevice.h:3033 [inline] __netlink_deliver_tap_skb net/netlink/af_netlink.c:307 [inline] __netlink_deliver_tap+0x45c/0x6f8 net/netlink/af_netlink.c:325 netlink_deliver_tap+0xf4/0x174 net/netlink/af_netlink.c:338 __netlink_sendskb net/netlink/af_netlink.c:1283 [inline] netlink_sendskb+0x6c/0x154 net/netlink/af_netlink.c:1292 netlink_unicast+0x334/0x8d4 net/netlink/af_netlink.c:1380 nlmsg_unicast include/net/netlink.h:1099 [inline] genlmsg_unicast include/net/genetlink.h:433 [inline] genlmsg_reply include/net/genetlink.h:443 [inline] ila_xlat_nl_cmd_get_mapping+0x620/0x7d0 net/ipv6/ila/ila_xlat.c:493 genl_family_rcv_msg_doit net/netlink/genetlink.c:968 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline] genl_rcv_msg+0x938/0xc1c net/netlink/genetlink.c:1065 netlink_rcv_skb+0x214/0x3c4 net/netlink/af_netlink.c:2574 genl_rcv+0x38/0x50 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x660/0x8d4 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x800/0xae0 net/netlink/af_netlink.c:1942 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0x558/0x844 net/socket.c:2479 ___sys_sendmsg net/socket.c:2533 [inline] __sys_sendmsg+0x26c/0x33c net/socket.c:2562 __do_sys_sendmsg net/socket.c:2571 [inline] __se_sys_sendmsg net/socket.c:2569 [inline] __arm64_sys_sendmsg+0x80/0x94 net/socket.c:2569 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall+0x98/0x2c0 arch/arm64/kernel/syscall.c:52 el0_svc_common+0x138/0x258 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x64/0x198 arch/arm64/kernel/syscall.c:193 el0_svc+0x58/0x168 arch/arm64/kernel/entry-common.c:637 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:655 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591 irq event stamp: 136484 hardirqs last enabled at (136483): [] __up_console_sem+0x60/0xb4 kernel/printk/printk.c:345 hardirqs last disabled at (136484): [] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:405 softirqs last enabled at (136418): [] softirq_handle_end kernel/softirq.c:414 [inline] softirqs last enabled at (136418): [] __do_softirq+0xd4c/0xfa4 kernel/softirq.c:600 softirqs last disabled at (136371): [] ____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:80 ---[ end trace 0000000000000000 ]--- skb len=0 headroom=0 headlen=0 tailroom=192 mac=(0,0) net=(0,-1) trans=-1 shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0)) csum(0x0 ip_summed=0 complete_sw=0 valid=0 level=0) hash(0x0 sw=0 l4=0) proto=0x0010 pkttype=6 iif=0 dev name=nlmon0 feat=0x0000000000005861 Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/ila/ila_xlat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 5fc1f4e0c0cf..10f1367eb4ca 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -477,6 +477,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) rcu_read_lock(); + ret = -ESRCH; ila = ila_lookup_by_params(&xp, ilan); if (ila) { ret = ila_dump_info(ila, From e4e5006c13f35f2543d6f48842e6460c892e8d7b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 27 Feb 2023 17:36:46 +0800 Subject: [PATCH 28/55] selftests: nft_nat: ensuring the listening side is up before starting the client [ Upstream commit 2067e7a00aa604b94de31d64f29b8893b1696f26 ] The test_local_dnat_portonly() function initiates the client-side as soon as it sets the listening side to the background. This could lead to a race condition where the server may not be ready to listen. To ensure that the server-side is up and running before initiating the client-side, a delay is introduced to the test_local_dnat_portonly() function. Before the fix: # ./nft_nat.sh PASS: netns routing/connectivity: ns0-rthlYrBU can reach ns1-rthlYrBU and ns2-rthlYrBU PASS: ping to ns1-rthlYrBU was ip NATted to ns2-rthlYrBU PASS: ping to ns1-rthlYrBU OK after ip nat output chain flush PASS: ipv6 ping to ns1-rthlYrBU was ip6 NATted to ns2-rthlYrBU 2023/02/27 04:11:03 socat[6055] E connect(5, AF=2 10.0.1.99:2000, 16): Connection refused ERROR: inet port rewrite After the fix: # ./nft_nat.sh PASS: netns routing/connectivity: ns0-9sPJV6JJ can reach ns1-9sPJV6JJ and ns2-9sPJV6JJ PASS: ping to ns1-9sPJV6JJ was ip NATted to ns2-9sPJV6JJ PASS: ping to ns1-9sPJV6JJ OK after ip nat output chain flush PASS: ipv6 ping to ns1-9sPJV6JJ was ip6 NATted to ns2-9sPJV6JJ PASS: inet port rewrite without l3 address Fixes: 282e5f8fe907 ("netfilter: nat: really support inet nat without l3 address") Signed-off-by: Hangbin Liu Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- tools/testing/selftests/netfilter/nft_nat.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 4e15e8167310..67697d8ea59a 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -404,6 +404,8 @@ EOF echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 & sc_s=$! + sleep 1 + result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT) if [ "$result" = "SERVER-inet" ];then From 1c618f150c82cdd4d9e9156d2595c0d18cca24aa Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 2 Nov 2020 11:45:06 +0000 Subject: [PATCH 29/55] net: usb: lan78xx: Remove lots of set but unused 'ret' variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 06cd7c46b3ab3f2252c61bf85b191236cf0254e1 ] Fixes the following W=1 kernel build warning(s): drivers/net/usb/lan78xx.c: In function ‘lan78xx_read_raw_otp’: drivers/net/usb/lan78xx.c:825:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan78xx_write_raw_otp’: drivers/net/usb/lan78xx.c:879:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan78xx_deferred_multicast_write’: drivers/net/usb/lan78xx.c:1041:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan78xx_update_flowcontrol’: drivers/net/usb/lan78xx.c:1127:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan78xx_init_mac_address’: drivers/net/usb/lan78xx.c:1666:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan78xx_link_status_change’: drivers/net/usb/lan78xx.c:1841:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan78xx_irq_bus_sync_unlock’: drivers/net/usb/lan78xx.c:1920:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan8835_fixup’: drivers/net/usb/lan78xx.c:1994:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_rx_max_frame_length’: drivers/net/usb/lan78xx.c:2192:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan78xx_change_mtu’: drivers/net/usb/lan78xx.c:2270:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_mac_addr’: drivers/net/usb/lan78xx.c:2299:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_features’: drivers/net/usb/lan78xx.c:2333:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_suspend’: drivers/net/usb/lan78xx.c:3807:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20201102114512.1062724-25-lee.jones@linaro.org Signed-off-by: Jakub Kicinski Stable-dep-of: e57cf3639c32 ("net: lan78xx: fix accessing the LAN7800's internal phy specific registers from the MAC driver") Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 168 ++++++++++++++++++-------------------- 1 file changed, 78 insertions(+), 90 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index ce3c8f476d75..5454a2038428 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -824,20 +824,19 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset, u32 length, u8 *data) { int i; - int ret; u32 buf; unsigned long timeout; - ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + lan78xx_read_reg(dev, OTP_PWR_DN, &buf); if (buf & OTP_PWR_DN_PWRDN_N_) { /* clear it and wait to be cleared */ - ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0); + lan78xx_write_reg(dev, OTP_PWR_DN, 0); timeout = jiffies + HZ; do { usleep_range(1, 10); - ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + lan78xx_read_reg(dev, OTP_PWR_DN, &buf); if (time_after(jiffies, timeout)) { netdev_warn(dev->net, "timeout on OTP_PWR_DN"); @@ -847,18 +846,18 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset, } for (i = 0; i < length; i++) { - ret = lan78xx_write_reg(dev, OTP_ADDR1, + lan78xx_write_reg(dev, OTP_ADDR1, ((offset + i) >> 8) & OTP_ADDR1_15_11); - ret = lan78xx_write_reg(dev, OTP_ADDR2, + lan78xx_write_reg(dev, OTP_ADDR2, ((offset + i) & OTP_ADDR2_10_3)); - ret = lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_); - ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); + lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_); + lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); timeout = jiffies + HZ; do { udelay(1); - ret = lan78xx_read_reg(dev, OTP_STATUS, &buf); + lan78xx_read_reg(dev, OTP_STATUS, &buf); if (time_after(jiffies, timeout)) { netdev_warn(dev->net, "timeout on OTP_STATUS"); @@ -866,7 +865,7 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset, } } while (buf & OTP_STATUS_BUSY_); - ret = lan78xx_read_reg(dev, OTP_RD_DATA, &buf); + lan78xx_read_reg(dev, OTP_RD_DATA, &buf); data[i] = (u8)(buf & 0xFF); } @@ -878,20 +877,19 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset, u32 length, u8 *data) { int i; - int ret; u32 buf; unsigned long timeout; - ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + lan78xx_read_reg(dev, OTP_PWR_DN, &buf); if (buf & OTP_PWR_DN_PWRDN_N_) { /* clear it and wait to be cleared */ - ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0); + lan78xx_write_reg(dev, OTP_PWR_DN, 0); timeout = jiffies + HZ; do { udelay(1); - ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + lan78xx_read_reg(dev, OTP_PWR_DN, &buf); if (time_after(jiffies, timeout)) { netdev_warn(dev->net, "timeout on OTP_PWR_DN completion"); @@ -901,21 +899,21 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset, } /* set to BYTE program mode */ - ret = lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_); + lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_); for (i = 0; i < length; i++) { - ret = lan78xx_write_reg(dev, OTP_ADDR1, + lan78xx_write_reg(dev, OTP_ADDR1, ((offset + i) >> 8) & OTP_ADDR1_15_11); - ret = lan78xx_write_reg(dev, OTP_ADDR2, + lan78xx_write_reg(dev, OTP_ADDR2, ((offset + i) & OTP_ADDR2_10_3)); - ret = lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]); - ret = lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_); - ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); + lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]); + lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_); + lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); timeout = jiffies + HZ; do { udelay(1); - ret = lan78xx_read_reg(dev, OTP_STATUS, &buf); + lan78xx_read_reg(dev, OTP_STATUS, &buf); if (time_after(jiffies, timeout)) { netdev_warn(dev->net, "Timeout on OTP_STATUS completion"); @@ -1040,7 +1038,6 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param) container_of(param, struct lan78xx_priv, set_multicast); struct lan78xx_net *dev = pdata->dev; int i; - int ret; netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n", pdata->rfe_ctl); @@ -1049,14 +1046,14 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param) DP_SEL_VHF_HASH_LEN, pdata->mchash_table); for (i = 1; i < NUM_OF_MAF; i++) { - ret = lan78xx_write_reg(dev, MAF_HI(i), 0); - ret = lan78xx_write_reg(dev, MAF_LO(i), + lan78xx_write_reg(dev, MAF_HI(i), 0); + lan78xx_write_reg(dev, MAF_LO(i), pdata->pfilter_table[i][1]); - ret = lan78xx_write_reg(dev, MAF_HI(i), + lan78xx_write_reg(dev, MAF_HI(i), pdata->pfilter_table[i][0]); } - ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); + lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); } static void lan78xx_set_multicast(struct net_device *netdev) @@ -1126,7 +1123,6 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex, u16 lcladv, u16 rmtadv) { u32 flow = 0, fct_flow = 0; - int ret; u8 cap; if (dev->fc_autoneg) @@ -1149,10 +1145,10 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex, (cap & FLOW_CTRL_RX ? "enabled" : "disabled"), (cap & FLOW_CTRL_TX ? "enabled" : "disabled")); - ret = lan78xx_write_reg(dev, FCT_FLOW, fct_flow); + lan78xx_write_reg(dev, FCT_FLOW, fct_flow); /* threshold value should be set before enabling flow */ - ret = lan78xx_write_reg(dev, FLOW, flow); + lan78xx_write_reg(dev, FLOW, flow); return 0; } @@ -1681,11 +1677,10 @@ static int lan78xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) static void lan78xx_init_mac_address(struct lan78xx_net *dev) { u32 addr_lo, addr_hi; - int ret; u8 addr[6]; - ret = lan78xx_read_reg(dev, RX_ADDRL, &addr_lo); - ret = lan78xx_read_reg(dev, RX_ADDRH, &addr_hi); + lan78xx_read_reg(dev, RX_ADDRL, &addr_lo); + lan78xx_read_reg(dev, RX_ADDRH, &addr_hi); addr[0] = addr_lo & 0xFF; addr[1] = (addr_lo >> 8) & 0xFF; @@ -1718,12 +1713,12 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev) (addr[2] << 16) | (addr[3] << 24); addr_hi = addr[4] | (addr[5] << 8); - ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); - ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + lan78xx_write_reg(dev, RX_ADDRL, addr_lo); + lan78xx_write_reg(dev, RX_ADDRH, addr_hi); } - ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); - ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); + lan78xx_write_reg(dev, MAF_LO(0), addr_lo); + lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); ether_addr_copy(dev->net->dev_addr, addr); } @@ -1856,7 +1851,7 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev) static void lan78xx_link_status_change(struct net_device *net) { struct phy_device *phydev = net->phydev; - int ret, temp; + int temp; /* At forced 100 F/H mode, chip may fail to set mode correctly * when cable is switched between long(~50+m) and short one. @@ -1867,7 +1862,7 @@ static void lan78xx_link_status_change(struct net_device *net) /* disable phy interrupt */ temp = phy_read(phydev, LAN88XX_INT_MASK); temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; - ret = phy_write(phydev, LAN88XX_INT_MASK, temp); + phy_write(phydev, LAN88XX_INT_MASK, temp); temp = phy_read(phydev, MII_BMCR); temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000); @@ -1881,7 +1876,7 @@ static void lan78xx_link_status_change(struct net_device *net) /* enable phy interrupt back */ temp = phy_read(phydev, LAN88XX_INT_MASK); temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; - ret = phy_write(phydev, LAN88XX_INT_MASK, temp); + phy_write(phydev, LAN88XX_INT_MASK, temp); } } @@ -1935,14 +1930,13 @@ static void lan78xx_irq_bus_sync_unlock(struct irq_data *irqd) struct lan78xx_net *dev = container_of(data, struct lan78xx_net, domain_data); u32 buf; - int ret; /* call register access here because irq_bus_lock & irq_bus_sync_unlock * are only two callbacks executed in non-atomic contex. */ - ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf); + lan78xx_read_reg(dev, INT_EP_CTL, &buf); if (buf != data->irqenable) - ret = lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable); + lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable); mutex_unlock(&data->irq_lock); } @@ -2009,7 +2003,6 @@ static void lan78xx_remove_irq_domain(struct lan78xx_net *dev) static int lan8835_fixup(struct phy_device *phydev) { int buf; - int ret; struct lan78xx_net *dev = netdev_priv(phydev->attached_dev); /* LED2/PME_N/IRQ_N/RGMII_ID pin to IRQ_N mode */ @@ -2019,11 +2012,11 @@ static int lan8835_fixup(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8010, buf); /* RGMII MAC TXC Delay Enable */ - ret = lan78xx_write_reg(dev, MAC_RGMII_ID, + lan78xx_write_reg(dev, MAC_RGMII_ID, MAC_RGMII_ID_TXC_DELAY_EN_); /* RGMII TX DLL Tune Adjust */ - ret = lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00); + lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00); dev->interface = PHY_INTERFACE_MODE_RGMII_TXID; @@ -2207,28 +2200,27 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size) { - int ret = 0; u32 buf; bool rxenabled; - ret = lan78xx_read_reg(dev, MAC_RX, &buf); + lan78xx_read_reg(dev, MAC_RX, &buf); rxenabled = ((buf & MAC_RX_RXEN_) != 0); if (rxenabled) { buf &= ~MAC_RX_RXEN_; - ret = lan78xx_write_reg(dev, MAC_RX, buf); + lan78xx_write_reg(dev, MAC_RX, buf); } /* add 4 to size for FCS */ buf &= ~MAC_RX_MAX_SIZE_MASK_; buf |= (((size + 4) << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_); - ret = lan78xx_write_reg(dev, MAC_RX, buf); + lan78xx_write_reg(dev, MAC_RX, buf); if (rxenabled) { buf |= MAC_RX_RXEN_; - ret = lan78xx_write_reg(dev, MAC_RX, buf); + lan78xx_write_reg(dev, MAC_RX, buf); } return 0; @@ -2285,13 +2277,12 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu) int ll_mtu = new_mtu + netdev->hard_header_len; int old_hard_mtu = dev->hard_mtu; int old_rx_urb_size = dev->rx_urb_size; - int ret; /* no second zero-length packet read wanted after mtu-sized packets */ if ((ll_mtu % dev->maxpacket) == 0) return -EDOM; - ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN); + lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN); netdev->mtu = new_mtu; @@ -2314,7 +2305,6 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p) struct lan78xx_net *dev = netdev_priv(netdev); struct sockaddr *addr = p; u32 addr_lo, addr_hi; - int ret; if (netif_running(netdev)) return -EBUSY; @@ -2331,12 +2321,12 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p) addr_hi = netdev->dev_addr[4] | netdev->dev_addr[5] << 8; - ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); - ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + lan78xx_write_reg(dev, RX_ADDRL, addr_lo); + lan78xx_write_reg(dev, RX_ADDRH, addr_hi); /* Added to support MAC address changes */ - ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); - ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); + lan78xx_write_reg(dev, MAF_LO(0), addr_lo); + lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); return 0; } @@ -2348,7 +2338,6 @@ static int lan78xx_set_features(struct net_device *netdev, struct lan78xx_net *dev = netdev_priv(netdev); struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); unsigned long flags; - int ret; spin_lock_irqsave(&pdata->rfe_ctl_lock, flags); @@ -2372,7 +2361,7 @@ static int lan78xx_set_features(struct net_device *netdev, spin_unlock_irqrestore(&pdata->rfe_ctl_lock, flags); - ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); + lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); return 0; } @@ -3828,7 +3817,6 @@ static u16 lan78xx_wakeframe_crc16(const u8 *buf, int len) static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) { u32 buf; - int ret; int mask_index; u16 crc; u32 temp_wucsr; @@ -3837,26 +3825,26 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) const u8 ipv6_multicast[3] = { 0x33, 0x33 }; const u8 arp_type[2] = { 0x08, 0x06 }; - ret = lan78xx_read_reg(dev, MAC_TX, &buf); + lan78xx_read_reg(dev, MAC_TX, &buf); buf &= ~MAC_TX_TXEN_; - ret = lan78xx_write_reg(dev, MAC_TX, buf); - ret = lan78xx_read_reg(dev, MAC_RX, &buf); + lan78xx_write_reg(dev, MAC_TX, buf); + lan78xx_read_reg(dev, MAC_RX, &buf); buf &= ~MAC_RX_RXEN_; - ret = lan78xx_write_reg(dev, MAC_RX, buf); + lan78xx_write_reg(dev, MAC_RX, buf); - ret = lan78xx_write_reg(dev, WUCSR, 0); - ret = lan78xx_write_reg(dev, WUCSR2, 0); - ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL); + lan78xx_write_reg(dev, WUCSR, 0); + lan78xx_write_reg(dev, WUCSR2, 0); + lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL); temp_wucsr = 0; temp_pmt_ctl = 0; - ret = lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl); + lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl); temp_pmt_ctl &= ~PMT_CTL_RES_CLR_WKP_EN_; temp_pmt_ctl |= PMT_CTL_RES_CLR_WKP_STS_; for (mask_index = 0; mask_index < NUM_OF_WUF_CFG; mask_index++) - ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), 0); + lan78xx_write_reg(dev, WUF_CFG(mask_index), 0); mask_index = 0; if (wol & WAKE_PHY) { @@ -3885,30 +3873,30 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) /* set WUF_CFG & WUF_MASK for IPv4 Multicast */ crc = lan78xx_wakeframe_crc16(ipv4_multicast, 3); - ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), + lan78xx_write_reg(dev, WUF_CFG(mask_index), WUF_CFGX_EN_ | WUF_CFGX_TYPE_MCAST_ | (0 << WUF_CFGX_OFFSET_SHIFT_) | (crc & WUF_CFGX_CRC16_MASK_)); - ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7); - ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); - ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); - ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); + lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7); + lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); + lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); + lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); mask_index++; /* for IPv6 Multicast */ crc = lan78xx_wakeframe_crc16(ipv6_multicast, 2); - ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), + lan78xx_write_reg(dev, WUF_CFG(mask_index), WUF_CFGX_EN_ | WUF_CFGX_TYPE_MCAST_ | (0 << WUF_CFGX_OFFSET_SHIFT_) | (crc & WUF_CFGX_CRC16_MASK_)); - ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3); - ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); - ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); - ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); + lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3); + lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); + lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); + lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); mask_index++; temp_pmt_ctl |= PMT_CTL_WOL_EN_; @@ -3929,16 +3917,16 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) * for packettype (offset 12,13) = ARP (0x0806) */ crc = lan78xx_wakeframe_crc16(arp_type, 2); - ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), + lan78xx_write_reg(dev, WUF_CFG(mask_index), WUF_CFGX_EN_ | WUF_CFGX_TYPE_ALL_ | (0 << WUF_CFGX_OFFSET_SHIFT_) | (crc & WUF_CFGX_CRC16_MASK_)); - ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000); - ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); - ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); - ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); + lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000); + lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); + lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); + lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); mask_index++; temp_pmt_ctl |= PMT_CTL_WOL_EN_; @@ -3946,7 +3934,7 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_; } - ret = lan78xx_write_reg(dev, WUCSR, temp_wucsr); + lan78xx_write_reg(dev, WUCSR, temp_wucsr); /* when multiple WOL bits are set */ if (hweight_long((unsigned long)wol) > 1) { @@ -3954,16 +3942,16 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) temp_pmt_ctl &= ~PMT_CTL_SUS_MODE_MASK_; temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_; } - ret = lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl); + lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl); /* clear WUPS */ - ret = lan78xx_read_reg(dev, PMT_CTL, &buf); + lan78xx_read_reg(dev, PMT_CTL, &buf); buf |= PMT_CTL_WUPS_MASK_; - ret = lan78xx_write_reg(dev, PMT_CTL, buf); + lan78xx_write_reg(dev, PMT_CTL, buf); - ret = lan78xx_read_reg(dev, MAC_RX, &buf); + lan78xx_read_reg(dev, MAC_RX, &buf); buf |= MAC_RX_RXEN_; - ret = lan78xx_write_reg(dev, MAC_RX, buf); + lan78xx_write_reg(dev, MAC_RX, buf); return 0; } From 8018aa0863d64ee7c14997a1d8e5f112ee95f447 Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Wed, 1 Mar 2023 08:43:07 -0700 Subject: [PATCH 30/55] net: lan78xx: fix accessing the LAN7800's internal phy specific registers from the MAC driver [ Upstream commit e57cf3639c323eeed05d3725fd82f91b349adca8 ] Move the LAN7800 internal phy (phy ID 0x0007c132) specific register accesses to the phy driver (microchip.c). Fix the error reported by Enguerrand de Ribaucourt in December 2022, "Some operations during the cable switch workaround modify the register LAN88XX_INT_MASK of the PHY. However, this register is specific to the LAN8835 PHY. For instance, if a DP8322I PHY is connected to the LAN7801, that register (0x19), corresponds to the LED and MAC address configuration, resulting in unapropriate behavior." I did not test with the DP8322I PHY, but I tested with an EVB-LAN7800 with the internal PHY. Fixes: 14437e3fa284 ("lan78xx: workaround of forced 100 Full/Half duplex mode error") Signed-off-by: Yuiko Oshino Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230301154307.30438-1-yuiko.oshino@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/microchip.c | 32 ++++++++++++++++++++++++++++++++ drivers/net/usb/lan78xx.c | 27 +-------------------------- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index a644e8e5071c..375bbd60b38a 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -326,6 +326,37 @@ static int lan88xx_config_aneg(struct phy_device *phydev) return genphy_config_aneg(phydev); } +static void lan88xx_link_change_notify(struct phy_device *phydev) +{ + int temp; + + /* At forced 100 F/H mode, chip may fail to set mode correctly + * when cable is switched between long(~50+m) and short one. + * As workaround, set to 10 before setting to 100 + * at forced 100 F/H mode. + */ + if (!phydev->autoneg && phydev->speed == 100) { + /* disable phy interrupt */ + temp = phy_read(phydev, LAN88XX_INT_MASK); + temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; + phy_write(phydev, LAN88XX_INT_MASK, temp); + + temp = phy_read(phydev, MII_BMCR); + temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000); + phy_write(phydev, MII_BMCR, temp); /* set to 10 first */ + temp |= BMCR_SPEED100; + phy_write(phydev, MII_BMCR, temp); /* set to 100 later */ + + /* clear pending interrupt generated while workaround */ + temp = phy_read(phydev, LAN88XX_INT_STS); + + /* enable phy interrupt back */ + temp = phy_read(phydev, LAN88XX_INT_MASK); + temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; + phy_write(phydev, LAN88XX_INT_MASK, temp); + } +} + static struct phy_driver microchip_phy_driver[] = { { .phy_id = 0x0007c130, @@ -339,6 +370,7 @@ static struct phy_driver microchip_phy_driver[] = { .config_init = lan88xx_config_init, .config_aneg = lan88xx_config_aneg, + .link_change_notify = lan88xx_link_change_notify, .ack_interrupt = lan88xx_phy_ack_interrupt, .config_intr = lan88xx_phy_config_intr, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 5454a2038428..b51017966bb3 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1851,33 +1851,8 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev) static void lan78xx_link_status_change(struct net_device *net) { struct phy_device *phydev = net->phydev; - int temp; - - /* At forced 100 F/H mode, chip may fail to set mode correctly - * when cable is switched between long(~50+m) and short one. - * As workaround, set to 10 before setting to 100 - * at forced 100 F/H mode. - */ - if (!phydev->autoneg && (phydev->speed == 100)) { - /* disable phy interrupt */ - temp = phy_read(phydev, LAN88XX_INT_MASK); - temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; - phy_write(phydev, LAN88XX_INT_MASK, temp); - temp = phy_read(phydev, MII_BMCR); - temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000); - phy_write(phydev, MII_BMCR, temp); /* set to 10 first */ - temp |= BMCR_SPEED100; - phy_write(phydev, MII_BMCR, temp); /* set to 100 later */ - - /* clear pending interrupt generated while workaround */ - temp = phy_read(phydev, LAN88XX_INT_STS); - - /* enable phy interrupt back */ - temp = phy_read(phydev, LAN88XX_INT_MASK); - temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; - phy_write(phydev, LAN88XX_INT_MASK, temp); - } + phy_print_status(phydev); } static int irq_map(struct irq_domain *d, unsigned int irq, From 9dc16be373b382ddd4c274052a6e870a95e76c01 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Thu, 2 Mar 2023 01:39:13 +0900 Subject: [PATCH 31/55] net: caif: Fix use-after-free in cfusbl_device_notify() [ Upstream commit 9781e98a97110f5e76999058368b4be76a788484 ] syzbot reported use-after-free in cfusbl_device_notify() [1]. This causes a stack trace like below: BUG: KASAN: use-after-free in cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138 Read of size 8 at addr ffff88807ac4e6f0 by task kworker/u4:6/1214 CPU: 0 PID: 1214 Comm: kworker/u4:6 Not tainted 5.19.0-rc3-syzkaller-00146-g92f20ff72066 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0xeb/0x467 mm/kasan/report.c:313 print_report mm/kasan/report.c:429 [inline] kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491 cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138 notifier_call_chain+0xb5/0x200 kernel/notifier.c:87 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1945 call_netdevice_notifiers_extack net/core/dev.c:1983 [inline] call_netdevice_notifiers net/core/dev.c:1997 [inline] netdev_wait_allrefs_any net/core/dev.c:10227 [inline] netdev_run_todo+0xbc0/0x10f0 net/core/dev.c:10341 default_device_exit_batch+0x44e/0x590 net/core/dev.c:11334 ops_exit_list+0x125/0x170 net/core/net_namespace.c:167 cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 When unregistering a net device, unregister_netdevice_many_notify() sets the device's reg_state to NETREG_UNREGISTERING, calls notifiers with NETDEV_UNREGISTER, and adds the device to the todo list. Later on, devices in the todo list are processed by netdev_run_todo(). netdev_run_todo() waits devices' reference count become 1 while rebdoadcasting NETDEV_UNREGISTER notification. When cfusbl_device_notify() is called with NETDEV_UNREGISTER multiple times, the parent device might be freed. This could cause UAF. Processing NETDEV_UNREGISTER multiple times also causes inbalance of reference count for the module. This patch fixes the issue by accepting only first NETDEV_UNREGISTER notification. Fixes: 7ad65bf68d70 ("caif: Add support for CAIF over CDC NCM USB interface") CC: sjur.brandeland@stericsson.com Reported-by: syzbot+b563d33852b893653a9e@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=c3bfd8e2450adab3bffe4d80821fbbced600407f [1] Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/r/20230301163913.391304-1-syoshida@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/caif/caif_usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 46c62dd1479b..862226be2286 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -134,6 +134,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, struct usb_device *usbdev; int res; + if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED) + return 0; + /* Check whether we have a NCM device, and find its VID/PID. */ if (!(dev->dev.parent && dev->dev.parent->driver && strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0)) From 16f3aae1aa2dd89bc8d073a67f190af580386ae9 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 3 Mar 2023 18:43:57 -0800 Subject: [PATCH 32/55] bnxt_en: Avoid order-5 memory allocation for TPA data [ Upstream commit accd7e23693aaaa9aa0d3e9eca0ae77d1be80ab3 ] The driver needs to keep track of all the possible concurrent TPA (GRO/LRO) completions on the aggregation ring. On P5 chips, the maximum number of concurrent TPA is 256 and the amount of memory we allocate is order-5 on systems using 4K pages. Memory allocation failure has been reported: NetworkManager: page allocation failure: order:5, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0-1 CPU: 15 PID: 2995 Comm: NetworkManager Kdump: loaded Not tainted 5.10.156 #1 Hardware name: Dell Inc. PowerEdge R660/0M1CC5, BIOS 0.2.25 08/12/2022 Call Trace: dump_stack+0x57/0x6e warn_alloc.cold.120+0x7b/0xdd ? _cond_resched+0x15/0x30 ? __alloc_pages_direct_compact+0x15f/0x170 __alloc_pages_slowpath.constprop.108+0xc58/0xc70 __alloc_pages_nodemask+0x2d0/0x300 kmalloc_order+0x24/0xe0 kmalloc_order_trace+0x19/0x80 bnxt_alloc_mem+0x1150/0x15c0 [bnxt_en] ? bnxt_get_func_stat_ctxs+0x13/0x60 [bnxt_en] __bnxt_open_nic+0x12e/0x780 [bnxt_en] bnxt_open+0x10b/0x240 [bnxt_en] __dev_open+0xe9/0x180 __dev_change_flags+0x1af/0x220 dev_change_flags+0x21/0x60 do_setlink+0x35c/0x1100 Instead of allocating this big chunk of memory and dividing it up for the concurrent TPA instances, allocate each small chunk separately for each TPA instance. This will reduce it to order-0 allocations. Fixes: 79632e9ba386 ("bnxt_en: Expand bnxt_tpa_info struct to support 57500 chips.") Reviewed-by: Somnath Kotur Reviewed-by: Damodharam Ammepalli Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ef8225b7445d..9fb1da36e9eb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2747,7 +2747,7 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem) static void bnxt_free_tpa_info(struct bnxt *bp) { - int i; + int i, j; for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; @@ -2755,8 +2755,10 @@ static void bnxt_free_tpa_info(struct bnxt *bp) kfree(rxr->rx_tpa_idx_map); rxr->rx_tpa_idx_map = NULL; if (rxr->rx_tpa) { - kfree(rxr->rx_tpa[0].agg_arr); - rxr->rx_tpa[0].agg_arr = NULL; + for (j = 0; j < bp->max_tpa; j++) { + kfree(rxr->rx_tpa[j].agg_arr); + rxr->rx_tpa[j].agg_arr = NULL; + } } kfree(rxr->rx_tpa); rxr->rx_tpa = NULL; @@ -2765,14 +2767,13 @@ static void bnxt_free_tpa_info(struct bnxt *bp) static int bnxt_alloc_tpa_info(struct bnxt *bp) { - int i, j, total_aggs = 0; + int i, j; bp->max_tpa = MAX_TPA; if (bp->flags & BNXT_FLAG_CHIP_P5) { if (!bp->max_tpa_v2) return 0; bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5); - total_aggs = bp->max_tpa * MAX_SKB_FRAGS; } for (i = 0; i < bp->rx_nr_rings; i++) { @@ -2786,12 +2787,12 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) if (!(bp->flags & BNXT_FLAG_CHIP_P5)) continue; - agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL); - rxr->rx_tpa[0].agg_arr = agg; - if (!agg) - return -ENOMEM; - for (j = 1; j < bp->max_tpa; j++) - rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS; + for (j = 0; j < bp->max_tpa; j++) { + agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL); + if (!agg) + return -ENOMEM; + rxr->rx_tpa[j].agg_arr = agg; + } rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map), GFP_KERNEL); if (!rxr->rx_tpa_idx_map) From 9f2e063dcbe2605917edc391e872fb82d852111c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 3 Mar 2023 10:58:56 +0100 Subject: [PATCH 33/55] netfilter: tproxy: fix deadlock due to missing BH disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4a02426787bf024dafdb79b362285ee325de3f5e ] The xtables packet traverser performs an unconditional local_bh_disable(), but the nf_tables evaluation loop does not. Functions that are called from either xtables or nftables must assume that they can be called in process context. inet_twsk_deschedule_put() assumes that no softirq interrupt can occur. If tproxy is used from nf_tables its possible that we'll deadlock trying to aquire a lock already held in process context. Add a small helper that takes care of this and use it. Link: https://lore.kernel.org/netfilter-devel/401bd6ed-314a-a196-1cdc-e13c720cc8f2@balasys.hu/ Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") Reported-and-tested-by: Major Dávid Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_tproxy.h | 7 +++++++ net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 +- net/ipv6/netfilter/nf_tproxy_ipv6.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h index 82d0e41b76f2..faa108b1ba67 100644 --- a/include/net/netfilter/nf_tproxy.h +++ b/include/net/netfilter/nf_tproxy.h @@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk) return false; } +static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw) +{ + local_bh_disable(); + inet_twsk_deschedule_put(tw); + local_bh_enable(); +} + /* assign a socket to the skb -- consumes sk */ static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c index b2bae0b0e42a..61cb2341f50f 100644 --- a/net/ipv4/netfilter/nf_tproxy_ipv4.c +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -38,7 +38,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, hp->source, lport ? lport : hp->dest, skb->dev, NF_TPROXY_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); + nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c index 34d51cd426b0..00761924f276 100644 --- a/net/ipv6/netfilter/nf_tproxy_ipv6.c +++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c @@ -63,7 +63,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, lport ? lport : hp->dest, skb->dev, NF_TPROXY_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule_put(inet_twsk(sk)); + nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } From d800996fcf604adf28f11fa7836bdf9a1e09f436 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 6 Mar 2023 11:21:37 +0000 Subject: [PATCH 34/55] btf: fix resolving BTF_KIND_VAR after ARRAY, STRUCT, UNION, PTR [ Upstream commit 9b459804ff9973e173fabafba2a1319f771e85fa ] btf_datasec_resolve contains a bug that causes the following BTF to fail loading: [1] DATASEC a size=2 vlen=2 type_id=4 offset=0 size=1 type_id=7 offset=1 size=1 [2] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none) [3] PTR (anon) type_id=2 [4] VAR a type_id=3 linkage=0 [5] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none) [6] TYPEDEF td type_id=5 [7] VAR b type_id=6 linkage=0 This error message is printed during btf_check_all_types: [1] DATASEC a size=2 vlen=2 type_id=7 offset=1 size=1 Invalid type By tracing btf_*_resolve we can pinpoint the problem: btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_TBD) = 0 btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_TBD) = 0 btf_ptr_resolve(depth: 3, type_id: 3, mode: RESOLVE_PTR) = 0 btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_PTR) = 0 btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_PTR) = -22 The last invocation of btf_datasec_resolve should invoke btf_var_resolve by means of env_stack_push, instead it returns EINVAL. The reason is that env_stack_push is never executed for the second VAR. if (!env_type_is_resolve_sink(env, var_type) && !env_type_is_resolved(env, var_type_id)) { env_stack_set_next_member(env, i + 1); return env_stack_push(env, var_type, var_type_id); } env_type_is_resolve_sink() changes its behaviour based on resolve_mode. For RESOLVE_PTR, we can simplify the if condition to the following: (btf_type_is_modifier() || btf_type_is_ptr) && !env_type_is_resolved() Since we're dealing with a VAR the clause evaluates to false. This is not sufficient to trigger the bug however. The log output and EINVAL are only generated if btf_type_id_size() fails. if (!btf_type_id_size(btf, &type_id, &type_size)) { btf_verifier_log_vsi(env, v->t, vsi, "Invalid type"); return -EINVAL; } Most types are sized, so for example a VAR referring to an INT is not a problem. The bug is only triggered if a VAR points at a modifier. Since we skipped btf_var_resolve that modifier was also never resolved, which means that btf_resolved_type_id returns 0 aka VOID for the modifier. This in turn causes btf_type_id_size to return NULL, triggering EINVAL. To summarise, the following conditions are necessary: - VAR pointing at PTR, STRUCT, UNION or ARRAY - Followed by a VAR pointing at TYPEDEF, VOLATILE, CONST, RESTRICT or TYPE_TAG The fix is to reset resolve_mode to RESOLVE_TBD before attempting to resolve a VAR from a DATASEC. Fixes: 1dc92851849c ("bpf: kernel side support for BTF Var and DataSec") Signed-off-by: Lorenz Bauer Link: https://lore.kernel.org/r/20230306112138.155352-2-lmb@isovalent.com Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- kernel/bpf/btf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 8fd65a0eb7f3..5189bc5ebd89 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -2719,6 +2719,7 @@ static int btf_datasec_resolve(struct btf_verifier_env *env, struct btf *btf = env->btf; u16 i; + env->resolve_mode = RESOLVE_TBD; for_each_vsi_from(i, v->next_member, v->t, vsi) { u32 var_type_id = vsi->type, type_id, type_size = 0; const struct btf_type *var_type = btf_type_by_id(env->btf, From 3a747490f9c316a7cbabbbccea4cfdc8a6d563e3 Mon Sep 17 00:00:00 2001 From: Chandrakanth Patil Date: Thu, 2 Mar 2023 16:23:40 +0530 Subject: [PATCH 35/55] scsi: megaraid_sas: Update max supported LD IDs to 240 [ Upstream commit bfa659177dcba48cf13f2bd88c1972f12a60bf1c ] The firmware only supports Logical Disk IDs up to 240 and LD ID 255 (0xFF) is reserved for deleted LDs. However, in some cases, firmware was assigning LD ID 254 (0xFE) to deleted LDs and this was causing the driver to mark the wrong disk as deleted. This in turn caused the wrong disk device to be taken offline by the SCSI midlayer. To address this issue, limit the LD ID range from 255 to 240. This ensures the deleted LD ID is properly identified and removed by the driver without accidently deleting any valid LDs. Fixes: ae6874ba4b43 ("scsi: megaraid_sas: Early detection of VD deletion through RaidMap update") Reported-by: Martin K. Petersen Signed-off-by: Chandrakanth Patil Signed-off-by: Sumit Saxena Link: https://lore.kernel.org/r/20230302105342.34933-2-chandrakanth.patil@broadcom.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_sas.h | 2 ++ drivers/scsi/megaraid/megaraid_sas_fp.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index aa62cc8ffd0a..ce0c36fa26bf 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -1515,6 +1515,8 @@ struct megasas_ctrl_info { #define MEGASAS_MAX_LD_IDS (MEGASAS_MAX_LD_CHANNELS * \ MEGASAS_MAX_DEV_PER_CHANNEL) +#define MEGASAS_MAX_SUPPORTED_LD_IDS 240 + #define MEGASAS_MAX_SECTORS (2*1024) #define MEGASAS_MAX_SECTORS_IEEE (2*128) #define MEGASAS_DBG_LVL 1 diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index 8bfb46dbbed3..ff20f4709081 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -359,7 +359,7 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance, u64 map_id) ld = MR_TargetIdToLdGet(i, drv_map); /* For non existing VDs, iterate to next VD*/ - if (ld >= (MAX_LOGICAL_DRIVES_EXT - 1)) + if (ld >= MEGASAS_MAX_SUPPORTED_LD_IDS) continue; raid = MR_LdRaidGet(ld, drv_map); From 1a517302dbe020750da30f1a398544a0db38dcc9 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Tue, 7 Mar 2023 11:23:46 +0800 Subject: [PATCH 36/55] net/smc: fix fallback failed while sendmsg with fastopen [ Upstream commit ce7ca794712f186da99719e8b4e97bd5ddbb04c3 ] Before determining whether the msg has unsupported options, it has been prematurely terminated by the wrong status check. For the application, the general usages of MSG_FASTOPEN likes fd = socket(...) /* rather than connect */ sendto(fd, data, len, MSG_FASTOPEN) Hence, We need to check the flag before state check, because the sock state here is always SMC_INIT when applications tries MSG_FASTOPEN. Once we found unsupported options, fallback it to TCP. Fixes: ee9dfbef02d1 ("net/smc: handle sockopts forcing fallback") Signed-off-by: D. Wythe Signed-off-by: Simon Horman v2 -> v1: Optimize code style Reviewed-by: Tony Lu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/smc/af_smc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5d696b7fb47e..b398d72a7bc3 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1542,16 +1542,14 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct smc_sock *smc; - int rc = -EPIPE; + int rc; smc = smc_sk(sk); lock_sock(sk); - if ((sk->sk_state != SMC_ACTIVE) && - (sk->sk_state != SMC_APPCLOSEWAIT1) && - (sk->sk_state != SMC_INIT)) - goto out; + /* SMC does not support connect with fastopen */ if (msg->msg_flags & MSG_FASTOPEN) { + /* not connected yet, fallback */ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { smc_switch_to_fallback(smc); smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; @@ -1559,6 +1557,11 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) rc = -EINVAL; goto out; } + } else if ((sk->sk_state != SMC_ACTIVE) && + (sk->sk_state != SMC_APPCLOSEWAIT1) && + (sk->sk_state != SMC_INIT)) { + rc = -EPIPE; + goto out; } if (smc->use_fallback) From a99a61d9e1bfca2fc37d223a6a185c0eb66aba02 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 8 Mar 2023 10:16:39 +0100 Subject: [PATCH 37/55] riscv: Use READ_ONCE_NOCHECK in imprecise unwinding stack mode [ Upstream commit 76950340cf03b149412fe0d5f0810e52ac1df8cb ] When CONFIG_FRAME_POINTER is unset, the stack unwinding function walk_stackframe randomly reads the stack and then, when KASAN is enabled, it can lead to the following backtrace: [ 0.000000] ================================================================== [ 0.000000] BUG: KASAN: stack-out-of-bounds in walk_stackframe+0xa6/0x11a [ 0.000000] Read of size 8 at addr ffffffff81807c40 by task swapper/0 [ 0.000000] [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.2.0-12919-g24203e6db61f #43 [ 0.000000] Hardware name: riscv-virtio,qemu (DT) [ 0.000000] Call Trace: [ 0.000000] [] walk_stackframe+0x0/0x11a [ 0.000000] [] init_param_lock+0x26/0x2a [ 0.000000] [] walk_stackframe+0xa2/0x11a [ 0.000000] [] dump_stack_lvl+0x22/0x36 [ 0.000000] [] print_report+0x198/0x4a8 [ 0.000000] [] init_param_lock+0x26/0x2a [ 0.000000] [] walk_stackframe+0xa2/0x11a [ 0.000000] [] kasan_report+0x9a/0xc8 [ 0.000000] [] walk_stackframe+0xa2/0x11a [ 0.000000] [] walk_stackframe+0xa2/0x11a [ 0.000000] [] desc_make_final+0x80/0x84 [ 0.000000] [] stack_trace_save+0x88/0xa6 [ 0.000000] [] filter_irq_stacks+0x72/0x76 [ 0.000000] [] devkmsg_read+0x32a/0x32e [ 0.000000] [] kasan_save_stack+0x28/0x52 [ 0.000000] [] desc_make_final+0x7c/0x84 [ 0.000000] [] stack_trace_save+0x84/0xa6 [ 0.000000] [] kasan_set_track+0x12/0x20 [ 0.000000] [] __kasan_slab_alloc+0x58/0x5e [ 0.000000] [] __kmem_cache_create+0x21e/0x39a [ 0.000000] [] create_boot_cache+0x70/0x9c [ 0.000000] [] kmem_cache_init+0x6c/0x11e [ 0.000000] [] mm_init+0xd8/0xfe [ 0.000000] [] start_kernel+0x190/0x3ca [ 0.000000] [ 0.000000] The buggy address belongs to stack of task swapper/0 [ 0.000000] and is located at offset 0 in frame: [ 0.000000] stack_trace_save+0x0/0xa6 [ 0.000000] [ 0.000000] This frame has 1 object: [ 0.000000] [32, 56) 'c' [ 0.000000] [ 0.000000] The buggy address belongs to the physical page: [ 0.000000] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x81a07 [ 0.000000] flags: 0x1000(reserved|zone=0) [ 0.000000] raw: 0000000000001000 ff600003f1e3d150 ff600003f1e3d150 0000000000000000 [ 0.000000] raw: 0000000000000000 0000000000000000 00000001ffffffff [ 0.000000] page dumped because: kasan: bad access detected [ 0.000000] [ 0.000000] Memory state around the buggy address: [ 0.000000] ffffffff81807b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] ffffffff81807b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] >ffffffff81807c00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 f3 [ 0.000000] ^ [ 0.000000] ffffffff81807c80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] ffffffff81807d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 0.000000] ================================================================== Fix that by using READ_ONCE_NOCHECK when reading the stack in imprecise mode. Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly") Reported-by: Chathura Rajapaksha Link: https://lore.kernel.org/all/CAD7mqryDQCYyJ1gAmtMm8SASMWAQ4i103ptTb0f6Oda=tPY2=A@mail.gmail.com/ Suggested-by: Dmitry Vyukov Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230308091639.602024-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 19e46f4160cc..5ba4d23971fd 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -89,7 +89,7 @@ void notrace walk_stackframe(struct task_struct *task, while (!kstack_end(ksp)) { if (__kernel_text_address(pc) && unlikely(fn(pc, arg))) break; - pc = (*ksp++) - 0x4; + pc = READ_ONCE_NOCHECK(*ksp++) - 0x4; } } From 6b06c4ae64e3557a19b3bb0b6dbf641bc41fc218 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Mar 2023 15:10:04 +0100 Subject: [PATCH 38/55] ext4: Fix deadlock during directory rename [ Upstream commit 3c92792da8506a295afb6d032b4476e46f979725 ] As lockdep properly warns, we should not be locking i_rwsem while having transactions started as the proper lock ordering used by all directory handling operations is i_rwsem -> transaction start. Fix the lock ordering by moving the locking of the directory earlier in ext4_rename(). Reported-by: syzbot+9d16c39efb5fade84574@syzkaller.appspotmail.com Fixes: 0813299c586b ("ext4: Fix possible corruption when moving a directory") Link: https://syzkaller.appspot.com/bug?extid=9d16c39efb5fade84574 Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230301141004.15087-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/namei.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f9d11f59df7d..b708b437b3e3 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3795,10 +3795,20 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, return retval; } + /* + * We need to protect against old.inode directory getting converted + * from inline directory format into a normal one. + */ + if (S_ISDIR(old.inode->i_mode)) + inode_lock_nested(old.inode, I_MUTEX_NONDIR2); + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, &old.inlined); - if (IS_ERR(old.bh)) - return PTR_ERR(old.bh); + if (IS_ERR(old.bh)) { + retval = PTR_ERR(old.bh); + goto unlock_moved_dir; + } + /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process @@ -3855,11 +3865,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) goto end_rename; } - /* - * We need to protect against old.inode directory getting - * converted from inline directory format into a normal one. - */ - inode_lock_nested(old.inode, I_MUTEX_NONDIR2); retval = ext4_rename_dir_prepare(handle, &old); if (retval) { inode_unlock(old.inode); @@ -3960,12 +3965,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, } else { ext4_journal_stop(handle); } - if (old.dir_bh) - inode_unlock(old.inode); release_bh: brelse(old.dir_bh); brelse(old.bh); brelse(new.bh); + +unlock_moved_dir: + if (S_ISDIR(old.inode->i_mode)) + inode_unlock(old.inode); + return retval; } From 2fea235ef07fdbaa156948904943fe61352f6c2f Mon Sep 17 00:00:00 2001 From: xurui Date: Wed, 18 Jan 2023 16:59:12 +0800 Subject: [PATCH 39/55] MIPS: Fix a compilation issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 109d587a4b4d7ccca2200ab1f808f43ae23e2585 ] arch/mips/include/asm/mach-rc32434/pci.h:377: cc1: error: result of ‘-117440512 << 16’ requires 44 bits to represent, but ‘int’ only has 32 bits [-Werror=shift-overflow=] All bits in KORINA_STAT are already at the correct position, so there is no addtional shift needed. Signed-off-by: xurui Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/include/asm/mach-rc32434/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/mach-rc32434/pci.h b/arch/mips/include/asm/mach-rc32434/pci.h index 6f40d1515580..1ff8a987025c 100644 --- a/arch/mips/include/asm/mach-rc32434/pci.h +++ b/arch/mips/include/asm/mach-rc32434/pci.h @@ -377,7 +377,7 @@ struct pci_msu { PCI_CFG04_STAT_SSE | \ PCI_CFG04_STAT_PE) -#define KORINA_CNFG1 ((KORINA_STAT<<16)|KORINA_CMD) +#define KORINA_CNFG1 (KORINA_STAT | KORINA_CMD) #define KORINA_REVID 0 #define KORINA_CLASS_CODE 0 From 737985dbcb67513789daca6628a37eceeb4cba85 Mon Sep 17 00:00:00 2001 From: Edward Humes Date: Sat, 27 Aug 2022 02:49:39 -0400 Subject: [PATCH 40/55] alpha: fix R_ALPHA_LITERAL reloc for large modules [ Upstream commit b6b17a8b3ecd878d98d5472a9023ede9e669ca72 ] Previously, R_ALPHA_LITERAL relocations would overflow for large kernel modules. This was because the Alpha's apply_relocate_add was relying on the kernel's module loader to have sorted the GOT towards the very end of the module as it was mapped into memory in order to correctly assign the global pointer. While this behavior would mostly work fine for small kernel modules, this approach would overflow on kernel modules with large GOT's since the global pointer would be very far away from the GOT, and thus, certain entries would be out of range. This patch fixes this by instead using the Tru64 behavior of assigning the global pointer to be 32KB away from the start of the GOT. The change made in this patch won't work for multi-GOT kernel modules as it makes the assumption the module only has one GOT located at the beginning of .got, although for the vast majority kernel modules, this should be fine. Of the kernel modules that would previously result in a relocation error, none of them, even modules like nouveau, have even come close to filling up a single GOT, and they've all worked fine under this patch. Signed-off-by: Edward Humes Signed-off-by: Matt Turner Signed-off-by: Sasha Levin --- arch/alpha/kernel/module.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c index ac110ae8f978..b19a8aae74e1 100644 --- a/arch/alpha/kernel/module.c +++ b/arch/alpha/kernel/module.c @@ -146,10 +146,8 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, base = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr; symtab = (Elf64_Sym *)sechdrs[symindex].sh_addr; - /* The small sections were sorted to the end of the segment. - The following should definitely cover them. */ - gp = (u64)me->core_layout.base + me->core_layout.size - 0x8000; got = sechdrs[me->arch.gotsecindex].sh_addr; + gp = got + 0x8000; for (i = 0; i < n; i++) { unsigned long r_sym = ELF64_R_SYM (rela[i].r_info); From 094a073605b124851cd472f2f5b0515c99b770e6 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 15 Feb 2023 10:12:12 -0700 Subject: [PATCH 41/55] macintosh: windfarm: Use unsigned type for 1-bit bitfields [ Upstream commit 748ea32d2dbd813d3bd958117bde5191182f909a ] Clang warns: drivers/macintosh/windfarm_lm75_sensor.c:63:14: error: implicit truncation from 'int' to a one-bit wide bit-field changes value from 1 to -1 [-Werror,-Wsingle-bit-bitfield-constant-conversion] lm->inited = 1; ^ ~ drivers/macintosh/windfarm_smu_sensors.c:356:19: error: implicit truncation from 'int' to a one-bit wide bit-field changes value from 1 to -1 [-Werror,-Wsingle-bit-bitfield-constant-conversion] pow->fake_volts = 1; ^ ~ drivers/macintosh/windfarm_smu_sensors.c:368:18: error: implicit truncation from 'int' to a one-bit wide bit-field changes value from 1 to -1 [-Werror,-Wsingle-bit-bitfield-constant-conversion] pow->quadratic = 1; ^ ~ There is no bug here since no code checks the actual value of these fields, just whether or not they are zero (boolean context), but this can be easily fixed by switching to an unsigned type. Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230215-windfarm-wsingle-bit-bitfield-constant-conversion-v1-1-26415072e855@kernel.org Signed-off-by: Sasha Levin --- drivers/macintosh/windfarm_lm75_sensor.c | 4 ++-- drivers/macintosh/windfarm_smu_sensors.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c index 1e5fa09845e7..8713e80201c0 100644 --- a/drivers/macintosh/windfarm_lm75_sensor.c +++ b/drivers/macintosh/windfarm_lm75_sensor.c @@ -34,8 +34,8 @@ #endif struct wf_lm75_sensor { - int ds1775 : 1; - int inited : 1; + unsigned int ds1775 : 1; + unsigned int inited : 1; struct i2c_client *i2c; struct wf_sensor sens; }; diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c index 3e6059eaa138..90823b428025 100644 --- a/drivers/macintosh/windfarm_smu_sensors.c +++ b/drivers/macintosh/windfarm_smu_sensors.c @@ -273,8 +273,8 @@ struct smu_cpu_power_sensor { struct list_head link; struct wf_sensor *volts; struct wf_sensor *amps; - int fake_volts : 1; - int quadratic : 1; + unsigned int fake_volts : 1; + unsigned int quadratic : 1; struct wf_sensor sens; }; #define to_smu_cpu_power(c) container_of(c, struct smu_cpu_power_sensor, sens) From fc9bc831509f3be45ce5273330cb6f4a35efad6c Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Tue, 10 Jan 2023 18:56:36 +0200 Subject: [PATCH 42/55] PCI: Add SolidRun vendor ID [ Upstream commit db6c4dee4c104f50ed163af71c53bfdb878a8318 ] Add SolidRun vendor ID to pci_ids.h The vendor ID is used in 2 different source files, the SNET vDPA driver and PCI quirks. Signed-off-by: Alvaro Karsz Acked-by: Bjorn Helgaas Message-Id: <20230110165638.123745-2-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a31aa3ac4219..d35000669db5 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3106,6 +3106,8 @@ #define PCI_VENDOR_ID_3COM_2 0xa727 +#define PCI_VENDOR_ID_SOLIDRUN 0xd063 + #define PCI_VENDOR_ID_DIGIUM 0xd161 #define PCI_DEVICE_ID_DIGIUM_HFC4S 0xb410 From f266cdd6796fc0312c3ee62ca634a0746cd363ab Mon Sep 17 00:00:00 2001 From: Paul Elder Date: Mon, 28 Nov 2022 09:02:01 +0100 Subject: [PATCH 43/55] media: ov5640: Fix analogue gain control [ Upstream commit afa4805799c1d332980ad23339fdb07b5e0cf7e0 ] Gain control is badly documented in publicly available (including leaked) documentation. There is an AGC pre-gain in register 0x3a13, expressed as a 6-bit value (plus an enable bit in bit 6). The driver hardcodes it to 0x43, which one application note states is equal to x1.047. The documentation also states that 0x40 is equel to x1.000. The pre-gain thus seems to be expressed as in 1/64 increments, and thus ranges from x1.00 to x1.984. What the pre-gain does is however unspecified. There is then an AGC gain limit, in registers 0x3a18 and 0x3a19, expressed as a 10-bit "real gain format" value. One application note sets it to 0x00f8 and states it is equal to x15.5, so it appears to be expressed in 1/16 increments, up to x63.9375. The manual gain is stored in registers 0x350a and 0x350b, also as a 10-bit "real gain format" value. It is documented in the application note as a Q6.4 values, up to x63.9375. One version of the datasheet indicates that the sensor supports a digital gain: The OV5640 supports 1/2/4 digital gain. Normally, the gain is controlled automatically by the automatic gain control (AGC) block. It isn't clear how that would be controlled manually. There appears to be no indication regarding whether the gain controlled through registers 0x350a and 0x350b is an analogue gain only or also includes digital gain. The words "real gain" don't necessarily mean "combined analogue and digital gains". Some OmniVision sensors (such as the OV8858) are documented as supoprting different formats for the gain values, selectable through a register bit, and they are called "real gain format" and "sensor gain format". For that sensor, we have (one of) the gain registers documented as 0x3503[2]=0, gain[7:0] is real gain format, where low 4 bits are fraction bits, for example, 0x10 is 1x gain, 0x28 is 2.5x gain If 0x3503[2]=1, gain[7:0] is sensor gain format, gain[7:4] is coarse gain, 00000: 1x, 00001: 2x, 00011: 4x, 00111: 8x, gain[7] is 1, gain[3:0] is fine gain. For example, 0x10 is 1x gain, 0x30 is 2x gain, 0x70 is 4x gain (The second part of the text makes little sense) "Real gain" may thus refer to the combination of the coarse and fine analogue gains as a single value. The OV5640 0x350a and 0x350b registers thus appear to control analogue gain. The driver incorrectly uses V4L2_CID_GAIN as V4L2 has a specific control for analogue gain, V4L2_CID_ANALOGUE_GAIN. Use it. If registers 0x350a and 0x350b are later found to control digital gain as well, the driver could then restrict the range of the analogue gain control value to lower than x64 and add a separate digital gain control. Signed-off-by: Paul Elder Signed-off-by: Laurent Pinchart Reviewed-by: Jacopo Mondi Reviewed-by: Jai Luthra Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/ov5640.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c index be6c882dd1d5..087fb464ffc1 100644 --- a/drivers/media/i2c/ov5640.c +++ b/drivers/media/i2c/ov5640.c @@ -2704,7 +2704,7 @@ static int ov5640_init_controls(struct ov5640_dev *sensor) /* Auto/manual gain */ ctrls->auto_gain = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); - ctrls->gain = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_GAIN, + ctrls->gain = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_ANALOGUE_GAIN, 0, 1023, 1, 0); ctrls->saturation = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_SATURATION, From dbfae25b01961a326847e81547cd7e30de2cae0f Mon Sep 17 00:00:00 2001 From: Yejune Deng Date: Mon, 16 Nov 2020 15:30:07 +0800 Subject: [PATCH 44/55] ipmi/watchdog: replace atomic_add() and atomic_sub() commit a01a89b1db1066a6af23ae08b9a0c345b7966f0b upstream. atomic_inc() and atomic_dec() looks better Signed-off-by: Yejune Deng Message-Id: <1605511807-7135-1-git-send-email-yejune.deng@gmail.com> Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_watchdog.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 72ad7fff64a7..5fd782749311 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -498,7 +498,7 @@ static void panic_halt_ipmi_heartbeat(void) msg.cmd = IPMI_WDOG_RESET_TIMER; msg.data = NULL; msg.data_len = 0; - atomic_add(1, &panic_done_count); + atomic_inc(&panic_done_count); rv = ipmi_request_supply_msgs(watchdog_user, (struct ipmi_addr *) &addr, 0, @@ -508,7 +508,7 @@ static void panic_halt_ipmi_heartbeat(void) &panic_halt_heartbeat_recv_msg, 1); if (rv) - atomic_sub(1, &panic_done_count); + atomic_dec(&panic_done_count); } static struct ipmi_smi_msg panic_halt_smi_msg = { @@ -532,12 +532,12 @@ static void panic_halt_ipmi_set_timeout(void) /* Wait for the messages to be free. */ while (atomic_read(&panic_done_count) != 0) ipmi_poll_interface(watchdog_user); - atomic_add(1, &panic_done_count); + atomic_inc(&panic_done_count); rv = __ipmi_set_timeout(&panic_halt_smi_msg, &panic_halt_recv_msg, &send_heartbeat_now); if (rv) { - atomic_sub(1, &panic_done_count); + atomic_dec(&panic_done_count); pr_warn("Unable to extend the watchdog timeout\n"); } else { if (send_heartbeat_now) From 52fc917855ce4ef1603fc23f7b0fb3f7300a8b70 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 20 Sep 2021 06:25:37 -0500 Subject: [PATCH 45/55] ipmi:watchdog: Set panic count to proper value on a panic commit db05ddf7f321634c5659a0cf7ea56594e22365f7 upstream. You will get two decrements when the messages on a panic are sent, not one, since commit 2033f6858970 ("ipmi: Free receive messages when in an oops") was added, but the watchdog code had a bug where it didn't set the value properly. Reported-by: Anton Lundin Cc: # v5.4+ Fixes: 2033f6858970 ("ipmi: Free receive messages when in an oops") Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman --- drivers/char/ipmi/ipmi_watchdog.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 5fd782749311..ccb62c480bdd 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -498,7 +498,7 @@ static void panic_halt_ipmi_heartbeat(void) msg.cmd = IPMI_WDOG_RESET_TIMER; msg.data = NULL; msg.data_len = 0; - atomic_inc(&panic_done_count); + atomic_add(2, &panic_done_count); rv = ipmi_request_supply_msgs(watchdog_user, (struct ipmi_addr *) &addr, 0, @@ -508,7 +508,7 @@ static void panic_halt_ipmi_heartbeat(void) &panic_halt_heartbeat_recv_msg, 1); if (rv) - atomic_dec(&panic_done_count); + atomic_sub(2, &panic_done_count); } static struct ipmi_smi_msg panic_halt_smi_msg = { @@ -532,12 +532,12 @@ static void panic_halt_ipmi_set_timeout(void) /* Wait for the messages to be free. */ while (atomic_read(&panic_done_count) != 0) ipmi_poll_interface(watchdog_user); - atomic_inc(&panic_done_count); + atomic_add(2, &panic_done_count); rv = __ipmi_set_timeout(&panic_halt_smi_msg, &panic_halt_recv_msg, &send_heartbeat_now); if (rv) { - atomic_dec(&panic_done_count); + atomic_sub(2, &panic_done_count); pr_warn("Unable to extend the watchdog timeout\n"); } else { if (send_heartbeat_now) From 1aed78cfda7f17f3cc71cb127a85a188eafc679a Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 15 Feb 2023 17:11:01 -0800 Subject: [PATCH 46/55] drm/i915: Don't use BAR mappings for ring buffers with LLC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 85636167e3206c3fbd52254fc432991cc4e90194 upstream. Direction from hardware is that ring buffers should never be mapped via the BAR on systems with LLC. There are too many caching pitfalls due to the way BAR accesses are routed. So it is safest to just not use it. Signed-off-by: John Harrison Fixes: 9d80841ea4c9 ("drm/i915: Allow ringbuffers to be bound anywhere") Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: intel-gfx@lists.freedesktop.org Cc: # v4.9+ Tested-by: Jouni Högander Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20230216011101.1909009-3-John.C.Harrison@Intel.com (cherry picked from commit 65c08339db1ada87afd6cfe7db8e60bb4851d919) Signed-off-by: Jani Nikula Signed-off-by: John Harrison Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index eee9fcbe0434..808269b2108f 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1208,7 +1208,7 @@ int intel_ring_pin(struct intel_ring *ring) if (unlikely(ret)) goto err_unpin; - if (i915_vma_is_map_and_fenceable(vma)) + if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) addr = (void __force *)i915_vma_pin_iomap(vma); else addr = i915_gem_object_pin_map(vma->obj, @@ -1252,7 +1252,7 @@ void intel_ring_unpin(struct intel_ring *ring) intel_ring_reset(ring, ring->emit); i915_vma_unset_ggtt_write(vma); - if (i915_vma_is_map_and_fenceable(vma)) + if (i915_vma_is_map_and_fenceable(vma) && !HAS_LLC(vma->vm->i915)) i915_vma_unpin_iomap(vma); else i915_gem_object_unpin_map(vma->obj); From a4bd6d4df382a6285900c9bd2b30d2a44c52e2d0 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 1 Mar 2023 19:06:59 -0700 Subject: [PATCH 47/55] x86, vmlinux.lds: Add RUNTIME_DISCARD_EXIT to generic DISCARDS commit 84d5f77fc2ee4e010c2c037750e32f06e55224b0 upstream. In the x86 kernel, .exit.text and .exit.data sections are discarded at runtime, not by the linker. Add RUNTIME_DISCARD_EXIT to generic DISCARDS and define it in the x86 kernel linker script to keep them. The sections are added before the DISCARD directive so document here only the situation explicitly as this change doesn't have any effect on the generated kernel. Also, other architectures like ARM64 will use it too so generalize the approach with the RUNTIME_DISCARD_EXIT define. [ bp: Massage and extend commit message. ] Signed-off-by: H.J. Lu Signed-off-by: Borislav Petkov Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20200326193021.255002-1-hjl.tools@gmail.com Signed-off-by: Tom Saeger --- arch/x86/kernel/vmlinux.lds.S | 2 ++ include/asm-generic/vmlinux.lds.h | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1afe211d7a7c..7e0e21082c93 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -21,6 +21,8 @@ #define LOAD_OFFSET __START_KERNEL_map #endif +#define RUNTIME_DISCARD_EXIT + #include #include #include diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c3bcac22c389..2d45d98773e2 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -900,10 +900,17 @@ * section definitions so that such archs put those in earlier section * definitions. */ +#ifdef RUNTIME_DISCARD_EXIT +#define EXIT_DISCARDS +#else +#define EXIT_DISCARDS \ + EXIT_TEXT \ + EXIT_DATA +#endif + #define DISCARDS \ /DISCARD/ : { \ - EXIT_TEXT \ - EXIT_DATA \ + EXIT_DISCARDS \ EXIT_CALL \ *(.discard) \ *(.discard.*) \ From d0fcf59038c509510cdb39f304ffcdce9f826317 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 1 Mar 2023 19:07:00 -0700 Subject: [PATCH 48/55] arch: fix broken BuildID for arm64 and riscv commit 99cb0d917ffa1ab628bb67364ca9b162c07699b1 upstream. Dennis Gilmore reports that the BuildID is missing in the arm64 vmlinux since commit 994b7ac1697b ("arm64: remove special treatment for the link order of head.o"). The issue is that the type of .notes section, which contains the BuildID, changed from NOTES to PROGBITS. Ard Biesheuvel figured out that whichever object gets linked first gets to decide the type of a section. The PROGBITS type is the result of the compiler emitting .note.GNU-stack as PROGBITS rather than NOTE. While Ard provided a fix for arm64, I want to fix this globally because the same issue is happening on riscv since commit 2348e6bf4421 ("riscv: remove special treatment for the link order of head.o"). This problem will happen in general for other architectures if they start to drop unneeded entries from scripts/head-object-list.txt. Discard .note.GNU-stack in include/asm-generic/vmlinux.lds.h. Link: https://lore.kernel.org/lkml/CAABkxwuQoz1CTbyb57n0ZX65eSYiTonFCU8-LCQc=74D=xE=rA@mail.gmail.com/ Fixes: 994b7ac1697b ("arm64: remove special treatment for the link order of head.o") Fixes: 2348e6bf4421 ("riscv: remove special treatment for the link order of head.o") Reported-by: Dennis Gilmore Suggested-by: Ard Biesheuvel Signed-off-by: Masahiro Yamada Acked-by: Palmer Dabbelt [Tom: stable backport 5.15.y, 5.10.y, 5.4.y] Though the above "Fixes:" commits are not in this kernel, the conditions which lead to a missing Build ID in arm64 vmlinux are similar. Evidence points to these conditions: 1. ld version > 2.36 (exact binutils commit documented in a494398bde27) 2. first object which gets linked (head.o) has a PROGBITS .note.GNU-stack segment These conditions can be observed when: - 5.15.60+ OR 5.10.136+ OR 5.4.210+ - AND ld version > 2.36 - AND arch=arm64 - AND CONFIG_MODVERSIONS=y There are notable differences in the vmlinux elf files produced before(bad) and after(good) applying this series. Good: p_type:PT_NOTE segment exists. Bad: p_type:PT_NOTE segment is missing. Good: sh_name_str:.notes section has sh_type:SHT_NOTE Bad: sh_name_str:.notes section has sh_type:SHT_PROGBITS `readelf -n` (as of v2.40) searches for Build Id by processing only the very first note in sh_type:SHT_NOTE sections. This was previously bisected to the stable backport of 0d362be5b142. Follow-up experiments were discussed here: https://lore.kernel.org/all/20221221235413.xaisboqmr7dkqwn6@oracle.com/ which strongly hints at condition 2. Signed-off-by: Tom Saeger Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 2d45d98773e2..a68535f36d13 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -825,7 +825,12 @@ #define TRACEDATA #endif +/* + * Discard .note.GNU-stack, which is emitted as PROGBITS by the compiler. + * Otherwise, the type of .notes section would become PROGBITS instead of NOTES. + */ #define NOTES \ + /DISCARD/ : { *(.note.GNU-stack) } \ .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ __start_notes = .; \ KEEP(*(.note.*)) \ From 4eede1173fb55615d64536a9f1b42977d65fe273 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 1 Mar 2023 19:07:01 -0700 Subject: [PATCH 49/55] powerpc/vmlinux.lds: Define RUNTIME_DISCARD_EXIT commit 4b9880dbf3bdba3a7c56445137c3d0e30aaa0a40 upstream. The powerpc linker script explicitly includes .exit.text, because otherwise the link fails due to references from __bug_table and __ex_table. The code is freed (discarded) at runtime along with .init.text and data. That has worked in the past despite powerpc not defining RUNTIME_DISCARD_EXIT because DISCARDS appears late in the powerpc linker script (line 410), and the explicit inclusion of .exit.text earlier (line 280) supersedes the discard. However commit 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") introduced an earlier use of DISCARD as part of the RO_DATA macro (line 136). With binutils < 2.36 that causes the DISCARD directives later in the script to be applied earlier [1], causing .exit.text to actually be discarded at link time, leading to build errors: '.exit.text' referenced in section '__bug_table' of crypto/algboss.o: defined in discarded section '.exit.text' of crypto/algboss.o '.exit.text' referenced in section '__ex_table' of drivers/nvdimm/core.o: defined in discarded section '.exit.text' of drivers/nvdimm/core.o Fix it by defining RUNTIME_DISCARD_EXIT, which causes the generic DISCARDS macro to not include .exit.text at all. 1: https://lore.kernel.org/lkml/87fscp2v7k.fsf@igel.home/ Fixes: 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230105132349.384666-1-mpe@ellerman.id.au Signed-off-by: Tom Saeger Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 3ea360cad337..4d5e1662a0ba 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -6,6 +6,7 @@ #endif #define BSS_FIRST_SECTIONS *(.bss.prominit) +#define RUNTIME_DISCARD_EXIT #include #include From 219cc98501ff2bbf7aebb654fae57a4cda1edef9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 1 Mar 2023 19:07:02 -0700 Subject: [PATCH 50/55] powerpc/vmlinux.lds: Don't discard .rela* for relocatable builds commit 07b050f9290ee012a407a0f64151db902a1520f5 upstream. Relocatable kernels must not discard relocations, they need to be processed at runtime. As such they are included for CONFIG_RELOCATABLE builds in the powerpc linker script (line 340). However they are also unconditionally discarded later in the script (line 414). Previously that worked because the earlier inclusion superseded the discard. However commit 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") introduced an earlier use of DISCARD as part of the RO_DATA macro (line 137). With binutils < 2.36 that causes the DISCARD directives later in the script to be applied earlier, causing .rela* to actually be discarded at link time, leading to build warnings and a kernel that doesn't boot: ld: warning: discarding dynamic section .rela.init.rodata Fix it by conditionally discarding .rela* only when CONFIG_RELOCATABLE is disabled. Fixes: 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230105132349.384666-2-mpe@ellerman.id.au Signed-off-by: Tom Saeger Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/vmlinux.lds.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 4d5e1662a0ba..46dfb3701c6e 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -395,9 +395,12 @@ SECTIONS DISCARDS /DISCARD/ : { *(*.EMB.apuinfo) - *(.glink .iplt .plt .rela* .comment) + *(.glink .iplt .plt .comment) *(.gnu.version*) *(.gnu.attributes) *(.eh_frame) +#ifndef CONFIG_RELOCATABLE + *(.rela*) +#endif } } From eb9dbb70cdd52b92e61030d49599aac4e5f21944 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 1 Mar 2023 19:07:03 -0700 Subject: [PATCH 51/55] s390: define RUNTIME_DISCARD_EXIT to fix link error with GNU ld < 2.36 commit a494398bde273143c2352dd373cad8211f7d94b2 upstream. Nathan Chancellor reports that the s390 vmlinux fails to link with GNU ld < 2.36 since commit 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv"). It happens for defconfig, or more specifically for CONFIG_EXPOLINE=y. $ s390x-linux-gnu-ld --version | head -n1 GNU ld (GNU Binutils for Debian) 2.35.2 $ make -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- allnoconfig $ ./scripts/config -e CONFIG_EXPOLINE $ make -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- olddefconfig $ make -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- `.exit.text' referenced in section `.s390_return_reg' of drivers/base/dd.o: defined in discarded section `.exit.text' of drivers/base/dd.o make[1]: *** [scripts/Makefile.vmlinux:34: vmlinux] Error 1 make: *** [Makefile:1252: vmlinux] Error 2 arch/s390/kernel/vmlinux.lds.S wants to keep EXIT_TEXT: .exit.text : { EXIT_TEXT } But, at the same time, EXIT_TEXT is thrown away by DISCARD because s390 does not define RUNTIME_DISCARD_EXIT. I still do not understand why the latter wins after 99cb0d917ffa, but defining RUNTIME_DISCARD_EXIT seems correct because the comment line in arch/s390/kernel/vmlinux.lds.S says: /* * .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table */ Nathan also found that binutils commit 21401fc7bf67 ("Duplicate output sections in scripts") cured this issue, so we cannot reproduce it with binutils 2.36+, but it is better to not rely on it. Fixes: 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") Link: https://lore.kernel.org/all/Y7Jal56f6UBh1abE@dev-arch.thelio-3990X/ Reported-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20230105031306.1455409-1-masahiroy@kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Tom Saeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 99053c80388e..f8cc4e8524bf 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -15,6 +15,8 @@ /* Handle ro_after_init data on our own. */ #define RO_AFTER_INIT_DATA +#define RUNTIME_DISCARD_EXIT + #include #include From 87fcce7a6f86b81e2db39cb04297e847849e0da0 Mon Sep 17 00:00:00 2001 From: Tom Saeger Date: Wed, 1 Mar 2023 19:07:04 -0700 Subject: [PATCH 52/55] sh: define RUNTIME_DISCARD_EXIT commit c1c551bebf928889e7a8fef7415b44f9a64975f4 upstream. sh vmlinux fails to link with GNU ld < 2.40 (likely < 2.36) since commit 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv"). This is similar to fixes for powerpc and s390: commit 4b9880dbf3bd ("powerpc/vmlinux.lds: Define RUNTIME_DISCARD_EXIT"). commit a494398bde27 ("s390: define RUNTIME_DISCARD_EXIT to fix link error with GNU ld < 2.36"). $ sh4-linux-gnu-ld --version | head -n1 GNU ld (GNU Binutils for Debian) 2.35.2 $ make ARCH=sh CROSS_COMPILE=sh4-linux-gnu- microdev_defconfig $ make ARCH=sh CROSS_COMPILE=sh4-linux-gnu- `.exit.text' referenced in section `__bug_table' of crypto/algboss.o: defined in discarded section `.exit.text' of crypto/algboss.o `.exit.text' referenced in section `__bug_table' of drivers/char/hw_random/core.o: defined in discarded section `.exit.text' of drivers/char/hw_random/core.o make[2]: *** [scripts/Makefile.vmlinux:34: vmlinux] Error 1 make[1]: *** [Makefile:1252: vmlinux] Error 2 arch/sh/kernel/vmlinux.lds.S keeps EXIT_TEXT: /* * .exit.text is discarded at runtime, not link time, to deal with * references from __bug_table */ .exit.text : AT(ADDR(.exit.text)) { EXIT_TEXT } However, EXIT_TEXT is thrown away by DISCARD(include/asm-generic/vmlinux.lds.h) because sh does not define RUNTIME_DISCARD_EXIT. GNU ld 2.40 does not have this issue and builds fine. This corresponds with Masahiro's comments in a494398bde27: "Nathan [Chancellor] also found that binutils commit 21401fc7bf67 ("Duplicate output sections in scripts") cured this issue, so we cannot reproduce it with binutils 2.36+, but it is better to not rely on it." Link: https://lkml.kernel.org/r/9166a8abdc0f979e50377e61780a4bba1dfa2f52.1674518464.git.tom.saeger@oracle.com Fixes: 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") Link: https://lore.kernel.org/all/Y7Jal56f6UBh1abE@dev-arch.thelio-3990X/ Link: https://lore.kernel.org/all/20230123194218.47ssfzhrpnv3xfez@oracle.com/ Signed-off-by: Tom Saeger Tested-by: John Paul Adrian Glaubitz Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Dennis Gilmore Cc: Greg Kroah-Hartman Cc: Masahiro Yamada Cc: Naresh Kamboju Cc: Nathan Chancellor Cc: Palmer Dabbelt Cc: Rich Felker Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Tom Saeger Signed-off-by: Greg Kroah-Hartman --- arch/sh/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 77a59d8c6b4d..ec3bae172b20 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -10,6 +10,7 @@ OUTPUT_ARCH(sh:sh5) #define LOAD_OFFSET 0 OUTPUT_ARCH(sh) #endif +#define RUNTIME_DISCARD_EXIT #include #include From 7a934a77f11a68b3f3a73f64ee10880bfeacaf79 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 8 Feb 2023 01:41:56 +0900 Subject: [PATCH 53/55] UML: define RUNTIME_DISCARD_EXIT commit b99ddbe8336ee680257c8ab479f75051eaa49dcf upstream. With CONFIG_VIRTIO_UML=y, GNU ld < 2.36 fails to link UML vmlinux (w/wo CONFIG_LD_SCRIPT_STATIC). `.exit.text' referenced in section `.uml.exitcall.exit' of arch/um/drivers/virtio_uml.o: defined in discarded section `.exit.text' of arch/um/drivers/virtio_uml.o collect2: error: ld returned 1 exit status This fix is similar to the following commits: - 4b9880dbf3bd ("powerpc/vmlinux.lds: Define RUNTIME_DISCARD_EXIT") - a494398bde27 ("s390: define RUNTIME_DISCARD_EXIT to fix link error with GNU ld < 2.36") - c1c551bebf92 ("sh: define RUNTIME_DISCARD_EXIT") Fixes: 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") Reported-by: SeongJae Park Signed-off-by: Masahiro Yamada Tested-by: SeongJae Park Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- arch/um/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S index 16e49bfa2b42..53d719c04ba9 100644 --- a/arch/um/kernel/vmlinux.lds.S +++ b/arch/um/kernel/vmlinux.lds.S @@ -1,4 +1,4 @@ - +#define RUNTIME_DISCARD_EXIT KERNEL_STACK_SIZE = 4096 * (1 << CONFIG_KERNEL_STACK_ORDER); #ifdef CONFIG_LD_SCRIPT_STATIC From 6a16810068e70959bc1df686424aa35ce05578f1 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 25 May 2021 14:50:06 +0200 Subject: [PATCH 54/55] s390/dasd: add missing discipline function commit c0c8a8397fa8a74d04915f4d3d28cb4a5d401427 upstream. Fix crash with illegal operation exception in dasd_device_tasklet. Commit b72949328869 ("s390/dasd: Prepare for additional path event handling") renamed the verify_path function for ECKD but not for FBA and DIAG. This leads to a panic when the path verification function is called for a FBA or DIAG device. Fix by defining a wrapper function for dasd_generic_verify_path(). Fixes: b72949328869 ("s390/dasd: Prepare for additional path event handling") Cc: #5.11 Reviewed-by: Jan Hoeppner Signed-off-by: Stefan Haberland Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20210525125006.157531-2-sth@linux.ibm.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd_diag.c | 7 ++++++- drivers/s390/block/dasd_fba.c | 7 ++++++- drivers/s390/block/dasd_int.h | 1 - 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index f7ae03fd36cb..f029884eabd1 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -644,12 +644,17 @@ static void dasd_diag_setup_blk_queue(struct dasd_block *block) blk_queue_segment_boundary(q, PAGE_SIZE - 1); } +static int dasd_diag_pe_handler(struct dasd_device *device, __u8 tbvpm) +{ + return dasd_generic_verify_path(device, tbvpm); +} + static struct dasd_discipline dasd_diag_discipline = { .owner = THIS_MODULE, .name = "DIAG", .ebcname = "DIAG", .check_device = dasd_diag_check_device, - .verify_path = dasd_generic_verify_path, + .pe_handler = dasd_diag_pe_handler, .fill_geometry = dasd_diag_fill_geometry, .setup_blk_queue = dasd_diag_setup_blk_queue, .start_IO = dasd_start_diag, diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 1a44e321b54e..b159575a2760 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -803,13 +803,18 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block) blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } +static int dasd_fba_pe_handler(struct dasd_device *device, __u8 tbvpm) +{ + return dasd_generic_verify_path(device, tbvpm); +} + static struct dasd_discipline dasd_fba_discipline = { .owner = THIS_MODULE, .name = "FBA ", .ebcname = "FBA ", .check_device = dasd_fba_check_characteristics, .do_analysis = dasd_fba_do_analysis, - .verify_path = dasd_generic_verify_path, + .pe_handler = dasd_fba_pe_handler, .setup_blk_queue = dasd_fba_setup_blk_queue, .fill_geometry = dasd_fba_fill_geometry, .start_IO = dasd_start_IO, diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index e8a06d85d6f7..5d7d35ca5eb4 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -298,7 +298,6 @@ struct dasd_discipline { * e.g. verify that new path is compatible with the current * configuration. */ - int (*verify_path)(struct dasd_device *, __u8); int (*pe_handler)(struct dasd_device *, __u8); /* From e4b5c766f50525d84754cfdf0e4edef1db9622c0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 17 Mar 2023 08:32:54 +0100 Subject: [PATCH 55/55] Linux 5.4.237 Link: https://lore.kernel.org/r/20230315115726.103942885@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Shuah Khan Link: https://lore.kernel.org/r/20230316083403.224993044@linuxfoundation.org Tested-by: Chris Paterson (CIP) Tested-by: Florian Fainelli Tested-by: Harshit Mogalapalli Tested-by: Tom Saeger Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e3b56259d50a..ccac1c82eb78 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 236 +SUBLEVEL = 237 EXTRAVERSION = NAME = Kleptomaniac Octopus